kordoc 2.2.3 → 2.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-AIG7SDWU.js → chunk-SY2RFVLW.js} +1051 -149
- package/dist/chunk-SY2RFVLW.js.map +1 -0
- package/dist/cli.js +149 -4
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +799 -238
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +97 -7
- package/dist/index.d.ts +97 -7
- package/dist/index.js +795 -238
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +126 -6
- package/dist/mcp.js.map +1 -1
- package/dist/{watch-H672QAW2.js → watch-5P7DJ3HG.js} +2 -2
- package/package.json +1 -1
- package/dist/chunk-AIG7SDWU.js.map +0 -1
- /package/dist/{watch-H672QAW2.js.map → watch-5P7DJ3HG.js.map} +0 -0
|
@@ -8,7 +8,7 @@ import {
|
|
|
8
8
|
} from "./chunk-MOL7MDBG.js";
|
|
9
9
|
|
|
10
10
|
// src/utils.ts
|
|
11
|
-
var VERSION = true ? "2.2.3" : "0.0.0-dev";
|
|
11
|
+
// Package version string. The constant-condition ternary looks like a
// bundler-inlined build flag (presumably "0.0.0-dev" is the fallback used when
// running unbundled — confirm against src/utils.ts).
var VERSION = true ? "2.2.4" : "0.0.0-dev";
|
|
12
12
|
function toArrayBuffer(buf) {
|
|
13
13
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
14
14
|
return buf.buffer;
|
|
@@ -5189,12 +5189,1040 @@ function mergeKoreanLines(text) {
|
|
|
5189
5189
|
return result.join("\n");
|
|
5190
5190
|
}
|
|
5191
5191
|
|
|
5192
|
+
// src/form/recognize.ts
|
|
5193
|
+
/**
 * Korean vocabulary used to recognise form labels. A cell whose text contains
 * any of these words is treated as a label by `isLabelCell` / `isKeywordLabel`.
 * The English glosses are approximate translations of each escaped word.
 */
var LABEL_KEYWORDS = /* @__PURE__ */ new Set([
  // Identity and contact details
  "\uC131\uBA85", // full name
  "\uC774\uB984", // name
  "\uC8FC\uC18C", // address
  "\uC804\uD654", // telephone
  "\uC804\uD654\uBC88\uD638", // telephone number
  "\uD734\uB300\uD3F0", // mobile phone
  "\uD578\uB4DC\uD3F0", // mobile phone (colloquial)
  "\uC5F0\uB77D\uCC98", // contact
  "\uC0DD\uB144\uC6D4\uC77C", // date of birth
  "\uC8FC\uBBFC\uB4F1\uB85D\uBC88\uD638", // resident registration number

  // Organisation
  "\uC18C\uC18D", // affiliation
  "\uC9C1\uC704", // position
  "\uC9C1\uAE09", // rank
  "\uBD80\uC11C", // department
  "\uC774\uBA54\uC77C", // e-mail
  "\uD329\uC2A4", // fax

  // School
  "\uD559\uAD50", // school
  "\uD559\uB144", // grade / year
  "\uBC18", // class
  "\uBC88\uD638", // number

  // Roles
  "\uC2E0\uCCAD\uC778", // applicant
  "\uB300\uD45C\uC790", // representative
  "\uB2F4\uB2F9\uC790", // person in charge
  "\uC791\uC131\uC790", // author
  "\uD655\uC778\uC790", // verifier
  "\uC2B9\uC778\uC790", // approver

  // Event details
  "\uC77C\uC2DC", // date and time
  "\uB0A0\uC9DC", // date
  "\uAE30\uAC04", // period
  "\uC7A5\uC18C", // place
  "\uBAA9\uC801", // purpose
  "\uC0AC\uC720", // reason
  "\uBE44\uACE0", // remarks

  // Amounts
  "\uAE08\uC561", // amount
  "\uC218\uB7C9", // quantity
  "\uB2E8\uAC00", // unit price
  "\uD569\uACC4", // total
  "\uACC4", // sum
  "\uC18C\uACC4", // subtotal

  // Civil / legal paperwork
  "\uB4F1\uB85D\uAE30\uC900\uC9C0", // place of family registration
  "\uBCF8\uC801", // permanent domicile
  "\uC704\uC784\uC778", // delegator
  "\uCCAD\uAD6C\uC0AC\uC720", // reason for request
  "\uC18C\uBA85\uC790\uB8CC" // supporting material
]);
|
|
5239
|
+
/**
 * Heuristically decides whether a table cell's text is a form label.
 *
 * Trailing footnote markers (superscript digits, `*`, `※`) are ignored. The
 * text counts as a label when it is at most 30 characters long and either
 * contains a known keyword, is a short (2-8 non-space characters) digit-free
 * Hangul phrase, or is a Hangul/Latin phrase ending in a colon.
 *
 * @param {string} text - Raw cell text.
 * @returns {boolean} True when the text looks like a label.
 */
function isLabelCell(text) {
  const candidate = text.trim().replace(/[¹²³⁴⁵⁶⁷⁸⁹⁰*※]+$/g, "").trim();
  if (candidate.length === 0 || candidate.length > 30) return false;

  if ([...LABEL_KEYWORDS].some((keyword) => candidate.includes(keyword))) {
    return true;
  }

  const compactLength = candidate.replace(/\s/g, "").length;
  const isShortHangulPhrase =
    /^[가-힣\s()()·::]+$/.test(candidate) &&
    compactLength >= 2 &&
    compactLength <= 8 &&
    !/\d/.test(candidate);

  return isShortHangulPhrase || /^[가-힣A-Za-z\s]+[::]$/.test(candidate);
}
|
|
5249
|
+
/**
 * Collects label/value pairs from every table and paragraph block.
 *
 * Table fields come first (in block order), followed by inline
 * "label: value" fields found in paragraphs.
 *
 * @param {Array<object>} blocks - Parsed document blocks.
 * @returns {{fields: Array<object>, confidence: number}} The fields plus a
 *   confidence score: the share of tables that yielded fields, or 0.3 when
 *   there are no tables but inline fields exist, otherwise 0.
 */
function extractFormFields(blocks) {
  const tableBlocks = blocks.filter((b) => b.type === "table" && b.table);
  const fields = [];
  let formTables = 0;

  for (const { table } of tableBlocks) {
    const found = extractFromTable(table);
    if (found.length === 0) continue;
    formTables += 1;
    fields.push(...found);
  }

  for (const b of blocks) {
    if (b.type === "paragraph" && b.text) {
      fields.push(...extractInlineFields(b.text));
    }
  }

  let confidence;
  if (tableBlocks.length > 0) {
    confidence = formTables / tableBlocks.length;
  } else {
    confidence = fields.length > 0 ? 0.3 : 0;
  }
  return { fields, confidence: Math.min(confidence, 1) };
}
|
|
5271
|
+
/**
 * Extracts label/value pairs from a single table.
 *
 * Strategy 1 (side-by-side): every cell that looks like a label is paired with
 * the cell to its right. Strategy 2 (header row) is used only when strategy 1
 * found nothing: if every cell of the first row is a short non-empty string,
 * each non-empty cell below is paired with its column header.
 *
 * Missing rows or cells (ragged/sparse grids) are skipped instead of throwing,
 * matching the guards already used by the fill functions.
 *
 * @param {{rows: number, cols: number, cells: Array<Array<{text: string}>>}} table
 * @returns {Array<{label: string, value: string, row: number, col: number}>}
 */
function extractFromTable(table) {
  const fields = [];

  if (table.cols >= 2) {
    for (let r = 0; r < table.rows; r++) {
      const row = table.cells[r];
      if (!row) continue;
      for (let c = 0; c < table.cols - 1; c++) {
        const labelCell = row[c];
        const valueCell = row[c + 1];
        if (!labelCell || !valueCell) continue;
        if (!isLabelCell(labelCell.text)) continue;
        fields.push({
          // Drop a trailing colon so "Name:" and "Name" yield the same label.
          label: labelCell.text.trim().replace(/[::]\s*$/, ""),
          value: valueCell.text.trim(),
          row: r,
          col: c
        });
      }
    }
  }

  if (fields.length > 0 || table.rows < 2 || table.cols < 2) return fields;

  const headerRow = table.cells[0];
  if (!headerRow) return fields;
  const allLabels = headerRow.every((cell) => {
    if (!cell) return false;
    const t = cell.text.trim();
    return t.length > 0 && t.length <= 20;
  });
  if (!allLabels) return fields;

  for (let r = 1; r < table.rows; r++) {
    for (let c = 0; c < table.cols; c++) {
      const headerCell = headerRow[c];
      const dataCell = table.cells[r]?.[c];
      if (!headerCell || !dataCell) continue;
      const label = headerCell.text.trim();
      const value = dataCell.text.trim();
      if (label && value) {
        fields.push({ label, value, row: r, col: c });
      }
    }
  }
  return fields;
}
|
|
5309
|
+
/**
 * Finds inline "label: value" pairs in free text.
 *
 * A label is 2-10 Hangul/Latin letters; the value runs up to the next newline,
 * comma or semicolon (at most 100 characters). Pairs whose value is blank
 * after trimming are dropped. Inline fields carry row/col of -1.
 *
 * @param {string} text - Paragraph text.
 * @returns {Array<{label: string, value: string, row: number, col: number}>}
 */
function extractInlineFields(text) {
  const pairPattern = /([가-힣A-Za-z]{2,10})\s*[::]\s*([^\n,;]{1,100})/g;
  return Array.from(text.matchAll(pairPattern), ([, rawLabel, rawValue]) => ({
    label: rawLabel.trim(),
    value: rawValue.trim(),
    row: -1,
    col: -1
  })).filter((field) => field.value !== "");
}
|
|
5322
|
+
|
|
5323
|
+
// src/form/match.ts
|
|
5324
|
+
/**
 * Canonicalises a label for matching by removing colons, whitespace,
 * parentheses and middle dots.
 *
 * @param {string} label - Raw label text.
 * @returns {string} The stripped label (may be empty).
 */
function normalizeLabel(label) {
  const decoration = /[::\s()()·]/g;
  const trimmed = label.trim();
  return trimmed.replace(decoration, "");
}
|
|
5327
|
+
/**
 * Finds the key in `values` that best matches a normalised cell label.
 *
 * An exact hit wins immediately. Otherwise a key qualifies when one string is
 * a prefix of the other and the shorter one covers at least 60% of the longer;
 * among qualifying keys the one with the longest shared prefix wins (the first
 * one seen on ties).
 *
 * @param {string} cellLabel - Normalised label text.
 * @param {Map<string, *>} values - Normalised label → value map.
 * @returns {string|undefined} The matching key, or undefined when none fits.
 */
function findMatchingKey(cellLabel, values) {
  if (values.has(cellLabel)) return cellLabel;

  let winner;
  let winnerOverlap = 0;
  const consider = (key, overlap, longerLength) => {
    if (overlap >= longerLength * 0.6 && overlap > winnerOverlap) {
      winnerOverlap = overlap;
      winner = key;
    }
  };

  for (const key of values.keys()) {
    if (cellLabel.startsWith(key)) {
      consider(key, key.length, cellLabel.length);
    } else if (key.startsWith(cellLabel)) {
      consider(key, cellLabel.length, key.length);
    }
  }
  return winner;
}
|
|
5346
|
+
/**
 * Stricter variant of the label test: true only when the text (footnote
 * markers stripped, at most 15 characters) contains a known label keyword.
 *
 * @param {string} text - Raw cell text.
 * @returns {boolean} True when the text contains a label keyword.
 */
function isKeywordLabel(text) {
  const candidate = text.trim().replace(/[¹²³⁴⁵⁶⁷⁸⁹⁰*※]+$/g, "").trim();
  if (candidate.length === 0 || candidate.length > 15) return false;
  return [...LABEL_KEYWORDS].some((keyword) => candidate.includes(keyword));
}
|
|
5354
|
+
/**
 * Fills placeholders that live inside a single cell's text.
 *
 * Three patterns are handled, in this order:
 *  1. `prefix(   )suffix` — blank parentheses; looked up as prefix+suffix,
 *     then as prefix alone, and the blank is replaced with the value.
 *  2. `□keyword` — an empty checkbox; ticked (`☑`) when the supplied value is
 *     blank or one of the recognised "checked" tokens.
 *  3. `(keyword:   )` — a labelled blank inside parentheses.
 *
 * Checkbox values are coerced with `String()` before comparison, so booleans
 * and numbers (e.g. `true`, `1`) work instead of throwing on `.trim()`.
 *
 * @param {string} cellText - Original cell text.
 * @param {Map<string, *>} values - Normalised label → value map.
 * @param {Set<string>} matchedLabels - Receives every key that was used.
 * @returns {{text: string, matches: Array<{key: string, label: string, value: *}>}|null}
 *   The rewritten text and what was filled, or null when nothing matched.
 */
function fillInCellPatterns(cellText, values, matchedLabels) {
  const CHECKED_TOKENS = ["\u2611", "\u2713", "\u2714", "v", "V", "true", "1", "yes", "o", "O"];
  const matches = [];
  let text = cellText;

  // Pattern 1: blank parentheses between a prefix and an optional suffix.
  text = text.replace(
    /([가-힣A-Za-z]+)\(\s{1,}\)([가-힣A-Za-z]*)/g,
    (match, prefix, suffix) => {
      const label = prefix + suffix;
      const matchKey = [normalizeLabel(label), normalizeLabel(prefix)].find((key) => values.has(key));
      if (matchKey === undefined) return match;
      const newValue = values.get(matchKey);
      matchedLabels.add(matchKey);
      matches.push({ key: matchKey, label, value: newValue });
      return `${prefix}(${newValue})${suffix}`;
    }
  );

  // Pattern 2: empty checkbox followed by its keyword.
  text = text.replace(
    /□([가-힣A-Za-z]+)/g,
    (match, keyword) => {
      const matchKey = normalizeLabel(keyword);
      if (!values.has(matchKey)) return match;
      // Coerce first: callers may pass booleans or numbers for checkboxes.
      const token = String(values.get(matchKey)).trim();
      const isTruthy = token === "" || CHECKED_TOKENS.includes(token);
      if (!isTruthy) return match;
      matchedLabels.add(matchKey);
      matches.push({ key: matchKey, label: `\u25A1${keyword}`, value: "\u2611" });
      return `\u2611${keyword}`;
    }
  );

  // Pattern 3: "(keyword:   )" — a labelled blank inside parentheses.
  text = text.replace(
    /\(([가-힣A-Za-z]+)[::]\s{1,}\)/g,
    (match, keyword) => {
      const matchKey = normalizeLabel(keyword);
      if (!values.has(matchKey)) return match;
      const newValue = values.get(matchKey);
      matchedLabels.add(matchKey);
      matches.push({ key: matchKey, label: keyword, value: newValue });
      return `(${keyword}\uFF1A${newValue})`;
    }
  );

  return matches.length > 0 ? { text, matches } : null;
}
|
|
5398
|
+
/**
 * Converts a plain label → value object into a Map keyed by normalised label.
 * When two labels normalise to the same key, the later entry wins.
 *
 * @param {Object<string, *>} values - Caller-supplied label → value pairs.
 * @returns {Map<string, *>} Normalised label → value map.
 */
function normalizeValues(values) {
  return Object.entries(values).reduce(
    (lookup, [label, value]) => lookup.set(normalizeLabel(label), value),
    new Map()
  );
}
|
|
5405
|
+
/**
 * Lists the supplied labels that were never used, in their original spelling.
 *
 * @param {Map<string, *>} normalizedValues - Normalised label → value map.
 * @param {Set<string>} matchedLabels - Normalised keys that were filled.
 * @param {Object<string, *>} originalValues - The caller's original object.
 * @returns {string[]} For each unused key, the first original label that
 *   normalises to it (or the normalised key itself if none does).
 */
function resolveUnmatched(normalizedValues, matchedLabels, originalValues) {
  // Index original spellings once; keep only the first spelling per key.
  const firstSpelling = new Map();
  for (const original of Object.keys(originalValues)) {
    const key = normalizeLabel(original);
    if (!firstSpelling.has(key)) firstSpelling.set(key, original);
  }

  const unmatched = [];
  for (const key of normalizedValues.keys()) {
    if (matchedLabels.has(key)) continue;
    unmatched.push(firstSpelling.has(key) ? firstSpelling.get(key) : key);
  }
  return unmatched;
}
|
|
5413
|
+
|
|
5414
|
+
// src/form/filler.ts
|
|
5415
|
+
/**
 * Fills form fields in a deep copy of the given blocks.
 *
 * Runs three passes over the copy: (1) in-cell placeholder patterns in every
 * table, (2) label/value and header-row filling per table, (3) inline
 * "label: value" fields in paragraphs. The input blocks are not mutated.
 *
 * Pattern-filled cell coordinates are tracked per table. A single shared
 * "row,col" set would make a pattern fill in one table cause values to be
 * prepended (instead of replaced) at the same coordinates in other tables.
 *
 * @param {Array<object>} blocks - Parsed document blocks.
 * @param {Object<string, *>} values - Label → value pairs to fill in.
 * @returns {{blocks: Array<object>, filled: Array<object>, unmatched: string[]}}
 *   The filled copy, what was filled, and the labels that matched nothing.
 */
function fillFormFields(blocks, values) {
  const cloned = structuredClone(blocks);
  const filled = [];
  const matchedLabels = /* @__PURE__ */ new Set();
  const normalizedValues = normalizeValues(values);
  const tableBlocks = cloned.filter((block) => block.type === "table" && block.table);
  // table object → Set of "row,col" keys rewritten by the pattern pass.
  const patternFilledByTable = /* @__PURE__ */ new Map();

  // Pass 1: placeholders embedded in cell text.
  for (const { table } of tableBlocks) {
    let patternFilledCells = patternFilledByTable.get(table);
    if (!patternFilledCells) {
      patternFilledCells = /* @__PURE__ */ new Set();
      patternFilledByTable.set(table, patternFilledCells);
    }
    for (let r = 0; r < table.rows; r++) {
      for (let c = 0; c < table.cols; c++) {
        const cell = table.cells[r]?.[c];
        if (!cell) continue;
        const result = fillInCellPatterns(cell.text, normalizedValues, matchedLabels);
        if (!result) continue;
        cell.text = result.text;
        patternFilledCells.add(`${r},${c}`);
        for (const m of result.matches) {
          filled.push({ label: m.label, value: m.value, row: r, col: c });
        }
      }
    }
  }

  // Pass 2: label cell → neighbouring value cell, then header-row tables.
  for (const { table } of tableBlocks) {
    fillTable(table, normalizedValues, filled, matchedLabels, patternFilledByTable.get(table));
  }

  // Pass 3: inline fields in paragraphs.
  for (const block of cloned) {
    if (block.type !== "paragraph" || !block.text) continue;
    const newText = fillInlineFields(block.text, normalizedValues, filled, matchedLabels);
    if (newText !== block.text) block.text = newText;
  }

  const unmatched = resolveUnmatched(normalizedValues, matchedLabels, values);
  return { blocks: cloned, filled, unmatched };
}
|
|
5450
|
+
/**
 * Fills one table in place.
 *
 * Pass 1 pairs each label-looking cell with the cell to its right and writes
 * the matching value there. If that value cell was already rewritten by the
 * in-cell pattern pass, the value is prepended rather than replacing the text.
 * A value cell that itself looks like a keyword label is left alone.
 *
 * Pass 2 handles header-row tables: when every cell of the first row is a
 * short label, values are written under matching headers. Keys already
 * matched are skipped, so each key is written into at most one data cell.
 *
 * Missing rows or cells are skipped instead of throwing.
 *
 * @param {{rows: number, cols: number, cells: Array<Array<{text: string}>>}} table
 * @param {Map<string, *>} values - Normalised label → value map.
 * @param {Array<object>} filled - Receives a record for every filled cell.
 * @param {Set<string>} matchedLabels - Receives every key that was used.
 * @param {Set<string>} [patternFilledCells] - "row,col" keys of cells already
 *   rewritten by the in-cell pattern pass.
 * @returns {void}
 */
function fillTable(table, values, filled, matchedLabels, patternFilledCells) {
  if (table.cols < 2) return;

  for (let r = 0; r < table.rows; r++) {
    const row = table.cells[r];
    if (!row) continue;
    for (let c = 0; c < table.cols - 1; c++) {
      const labelCell = row[c];
      const valueCell = row[c + 1];
      if (!labelCell || !valueCell) continue;
      if (!isLabelCell(labelCell.text)) continue;
      // A keyword in the "value" position means it is really another label.
      if (isKeywordLabel(valueCell.text)) continue;
      const normalizedCellLabel = normalizeLabel(labelCell.text);
      if (!normalizedCellLabel) continue;
      const matchKey = findMatchingKey(normalizedCellLabel, values);
      if (matchKey === undefined) continue;
      const newValue = values.get(matchKey);
      valueCell.text = patternFilledCells?.has(`${r},${c + 1}`)
        ? `${newValue} ${valueCell.text}`
        : newValue;
      matchedLabels.add(matchKey);
      filled.push({
        label: labelCell.text.trim().replace(/[::]\s*$/, ""),
        value: newValue,
        row: r,
        col: c
      });
    }
  }

  if (table.rows < 2) return;
  const headerRow = table.cells[0];
  if (!headerRow) return;
  const allLabels = headerRow.every((cell) => {
    if (!cell) return false;
    const t = cell.text.trim();
    return t.length > 0 && t.length <= 20 && isLabelCell(t);
  });
  if (!allLabels) return;

  for (let r = 1; r < table.rows; r++) {
    for (let c = 0; c < table.cols; c++) {
      const headerCell = headerRow[c];
      const dataCell = table.cells[r]?.[c];
      if (!headerCell || !dataCell) continue;
      const matchKey = findMatchingKey(normalizeLabel(headerCell.text), values);
      if (matchKey === undefined) continue;
      if (matchedLabels.has(matchKey)) continue;
      const newValue = values.get(matchKey);
      dataCell.text = newValue;
      matchedLabels.add(matchKey);
      filled.push({
        label: headerCell.text.trim(),
        value: newValue,
        row: r,
        col: c
      });
    }
  }
}
|
|
5504
|
+
/**
 * Rewrites inline "label: value" fields in paragraph text.
 *
 * Each occurrence whose label matches a supplied key is replaced by
 * `label: newValue` (ASCII colon plus one space); the old value, if any, is
 * discarded. Other occurrences are left untouched.
 *
 * @param {string} text - Paragraph text.
 * @param {Map<string, *>} values - Normalised label → value map.
 * @param {Array<object>} filled - Receives a record (row/col = -1) per fill.
 * @param {Set<string>} matchedLabels - Receives every key that was used.
 * @returns {string} The rewritten text.
 */
function fillInlineFields(text, values, filled, matchedLabels) {
  const fieldPattern = /([가-힣A-Za-z]{2,10})\s*[::]\s*([^\n,;]{0,100})/g;

  const substitute = (whole, rawLabel) => {
    const matchKey = findMatchingKey(normalizeLabel(rawLabel), values);
    if (matchKey === undefined) return whole;

    const newValue = values.get(matchKey);
    matchedLabels.add(matchKey);
    filled.push({ label: rawLabel.trim(), value: newValue, row: -1, col: -1 });
    return `${rawLabel}: ${newValue}`;
  };

  return text.replace(fieldPattern, substitute);
}
|
|
5523
|
+
|
|
5524
|
+
// src/form/filler-hwpx.ts
|
|
5525
|
+
import JSZip2 from "jszip";
|
|
5526
|
+
import { DOMParser as DOMParser2, XMLSerializer } from "@xmldom/xmldom";
|
|
5527
|
+
/**
 * Fills form fields directly inside an HWPX (zip + XML) document.
 *
 * For every `section<N>.xml` entry the XML is parsed and filled in four steps:
 * in-cell placeholder patterns, label cell → neighbouring value cell,
 * header-row tables, and inline "label: value" paragraphs outside tables.
 * Only sections that were actually modified are re-serialised into the zip.
 *
 * Sections are processed in numeric order (section2 before section10). A plain
 * lexicographic sort would visit section10 first, which changes which section
 * receives header-row values (those are written once per key) and the order of
 * the `filled` report.
 *
 * @param {ArrayBuffer|Uint8Array|Buffer} hwpxBuffer - Raw HWPX file contents
 *   (anything `JSZip.loadAsync` accepts).
 * @param {Object<string, *>} values - Label → value pairs to fill in.
 * @returns {Promise<{buffer: ArrayBuffer, filled: Array<object>, unmatched: string[]}>}
 * @throws {KordocError} When the archive contains no section XML file.
 */
async function fillHwpx(hwpxBuffer, values) {
  const zip = await JSZip2.loadAsync(hwpxBuffer);
  const filled = [];
  const matchedLabels = /* @__PURE__ */ new Set();
  const normalizedValues = normalizeValues(values);

  // Split "dir/section12.xml" into its stem and numeric index for sorting.
  const splitSectionPath = (name) => {
    const parts = /^([\s\S]*?)(\d+)\.xml$/i.exec(name);
    return { stem: parts[1], index: Number(parts[2]) };
  };
  const bySectionOrder = (a, b) => {
    const left = splitSectionPath(a);
    const right = splitSectionPath(b);
    if (left.stem !== right.stem) return left.stem < right.stem ? -1 : 1;
    return left.index - right.index;
  };
  const sectionFiles = Object.keys(zip.files)
    .filter((name) => /[Ss]ection\d+\.xml$/i.test(name))
    .sort(bySectionOrder);
  if (sectionFiles.length === 0) {
    throw new KordocError("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
  }

  const xmlParser = new DOMParser2();
  const xmlSerializer = new XMLSerializer();

  for (const sectionPath of sectionFiles) {
    const zipEntry = zip.file(sectionPath);
    if (!zipEntry) continue;
    const rawXml = await zipEntry.async("text");
    const doc = xmlParser.parseFromString(stripDtd(rawXml), "text/xml");
    if (!doc.documentElement) continue;

    let modified = false;
    const tables = findAllElements(doc.documentElement, "tbl");
    // Cells rewritten by the pattern pass; later passes prepend to these.
    const cellPatternApplied = /* @__PURE__ */ new Set();

    // Step 1: placeholders embedded in cell text.
    for (const tblEl of tables) {
      const allCells = findAllElements(tblEl, "tc");
      for (const tcEl of allCells) {
        const tNodes = collectCellTextNodes(tcEl);
        const fullText = tNodes.map((n) => n.text).join("");
        const result = fillInCellPatterns(fullText, normalizedValues, matchedLabels);
        if (!result) continue;
        applyTextReplacements(tNodes, fullText, result.text);
        cellPatternApplied.add(tcEl);
        for (const m of result.matches) {
          filled.push({ label: m.label, value: m.value, row: -1, col: -1 });
        }
        modified = true;
      }
    }

    for (const tblEl of tables) {
      const rows = findDirectChildren(tblEl, "tr");

      // Step 2: label cell followed by its value cell.
      for (let rowIdx = 0; rowIdx < rows.length; rowIdx++) {
        const cells = findDirectChildren(rows[rowIdx], "tc");
        for (let colIdx = 0; colIdx < cells.length - 1; colIdx++) {
          const labelText = extractCellText(cells[colIdx]);
          if (!isLabelCell(labelText)) continue;
          const valueCell = cells[colIdx + 1];
          const valueText = extractCellText(valueCell);
          // A keyword in the "value" position means it is really another label.
          if (isKeywordLabel(valueText)) continue;
          const normalizedCellLabel = normalizeLabel(labelText);
          if (!normalizedCellLabel) continue;
          const matchKey = findMatchingKey(normalizedCellLabel, normalizedValues);
          if (matchKey === undefined) continue;
          const newValue = normalizedValues.get(matchKey);
          if (cellPatternApplied.has(valueCell)) {
            prependCellText(valueCell, newValue);
          } else {
            replaceCellText(valueCell, newValue);
          }
          matchedLabels.add(matchKey);
          filled.push({
            label: labelText.trim().replace(/[::]\s*$/, ""),
            value: newValue,
            row: rowIdx,
            col: colIdx
          });
          modified = true;
        }
      }

      // Step 3: header-row tables (first row is all short labels).
      if (rows.length < 2) continue;
      const headerCells = findDirectChildren(rows[0], "tc");
      const allLabels = headerCells.every((cell) => {
        const t = extractCellText(cell).trim();
        return t.length > 0 && t.length <= 20 && isLabelCell(t);
      });
      if (!allLabels) continue;
      for (let rowIdx = 1; rowIdx < rows.length; rowIdx++) {
        const dataCells = findDirectChildren(rows[rowIdx], "tc");
        const width = Math.min(headerCells.length, dataCells.length);
        for (let colIdx = 0; colIdx < width; colIdx++) {
          const headerText = extractCellText(headerCells[colIdx]);
          const matchKey = findMatchingKey(normalizeLabel(headerText), normalizedValues);
          if (matchKey === undefined) continue;
          // Each key is written at most once across all tables and rows.
          if (matchedLabels.has(matchKey)) continue;
          const newValue = normalizedValues.get(matchKey);
          replaceCellText(dataCells[colIdx], newValue);
          matchedLabels.add(matchKey);
          filled.push({
            label: headerText.trim(),
            value: newValue,
            row: rowIdx,
            col: colIdx
          });
          modified = true;
        }
      }
    }

    // Step 4: inline "label: value" fields in paragraphs outside tables.
    const allParagraphs = findAllElements(doc.documentElement, "p");
    for (const pEl of allParagraphs) {
      if (isInsideTable(pEl)) continue;
      const tNodes = collectTextNodes(pEl);
      const fullText = tNodes.map((n) => n.text).join("");
      const pattern = /([가-힣A-Za-z]{2,10})\s*[::]\s*([^\n,;]{0,100})/g;
      let match;
      while ((match = pattern.exec(fullText)) !== null) {
        const rawLabel = match[1];
        const matchKey = findMatchingKey(normalizeLabel(rawLabel), normalizedValues);
        if (matchKey === undefined) continue;
        const newValue = normalizedValues.get(matchKey);
        const valueEnd = match.index + match[0].length;
        const valueStart = valueEnd - match[2].length;
        replaceTextRange(tNodes, valueStart, valueEnd, newValue);
        matchedLabels.add(matchKey);
        filled.push({ label: rawLabel.trim(), value: newValue, row: -1, col: -1 });
        modified = true;
        // tNodes holds the pre-edit text and offsets, so further matches in
        // this paragraph would be applied at stale positions; stop after one.
        break;
      }
    }

    if (modified) {
      zip.file(sectionPath, xmlSerializer.serializeToString(doc));
    }
  }

  const unmatched = resolveUnmatched(normalizedValues, matchedLabels, values);
  const buffer = await zip.generateAsync({ type: "arraybuffer" });
  return { buffer, filled, unmatched };
}
|
|
5653
|
+
/**
 * Returns an element's tag name without its namespace prefix
 * (e.g. "hp:tbl" → "tbl"). Falls back to `localName`, then to "".
 *
 * @param {{tagName?: string, localName?: string}} el - DOM-like element.
 * @returns {string} The unprefixed tag name.
 */
function localName(el) {
  const qualified = el.tagName || el.localName || "";
  const colonAt = qualified.indexOf(":");
  // Only strip when at least one character precedes the first colon.
  return colonAt > 0 ? qualified.slice(colonAt + 1) : qualified;
}
|
|
5656
|
+
/**
 * Collects every descendant element with the given unprefixed tag name, in
 * document (pre-order) order. The starting node itself is never included.
 *
 * @param {object} node - DOM node to search under.
 * @param {string} tagLocalName - Tag name without namespace prefix.
 * @returns {Array<object>} Matching descendant elements.
 */
function findAllElements(node, tagLocalName) {
  const matches = [];
  const pending = [];

  // Push element children in reverse so they pop in document order.
  const queueElementChildren = (parent) => {
    const kids = parent.childNodes;
    if (!kids) return;
    for (let i = kids.length - 1; i >= 0; i--) {
      if (kids[i].nodeType === 1) pending.push(kids[i]);
    }
  };

  queueElementChildren(node);
  while (pending.length > 0) {
    const el = pending.pop();
    if (localName(el) === tagLocalName) matches.push(el);
    queueElementChildren(el);
  }
  return matches;
}
|
|
5671
|
+
/**
 * Returns the immediate child elements with the given unprefixed tag name.
 *
 * @param {object} parent - DOM node whose children are inspected.
 * @param {string} tagLocalName - Tag name without namespace prefix.
 * @returns {Array<object>} Matching direct children, in document order.
 */
function findDirectChildren(parent, tagLocalName) {
  const kids = parent.childNodes;
  if (!kids) return [];
  return Array.from({ length: kids.length }, (_, i) => kids[i]).filter(
    (kid) => kid.nodeType === 1 && localName(kid) === tagLocalName
  );
}
|
|
5683
|
+
/**
 * Tells whether an element has a `tbl` element among its ancestors.
 *
 * @param {object} el - DOM element.
 * @returns {boolean} True when some ancestor element is a table.
 */
function isInsideTable(el) {
  for (let ancestor = el.parentNode; ancestor; ancestor = ancestor.parentNode) {
    if (ancestor.nodeType === 1 && localName(ancestor) === "tbl") return true;
  }
  return false;
}
|
|
5691
|
+
/**
 * Reads the visible text of a table cell.
 *
 * Text nodes are concatenated while descending through `t`, `run`, `r`, `p`
 * and `subList` elements; `tab` contributes a space and `br` a newline. Any
 * other element is ignored together with its contents.
 *
 * @param {object} tcEl - The cell element.
 * @returns {string} The concatenated text.
 */
function extractCellText(tcEl) {
  const CONTAINER_TAGS = ["t", "run", "r", "p", "subList"];

  const textOf = (node) => {
    const kids = node.childNodes;
    if (!kids) return "";
    let collected = "";
    for (let i = 0; i < kids.length; i++) {
      const kid = kids[i];
      if (kid.nodeType === 3) {
        collected += kid.textContent || "";
        continue;
      }
      if (kid.nodeType !== 1) continue;
      const tag = localName(kid);
      if (CONTAINER_TAGS.includes(tag)) collected += textOf(kid);
      else if (tag === "tab") collected += " ";
      else if (tag === "br") collected += "\n";
    }
    return collected;
  };

  return textOf(tcEl);
}
|
|
5712
|
+
/**
 * Puts `text` plus a space in front of the existing content of the cell's
 * first `t` element. Does nothing when the cell has no `t` element.
 *
 * @param {object} tcEl - The cell element.
 * @param {*} text - Value to prepend.
 * @returns {void}
 */
function prependCellText(tcEl, text) {
  const [firstT] = findAllElements(tcEl, "t");
  if (firstT === undefined) return;

  const previous = firstT.textContent || "";
  clearChildren(firstT);
  firstT.appendChild(firstT.ownerDocument.createTextNode(text + " " + previous));
}
|
|
5720
|
+
/**
 * Replaces the whole text of a cell with `newValue`.
 *
 * The value goes into the first run of the first paragraph (or, when that
 * paragraph has no runs, into its first `t` element). All other runs, `t`
 * elements and paragraphs in the cell are emptied but left in the tree.
 * Does nothing when the cell has no paragraph.
 *
 * @param {object} tcEl - The cell element.
 * @param {*} newValue - Replacement text.
 * @returns {void}
 */
function replaceCellText(tcEl, newValue) {
  const [firstP, ...laterParagraphs] = findAllElements(tcEl, "p");
  if (!firstP) return;

  const runsOf = (p) => [...findAllElements(p, "run"), ...findAllElements(p, "r")];

  const firstRuns = runsOf(firstP);
  if (firstRuns.length > 0) {
    firstRuns.forEach((run, idx) => setRunText(run, idx === 0 ? newValue : ""));
  } else {
    const [headT, ...tailTs] = findAllElements(firstP, "t");
    if (headT) {
      clearChildren(headT);
      headT.appendChild(headT.ownerDocument.createTextNode(newValue));
      tailTs.forEach((t) => clearChildren(t));
    }
  }

  for (const p of laterParagraphs) {
    if (!p.parentNode) continue;
    runsOf(p).forEach((run) => setRunText(run, ""));
    findAllElements(p, "t").forEach((t) => clearChildren(t));
  }
}
|
|
5750
|
+
/**
 * Sets the text of a run: the first `t` element receives `text`, every other
 * `t` element in the run is emptied.
 *
 * When the run has no `t` element (as in an empty cell) and `text` is
 * non-empty, a `t` element is created with the run's own namespace prefix and
 * appended, so the value is not silently dropped.
 * NOTE(review): assumes appending `t` as the run's last child is valid for the
 * documents handled here — confirm against the HWPX schema.
 *
 * @param {object} runEl - The run element.
 * @param {*} text - Text to store; "" clears the run.
 * @returns {void}
 */
function setRunText(runEl, text) {
  const tElements = findAllElements(runEl, "t");

  if (tElements.length === 0) {
    // Nothing to clear, and nothing to write for an empty value.
    if (text === "" || text == null) return;
    const doc = runEl.ownerDocument;
    if (!doc) return;
    // Reuse the run's prefix ("hp:run" → "hp:t") so the new element sits in
    // the same namespace as its parent.
    const runTag = runEl.tagName || "";
    const colonAt = runTag.indexOf(":");
    const qualifiedName = (colonAt > 0 ? runTag.slice(0, colonAt + 1) : "") + "t";
    const created = runEl.namespaceURI
      ? doc.createElementNS(runEl.namespaceURI, qualifiedName)
      : doc.createElement(qualifiedName);
    created.appendChild(doc.createTextNode(text));
    runEl.appendChild(created);
    return;
  }

  const [firstT, ...otherTs] = tElements;
  clearChildren(firstT);
  firstT.appendChild(firstT.ownerDocument.createTextNode(text));
  for (const t of otherTs) {
    clearChildren(t);
  }
}
|
|
5760
|
+
/**
 * Removes every child node from an element.
 *
 * @param {object} el - DOM element to empty.
 * @returns {void}
 */
function clearChildren(el) {
  for (let child = el.firstChild; child; child = el.firstChild) {
    el.removeChild(child);
  }
}
|
|
5763
|
+
/**
 * Lists the `t` elements under a paragraph together with their text and the
 * offset of that text within the concatenation of all of them.
 *
 * @param {object} pEl - Paragraph element.
 * @returns {Array<{element: object, text: string, offset: number}>}
 */
function collectTextNodes(pEl) {
  const entries = [];
  for (const element of findAllElements(pEl, "t")) {
    const previous = entries[entries.length - 1];
    const offset = previous ? previous.offset + previous.text.length : 0;
    entries.push({ element, text: element.textContent || "", offset });
  }
  return entries;
}
|
|
5774
|
+
/**
 * Replaces the characters in [globalStart, globalEnd) of the concatenated
 * text with `newValue`, editing the underlying `t` elements.
 *
 * The first affected element receives `newValue` in place of its part of the
 * range; every later affected element just loses its part.
 *
 * An empty range (globalStart === globalEnd) is a pure insertion. It is
 * applied to the first element whose text contains or touches that position.
 * An insertion at an element boundary, at position 0 or at the very end of
 * the text therefore still lands somewhere; this is what happens when a blank
 * "Label:" field is filled.
 *
 * @param {Array<{element: object, text: string, offset: number}>} tNodes -
 *   Entries from `collectTextNodes` / `collectCellTextNodes`.
 * @param {number} globalStart - Start offset in the concatenated text.
 * @param {number} globalEnd - End offset (exclusive).
 * @param {*} newValue - Replacement text.
 * @returns {void}
 */
function replaceTextRange(tNodes, globalStart, globalEnd, newValue) {
  const setText = (element, text) => {
    while (element.firstChild) element.removeChild(element.firstChild);
    element.appendChild(element.ownerDocument.createTextNode(text));
  };

  // Treat an inverted range like an insertion at globalStart.
  const rangeEnd = Math.max(globalStart, globalEnd);
  const isInsertion = rangeEnd === globalStart;
  let replaced = false;

  for (const node of tNodes) {
    const nodeStart = node.offset;
    const nodeEnd = nodeStart + node.text.length;

    if (isInsertion) {
      // Insert once, into the first element that contains or touches the point.
      if (replaced || globalStart < nodeStart || globalStart > nodeEnd) continue;
    } else if (nodeEnd <= globalStart || nodeStart >= rangeEnd) {
      continue;
    }

    const localStart = Math.max(0, globalStart - nodeStart);
    const localEnd = Math.min(node.text.length, rangeEnd - nodeStart);
    const before = node.text.slice(0, localStart);
    const after = node.text.slice(localEnd);

    setText(node.element, replaced ? before + after : before + newValue + after);
    replaced = true;
  }
}
|
|
5798
|
+
/**
 * Lists the `t` elements under a table cell with their text and running
 * offset. The traversal is the same as for a paragraph, so it reuses
 * `collectTextNodes`.
 *
 * @param {object} tcEl - Cell element.
 * @returns {Array<{element: object, text: string, offset: number}>}
 */
function collectCellTextNodes(tcEl) {
  return collectTextNodes(tcEl);
}
|
|
5809
|
+
/**
 * Applies a whole-text rewrite to the underlying `t` elements.
 *
 * With a single element the new text is written directly. Otherwise the
 * common prefix and suffix of the old and new text are trimmed off and only
 * the differing middle is replaced through `replaceTextRange`.
 *
 * @param {Array<{element: object, text: string, offset: number}>} tNodes
 * @param {string} originalFull - Concatenated text before the rewrite.
 * @param {string} replacedFull - Concatenated text after the rewrite.
 * @returns {void}
 */
function applyTextReplacements(tNodes, originalFull, replacedFull) {
  if (originalFull === replacedFull) return;

  if (tNodes.length === 1) {
    const only = tNodes[0].element;
    clearChildren(only);
    only.appendChild(only.ownerDocument.createTextNode(replacedFull));
    return;
  }

  const shorter = Math.min(originalFull.length, replacedFull.length);

  let prefixLen = 0;
  while (prefixLen < shorter && originalFull[prefixLen] === replacedFull[prefixLen]) {
    prefixLen++;
  }

  // The suffix may not overlap the prefix in either string.
  const maxSuffix = shorter - prefixLen;
  let suffixLen = 0;
  while (
    suffixLen < maxSuffix &&
    originalFull[originalFull.length - 1 - suffixLen] === replacedFull[replacedFull.length - 1 - suffixLen]
  ) {
    suffixLen++;
  }

  const changedText = replacedFull.slice(prefixLen, replacedFull.length - suffixLen);
  replaceTextRange(tNodes, prefixLen, originalFull.length - suffixLen, changedText);
}
|
|
5831
|
+
|
|
5832
|
+
// src/hwpx/generator.ts
|
|
5833
|
+
import JSZip3 from "jszip";
|
|
5834
|
+
// XML namespace URIs used when generating HWPX parts.
var NS_SECTION = "http://www.hancom.co.kr/hwpml/2011/section";
var NS_PARA = "http://www.hancom.co.kr/hwpml/2011/paragraph";
var NS_HEAD = "http://www.hancom.co.kr/hwpml/2011/head";
var NS_OPF = "http://www.idpf.org/2007/opf/";
var NS_HPF = "http://www.hancom.co.kr/schema/2011/hpf";
var NS_OCF = "urn:oasis:names:tc:opendocument:xmlns:container";

// Character-style ids (presumably indexes into the character properties
// emitted in the generated header — confirm against generateHeaderXml).
var CHAR_NORMAL = 0;
var CHAR_BOLD = 1;
var CHAR_ITALIC = 2;
var CHAR_BOLD_ITALIC = 3;
var CHAR_CODE = 4;
var CHAR_H1 = 5;
var CHAR_H2 = 6;
var CHAR_H3 = 7;
var CHAR_H4 = 8;

// Paragraph-style ids (presumably indexes into the paragraph properties
// emitted in the generated header — confirm against generateHeaderXml).
var PARA_NORMAL = 0;
var PARA_H1 = 1;
var PARA_H2 = 2;
var PARA_H3 = 3;
var PARA_H4 = 4;
var PARA_CODE = 5;
var PARA_QUOTE = 6;
var PARA_LIST = 7;
|
|
5857
|
+
/**
 * Converts Markdown into an HWPX archive.
 *
 * The Markdown is parsed into blocks, rendered to a single section XML and
 * zipped together with the container, manifest and header parts. The
 * `mimetype` entry is added first and stored uncompressed.
 *
 * @param {string} markdown - Markdown source.
 * @returns {Promise<ArrayBuffer>} The generated HWPX file.
 */
async function markdownToHwpx(markdown) {
  const sectionXml = blocksToSectionXml(parseMarkdownToBlocks(markdown));

  const archive = new JSZip3();
  archive.file("mimetype", "application/hwp+zip", { compression: "STORE" });

  const parts = [
    ["META-INF/container.xml", generateContainerXml()],
    ["Contents/content.hpf", generateManifest()],
    ["Contents/header.xml", generateHeaderXml()],
    ["Contents/section0.xml", sectionXml]
  ];
  for (const [path, content] of parts) {
    archive.file(path, content);
  }

  const output = await archive.generateAsync({ type: "arraybuffer" });
  return output;
}
|
|
5868
|
+
/**
 * Splits Markdown into a flat list of line-level blocks.
 *
 * Recognised, in this order: fenced code blocks, horizontal rules, ATX
 * headings, pipe tables, block quotes (one block per quoted line) and list
 * items; any other non-blank line becomes a paragraph. Blank lines are
 * skipped.
 *
 * Fixes over the previous behaviour:
 *  - CRLF input is split correctly. A trailing "\r" used to defeat the
 *    `$`-anchored heading, list and fence patterns.
 *  - Table rows without a trailing pipe keep their last cell.
 *  - The ">" marker is stripped from indented quote lines too, and a line
 *    starting with ">" (with or without a following space) opens a quote.
 *
 * @param {string} md - Markdown source.
 * @returns {Array<object>} Blocks of type `code_block`, `hr`, `heading`,
 *   `table`, `blockquote`, `list_item` or `paragraph`.
 */
function parseMarkdownToBlocks(md) {
  // Split "| a | b |" (leading pipe required, trailing pipe optional) into
  // trimmed cell texts. A lone "|" yields no cells.
  const splitTableRow = (row) => {
    const trimmed = row.trim();
    if (trimmed === "|") return [];
    const inner = trimmed.endsWith("|") ? trimmed.slice(1, -1) : trimmed.slice(1);
    return inner.split("|").map((c) => c.trim());
  };

  const lines = md.split(/\r?\n/);
  const blocks = [];
  let i = 0;

  while (i < lines.length) {
    const line = lines[i];
    if (!line.trim()) {
      i++;
      continue;
    }

    // Fenced code block: runs until a line starting with the same fence, or
    // to the end of the input when the fence is never closed.
    const fenceMatch = line.match(/^(`{3,}|~{3,})(.*)$/);
    if (fenceMatch) {
      const fence = fenceMatch[1];
      const lang = fenceMatch[2].trim();
      const codeLines = [];
      i++;
      while (i < lines.length && !lines[i].startsWith(fence)) {
        codeLines.push(lines[i]);
        i++;
      }
      if (i < lines.length) i++;
      blocks.push({ type: "code_block", text: codeLines.join("\n"), lang });
      continue;
    }

    if (/^(\*{3,}|-{3,}|_{3,})\s*$/.test(line.trim())) {
      blocks.push({ type: "hr" });
      i++;
      continue;
    }

    const headingMatch = line.match(/^(#{1,6})\s+(.+)$/);
    if (headingMatch) {
      blocks.push({ type: "heading", text: headingMatch[2].trim(), level: headingMatch[1].length });
      i++;
      continue;
    }

    if (line.trimStart().startsWith("|")) {
      const tableRows = [];
      while (i < lines.length && lines[i].trimStart().startsWith("|")) {
        const row = lines[i];
        i++;
        // Skip delimiter rows such as "|---|:--:|".
        if (/^[\s|:\-]+$/.test(row)) continue;
        const cells = splitTableRow(row);
        if (cells.length > 0) tableRows.push(cells);
      }
      if (tableRows.length > 0) blocks.push({ type: "table", rows: tableRows });
      continue;
    }

    if (line.trimStart().startsWith(">")) {
      while (i < lines.length && lines[i].trimStart().startsWith(">")) {
        const quoted = lines[i].trimStart().replace(/^>\s?/, "");
        blocks.push({ type: "blockquote", text: quoted.trim() });
        i++;
      }
      continue;
    }

    const listMatch = line.match(/^(\s*)([-*+]|\d+[.)]) (.+)$/);
    if (listMatch) {
      // Two spaces of indentation per nesting level.
      const indent = Math.floor(listMatch[1].length / 2);
      const ordered = /\d/.test(listMatch[2]);
      blocks.push({ type: "list_item", text: listMatch[3].trim(), ordered, indent });
      i++;
      continue;
    }

    blocks.push({ type: "paragraph", text: line.trim() });
    i++;
  }
  return blocks;
}
|
|
5942
|
+
/**
 * Splits a line of Markdown into styled text spans.
 *
 * Images are reduced to their alt text, links to their label (or URL when the
 * label is empty) and strikethrough markers are removed. The remaining text is
 * scanned for inline code, bold, italic and bold-italic runs.
 *
 * Underscore emphasis is only recognised when the delimiters are not glued to
 * ASCII letters, digits or further underscores, so identifiers such as
 * `snake_case_name` stay literal. `___text___` is recognised as bold-italic.
 *
 * @param {string} text - Inline Markdown.
 * @returns {Array<{text: string, bold: boolean, italic: boolean, code: boolean}>}
 *   Spans in document order; always at least one span.
 */
function parseInlineMarkdown(text) {
  text = text.replace(/!\[([^\]]*)\]\([^)]*\)/g, "$1");
  text = text.replace(/\[([^\]]*)\]\(([^)]*)\)/g, (_, t, u) => t || u);
  text = text.replace(/~~([^~]+)~~/g, "$1");
  const spans = [];
  // Alternatives are ordered longest delimiter first so "***" wins over "**" and "*".
  const regex = /(`[^`]+`|\*{3}[^*]+\*{3}|\*{2}[^*]+\*{2}|\*[^*]+\*|(?<![A-Za-z0-9_])(?:_{3}[^_]+_{3}|_{2}[^_]+_{2}|_[^_]+_)(?![A-Za-z0-9_]))/g;
  let lastIdx = 0;
  for (const match of text.matchAll(regex)) {
    const idx = match.index;
    if (idx > lastIdx) {
      // Plain text between the previous match and this one.
      spans.push({ text: text.slice(lastIdx, idx), bold: false, italic: false, code: false });
    }
    const raw = match[0];
    if (raw.startsWith("`")) {
      spans.push({ text: raw.slice(1, -1), bold: false, italic: false, code: true });
    } else if (raw.startsWith("***") || raw.startsWith("___")) {
      spans.push({ text: raw.slice(3, -3), bold: true, italic: true, code: false });
    } else if (raw.startsWith("**") || raw.startsWith("__")) {
      spans.push({ text: raw.slice(2, -2), bold: true, italic: false, code: false });
    } else {
      spans.push({ text: raw.slice(1, -1), bold: false, italic: true, code: false });
    }
    lastIdx = idx + raw.length;
  }
  if (lastIdx < text.length) {
    spans.push({ text: text.slice(lastIdx), bold: false, italic: false, code: false });
  }
  // Empty input still yields one (empty) span so callers always emit a run.
  if (spans.length === 0) {
    spans.push({ text, bold: false, italic: false, code: false });
  }
  return spans;
}
|
|
5974
|
+
/**
 * Picks the character-property id for a styled span.
 *
 * Code takes precedence over emphasis; otherwise the bold/italic combination
 * selects the id.
 *
 * @param {{bold: boolean, italic: boolean, code: boolean}} span
 * @returns {number} One of the CHAR_* ids.
 */
function spanToCharPrId(span) {
  const { code, bold, italic } = span;
  if (code) return CHAR_CODE;
  if (bold) return italic ? CHAR_BOLD_ITALIC : CHAR_BOLD;
  return italic ? CHAR_ITALIC : CHAR_NORMAL;
}
|
|
5981
|
+
/**
 * Escapes the characters that are significant in XML text and
 * double-quoted attribute values.
 *
 * @param {string} text - Raw text.
 * @returns {string} Text with &, <, > and " replaced by XML entities.
 */
function escapeXml(text) {
  const entities = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;" };
  // Single pass, so the "&" introduced by one entity is never escaped again.
  return text.replace(/[&<>"]/g, (ch) => entities[ch]);
}
|
|
5984
|
+
/**
 * Renders inline Markdown as a sequence of <hp:run> elements.
 *
 * Spans carrying code/bold/italic styling use their own character-property id;
 * unstyled spans use `defaultCharPr`.
 *
 * @param {string} text - Inline Markdown.
 * @param {number} [defaultCharPr=CHAR_NORMAL] - Id for unstyled spans.
 * @returns {string} Concatenated run XML.
 */
function generateRuns(text, defaultCharPr = CHAR_NORMAL) {
  let runsXml = "";
  for (const span of parseInlineMarkdown(text)) {
    const isStyled = span.code || span.bold || span.italic;
    const charId = isStyled ? spanToCharPrId(span) : defaultCharPr;
    runsXml += `<hp:run charPrIDRef="${charId}"><hp:t>${escapeXml(span.text)}</hp:t></hp:run>`;
  }
  return runsXml;
}
|
|
5991
|
+
/**
 * Renders one <hp:p> paragraph.
 *
 * Code paragraphs are emitted verbatim as a single run (no inline Markdown
 * parsing); every other paragraph goes through generateRuns().
 *
 * @param {string} text - Paragraph text.
 * @param {number} [paraPrId=PARA_NORMAL] - Paragraph-property id.
 * @param {number} [charPrId=CHAR_NORMAL] - Default character-property id.
 * @returns {string} Paragraph XML.
 */
function generateParagraph(text, paraPrId = PARA_NORMAL, charPrId = CHAR_NORMAL) {
  const content = paraPrId === PARA_CODE
    ? `<hp:run charPrIDRef="${CHAR_CODE}"><hp:t>${escapeXml(text)}</hp:t></hp:run>`
    : generateRuns(text, charPrId);
  return `<hp:p paraPrIDRef="${paraPrId}" styleIDRef="0">${content}</hp:p>`;
}
|
|
5998
|
+
/**
 * Maps a heading level to its paragraph-property id.
 *
 * @param {number} level - Heading level; anything other than 1-3 maps to PARA_H4.
 * @returns {number} One of PARA_H1..PARA_H4.
 */
function headingParaPrId(level) {
  switch (level) {
    case 1:
      return PARA_H1;
    case 2:
      return PARA_H2;
    case 3:
      return PARA_H3;
    default:
      return PARA_H4;
  }
}
|
|
6004
|
+
/**
 * Maps a heading level to its character-property id.
 *
 * @param {number} level - Heading level; anything other than 1-3 maps to CHAR_H4.
 * @returns {number} One of CHAR_H1..CHAR_H4.
 */
function headingCharPrId(level) {
  switch (level) {
    case 1:
      return CHAR_H1;
    case 2:
      return CHAR_H2;
    case 3:
      return CHAR_H3;
    default:
      return CHAR_H4;
  }
}
|
|
6010
|
+
/**
 * Builds the XML that markdownToHwpx() stores as META-INF/container.xml.
 * It declares Contents/content.hpf as the package root file.
 *
 * @returns {string} Container XML text.
 */
function generateContainerXml() {
  return `<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
<ocf:container xmlns:ocf="${NS_OCF}" xmlns:hpf="${NS_HPF}">
<ocf:rootfiles>
<ocf:rootfile full-path="Contents/content.hpf" media-type="application/hwpml-package+xml"/>
</ocf:rootfiles>
</ocf:container>`;
}
|
|
6018
|
+
/**
 * Builds the XML that markdownToHwpx() stores as Contents/content.hpf.
 * The manifest lists the header and section0 parts; the spine marks the
 * header as non-linear and section0 as linear.
 *
 * @returns {string} Package manifest XML text.
 */
function generateManifest() {
  return `<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
<opf:package xmlns:opf="${NS_OPF}" xmlns:hpf="${NS_HPF}" xmlns:hh="${NS_HEAD}">
<opf:manifest>
<opf:item id="header" href="Contents/header.xml" media-type="application/xml"/>
<opf:item id="section0" href="Contents/section0.xml" media-type="application/xml"/>
</opf:manifest>
<opf:spine>
<opf:itemref idref="header" linear="no"/>
<opf:itemref idref="section0" linear="yes"/>
</opf:spine>
</opf:package>`;
}
|
|
6031
|
+
/**
 * Renders one <hh:charPr> (character property) element for the header.
 *
 * @param {number} id - Value of the element's id attribute.
 * @param {number} height - Value of the height attribute.
 * @param {boolean} bold - When truthy, adds bold="1".
 * @param {boolean} italic - When truthy, adds italic="1".
 * @param {number} [fontId=0] - Font id written to every script slot of <hh:fontRef>.
 * @returns {string} The charPr XML fragment.
 */
function charPr(id, height, bold, italic, fontId = 0) {
  // The attributes are omitted entirely (not set to "0") when the flag is off.
  const boldAttr = bold ? ` bold="1"` : "";
  const italicAttr = italic ? ` italic="1"` : "";
  return ` <hh:charPr id="${id}" height="${height}" textColor="#000000" shadeColor="none" useFontSpace="0" useKerning="0" symMark="NONE" borderFillIDRef="0"${boldAttr}${italicAttr}>
<hh:fontRef hangul="${fontId}" latin="${fontId}" hanja="${fontId}" japanese="${fontId}" other="${fontId}" symbol="${fontId}" user="${fontId}"/>
<hh:ratio hangul="100" latin="100" hanja="100" japanese="100" other="100" symbol="100" user="100"/>
<hh:spacing hangul="0" latin="0" hanja="0" japanese="0" other="0" symbol="0" user="0"/>
<hh:relSz hangul="100" latin="100" hanja="100" japanese="100" other="100" symbol="100" user="100"/>
<hh:offset hangul="0" latin="0" hanja="0" japanese="0" other="0" symbol="0" user="0"/>
</hh:charPr>`;
}
|
|
6042
|
+
/**
 * Renders one <hh:paraPr> (paragraph property) element for the header.
 *
 * @param {number} id - Value of the element's id attribute.
 * @param {object} [opts] - Optional overrides.
 * @param {string} [opts.align="JUSTIFY"] - Horizontal alignment.
 * @param {number} [opts.spaceBefore=0] - Written to the margin's prev attribute.
 * @param {number} [opts.spaceAfter=0] - Written to the margin's next attribute.
 * @param {number} [opts.lineSpacing=160] - Line spacing value, emitted with type="PERCENT".
 * @param {number} [opts.indent=0] - Written to the margin's indent attribute.
 * @returns {string} The paraPr XML fragment.
 */
function paraPr(id, opts = {}) {
  const { align = "JUSTIFY", spaceBefore = 0, spaceAfter = 0, lineSpacing = 160, indent = 0 } = opts;
  return ` <hh:paraPr id="${id}" tabPrIDRef="0" condense="0" fontLineHeight="0" snapToGrid="1" suppressLineNumbers="0" checked="0" textDir="AUTO">
<hh:align horizontal="${align}" vertical="BASELINE"/>
<hh:heading type="NONE" idRef="0" level="0"/>
<hh:breakSetting breakLatinWord="KEEP_WORD" breakNonLatinWord="BREAK_WORD" widowOrphan="0" keepWithNext="0" keepLines="0" pageBreakBefore="0" lineWrap="BREAK"/>
<hh:autoSpacing eAsianEng="0" eAsianNum="0"/>
<hh:margin indent="${indent}" left="0" right="0" prev="${spaceBefore}" next="${spaceAfter}"/>
<hh:lineSpacing type="PERCENT" value="${lineSpacing}"/>
<hh:border borderFillIDRef="0" offsetLeft="0" offsetRight="0" offsetTop="0" offsetBottom="0" connect="0" ignoreMargin="0"/>
</hh:paraPr>`;
}
|
|
6054
|
+
/**
 * Builds the XML that markdownToHwpx() stores as Contents/header.xml.
 *
 * The header declares seven font faces (one per script group), a single empty
 * border fill, nine character properties (ids 0-8), eight paragraph
 * properties (ids 0-7) and one paragraph style. The character and paragraph
 * ids are the ones referenced by the CHAR_* and PARA_* constants.
 *
 * @returns {string} Header XML text.
 */
function generateHeaderXml() {
  return `<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
<hh:head xmlns:hh="${NS_HEAD}" xmlns:hp="${NS_PARA}" version="1.4" secCnt="1">
<hh:beginNum page="1" footnote="1" endnote="1" pic="1" tbl="1" equation="1"/>
<hh:refList>
<hh:fontfaces itemCnt="7">
<hh:fontface lang="HANGUL" fontCnt="2">
<hh:font id="0" face="\uD568\uCD08\uB86C\uBC14\uD0D5" type="TTF" isEmbedded="0">
<hh:typeInfo familyType="FCAT_GOTHIC" weight="6" proportion="4" contrast="0" strokeVariation="1" armStyle="1" letterform="1" midline="1" xHeight="1"/>
</hh:font>
<hh:font id="1" face="\uD568\uCD08\uB86C\uB3CB\uC6C0" type="TTF" isEmbedded="0">
<hh:typeInfo familyType="FCAT_GOTHIC" weight="6" proportion="4" contrast="0" strokeVariation="1" armStyle="1" letterform="1" midline="1" xHeight="1"/>
</hh:font>
</hh:fontface>
<hh:fontface lang="LATIN" fontCnt="2">
<hh:font id="0" face="Times New Roman" type="TTF" isEmbedded="0">
<hh:typeInfo familyType="FCAT_OLDSTYLE" weight="5" proportion="4" contrast="2" strokeVariation="0" armStyle="0" letterform="0" midline="0" xHeight="4"/>
</hh:font>
<hh:font id="1" face="Consolas" type="TTF" isEmbedded="0">
<hh:typeInfo familyType="FCAT_MODERN" weight="5" proportion="0" contrast="0" strokeVariation="0" armStyle="0" letterform="0" midline="0" xHeight="0"/>
</hh:font>
</hh:fontface>
<hh:fontface lang="HANJA" fontCnt="1">
<hh:font id="0" face="\uD568\uCD08\uB86C\uBC14\uD0D5" type="TTF" isEmbedded="0">
<hh:typeInfo familyType="FCAT_GOTHIC" weight="6" proportion="4" contrast="0" strokeVariation="1" armStyle="1" letterform="1" midline="1" xHeight="1"/>
</hh:font>
</hh:fontface>
<hh:fontface lang="JAPANESE" fontCnt="1">
<hh:font id="0" face="\uAD74\uB9BC" type="TTF" isEmbedded="0">
<hh:typeInfo familyType="FCAT_GOTHIC" weight="6" proportion="0" contrast="0" strokeVariation="1" armStyle="1" letterform="1" midline="1" xHeight="1"/>
</hh:font>
</hh:fontface>
<hh:fontface lang="OTHER" fontCnt="1">
<hh:font id="0" face="\uAD74\uB9BC" type="TTF" isEmbedded="0">
<hh:typeInfo familyType="FCAT_GOTHIC" weight="6" proportion="0" contrast="0" strokeVariation="1" armStyle="1" letterform="1" midline="1" xHeight="1"/>
</hh:font>
</hh:fontface>
<hh:fontface lang="SYMBOL" fontCnt="1">
<hh:font id="0" face="Symbol" type="TTF" isEmbedded="0">
<hh:typeInfo familyType="FCAT_GOTHIC" weight="6" proportion="0" contrast="0" strokeVariation="1" armStyle="1" letterform="1" midline="1" xHeight="1"/>
</hh:font>
</hh:fontface>
<hh:fontface lang="USER" fontCnt="1">
<hh:font id="0" face="\uAD74\uB9BC" type="TTF" isEmbedded="0">
<hh:typeInfo familyType="FCAT_GOTHIC" weight="6" proportion="0" contrast="0" strokeVariation="1" armStyle="1" letterform="1" midline="1" xHeight="1"/>
</hh:font>
</hh:fontface>
</hh:fontfaces>
<hh:borderFills itemCnt="1">
<hh:borderFill id="0" threeD="0" shadow="0" centerLine="0" breakCellSeparateLine="0">
<hh:slash type="NONE" Crooked="0" isCounter="0"/>
<hh:backSlash type="NONE" Crooked="0" isCounter="0"/>
<hh:leftBorder type="NONE" width="0.1mm" color="#000000"/>
<hh:rightBorder type="NONE" width="0.1mm" color="#000000"/>
<hh:topBorder type="NONE" width="0.1mm" color="#000000"/>
<hh:bottomBorder type="NONE" width="0.1mm" color="#000000"/>
<hh:diagonal type="NONE" width="0.1mm" color="#000000"/>
<hh:fillInfo/>
</hh:borderFill>
</hh:borderFills>
<hh:charProperties itemCnt="9">
${charPr(0, 1e3, false, false)}
${charPr(1, 1e3, true, false)}
${charPr(2, 1e3, false, true)}
${charPr(3, 1e3, true, true)}
${charPr(4, 900, false, false, 1)}
${charPr(5, 1800, true, false, 1)}
${charPr(6, 1400, true, false, 1)}
${charPr(7, 1200, true, false, 1)}
${charPr(8, 1100, true, false, 1)}
</hh:charProperties>
<hh:tabProperties itemCnt="0"/>
<hh:numberings itemCnt="0"/>
<hh:bullets itemCnt="0"/>
<hh:paraProperties itemCnt="8">
${paraPr(0)}
${paraPr(1, { align: "LEFT", spaceBefore: 800, spaceAfter: 200, lineSpacing: 180 })}
${paraPr(2, { align: "LEFT", spaceBefore: 600, spaceAfter: 150, lineSpacing: 170 })}
${paraPr(3, { align: "LEFT", spaceBefore: 400, spaceAfter: 100, lineSpacing: 160 })}
${paraPr(4, { align: "LEFT", spaceBefore: 300, spaceAfter: 100, lineSpacing: 160 })}
${paraPr(5, { align: "LEFT", lineSpacing: 130, indent: 400 })}
${paraPr(6, { align: "LEFT", lineSpacing: 150, indent: 600 })}
${paraPr(7, { align: "LEFT", lineSpacing: 160, indent: 600 })}
</hh:paraProperties>
<hh:styles itemCnt="1">
<hh:style id="0" type="PARA" name="\uBC14\uD0D5\uAE00" engName="Normal" paraPrIDRef="0" charPrIDRef="0" nextStyleIDRef="0" langIDRef="1042" lockForm="0"/>
</hh:styles>
</hh:refList>
<hh:compatibleDocument targetProgram="HWP2018"/>
</hh:head>`;
}
|
|
6145
|
+
/**
 * Returns the fixed <hp:secPr> element (page size and margins, grid,
 * visibility, footnote and endnote settings).
 * blocksToSectionXml() places it inside the first run of the section.
 *
 * @returns {string} Section-properties XML fragment.
 */
function generateSecPr() {
  return `<hp:secPr textDirection="HORIZONTAL" spaceColumns="1134" tabStop="8000" outlineShapeIDRef="0" memoShapeIDRef="0" textVerticalWidthHead="0" masterPageCnt="0"><hp:grid lineGrid="0" charGrid="0" wonggojiFormat="0"/><hp:startNum pageStartsOn="BOTH" page="0" pic="0" tbl="0" equation="0"/><hp:visibility hideFirstHeader="0" hideFirstFooter="0" hideFirstMasterPage="0" border="SHOW_ALL" fill="SHOW_ALL" hideFirstPageNum="0" hideFirstEmptyLine="0" showLineNumber="0"/><hp:pagePr landscape="WIDELY" width="59528" height="84188" gutterType="LEFT_ONLY"><hp:margin header="2835" footer="2835" gutter="0" left="5670" right="4252" top="8504" bottom="4252"/></hp:pagePr><hp:footNotePr><hp:autoNumFormat type="DIGIT" userChar="" prefixChar="" suffixChar=")" supscript="0"/><hp:noteLine length="-1" type="SOLID" width="0.12 mm" color="#000000"/><hp:noteSpacing betweenNotes="283" belowLine="567" aboveLine="850"/><hp:numbering type="CONTINUOUS" newNum="1"/><hp:placement place="EACH_COLUMN" beneathText="0"/></hp:footNotePr><hp:endNotePr><hp:autoNumFormat type="DIGIT" userChar="" prefixChar="" suffixChar=")" supscript="0"/><hp:noteLine length="14692344" type="SOLID" width="0.12 mm" color="#000000"/><hp:noteSpacing betweenNotes="0" belowLine="567" aboveLine="850"/><hp:numbering type="CONTINUOUS" newNum="1"/><hp:placement place="END_OF_DOCUMENT" beneathText="0"/></hp:endNotePr></hp:secPr>`;
}
|
|
6148
|
+
/**
 * Renders a table block as <hp:tbl> XML.
 *
 * Every cell becomes an <hp:tc> with a 1x1 span holding one normal paragraph
 * whose runs come from the cell's inline Markdown.
 *
 * @param {string[][]} rows - Cell texts, row by row.
 * @returns {string} Table XML.
 */
function generateTable(rows) {
  const renderCell = (cell) =>
    `<hp:tc><hp:cellSpan colSpan="1" rowSpan="1"/><hp:p paraPrIDRef="0" styleIDRef="0">${generateRuns(cell)}</hp:p></hp:tc>`;
  const renderRow = (row) =>
    `<hp:tr>${row.reduce((cellsXml, cell) => cellsXml + renderCell(cell), "")}</hp:tr>`;
  const body = rows.reduce((rowsXml, row) => rowsXml + renderRow(row), "");
  return `<hp:tbl>${body}</hp:tbl>`;
}
|
|
6158
|
+
/**
 * Renders parsed Markdown blocks as the XML of a section part.
 *
 * The section properties (generateSecPr) are placed inside the first run of
 * the first rendered block. When the first block is a table, an empty leading
 * paragraph is emitted to carry them instead. An empty block list yields a
 * single empty paragraph holding the section properties.
 *
 * Ordered list items are numbered sequentially per nesting depth. Numbering
 * restarts whenever a non-list block, or an unordered item at the same or a
 * shallower depth, interrupts the list.
 *
 * @param {Array<object>} blocks - Output of parseMarkdownToBlocks().
 * @returns {string} Section XML text.
 */
function blocksToSectionXml(blocks) {
  const paraXmls = [];
  let isFirst = true;
  // Running item number for ordered lists, indexed by nesting depth.
  const orderedCounters = [];
  for (const block of blocks) {
    let xml = "";
    // Any non-list block ends all open lists.
    if (block.type !== "list_item") orderedCounters.length = 0;
    switch (block.type) {
      case "heading": {
        const pId = headingParaPrId(block.level || 1);
        const cId = headingCharPrId(block.level || 1);
        xml = generateParagraph(block.text || "", pId, cId);
        break;
      }
      case "paragraph":
        xml = generateParagraph(block.text || "");
        break;
      case "code_block": {
        // One paragraph per code line; blank lines become a single space.
        const codeLines = (block.text || "").split("\n");
        xml = codeLines.map((line) => generateParagraph(line || " ", PARA_CODE)).join("\n ");
        break;
      }
      case "blockquote":
        xml = generateParagraph(block.text || "", PARA_QUOTE);
        break;
      case "list_item": {
        const depth = block.indent || 0;
        let marker = "\xB7 ";
        if (block.ordered) {
          // Forget deeper levels, then advance this level's counter.
          orderedCounters.length = depth + 1;
          orderedCounters[depth] = (orderedCounters[depth] || 0) + 1;
          marker = `${orderedCounters[depth]}. `;
        } else {
          // A bullet interrupts ordered numbering at this depth and below.
          orderedCounters.length = Math.min(orderedCounters.length, depth);
        }
        const indentPrefix = " ".repeat(block.indent || 0);
        xml = generateParagraph(indentPrefix + marker + (block.text || ""), PARA_LIST);
        break;
      }
      case "hr":
        xml = `<hp:p paraPrIDRef="0" styleIDRef="0"><hp:run charPrIDRef="0"><hp:t>\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500</hp:t></hp:run></hp:p>`;
        break;
      case "table":
        if (block.rows) {
          if (isFirst) {
            // A table has no leading run to host the section properties,
            // so an empty paragraph is emitted ahead of it.
            const secRun = `<hp:run charPrIDRef="0">${generateSecPr()}<hp:t></hp:t></hp:run>`;
            paraXmls.push(`<hp:p paraPrIDRef="0" styleIDRef="0">${secRun}</hp:p>`);
            isFirst = false;
          }
          xml = generateTable(block.rows);
        }
        break;
    }
    if (!xml) continue;
    if (isFirst && block.type !== "table") {
      // Inject the section properties right after the first run's opening tag.
      xml = xml.replace(
        /<hp:run charPrIDRef="(\d+)">/,
        `<hp:run charPrIDRef="$1">${generateSecPr()}`
      );
      isFirst = false;
    }
    paraXmls.push(xml);
  }
  if (paraXmls.length === 0) {
    paraXmls.push(`<hp:p paraPrIDRef="0" styleIDRef="0"><hp:run charPrIDRef="0">${generateSecPr()}<hp:t></hp:t></hp:run></hp:p>`);
  }
  return `<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
<hs:sec xmlns:hs="${NS_SECTION}" xmlns:hp="${NS_PARA}">
${paraXmls.join("\n ")}
</hs:sec>`;
}
|
|
6219
|
+
|
|
5192
6220
|
// src/index.ts
|
|
5193
6221
|
import { readFile } from "fs/promises";
|
|
5194
6222
|
|
|
5195
6223
|
// src/xlsx/parser.ts
|
|
5196
|
-
import
|
|
5197
|
-
import { DOMParser as
|
|
6224
|
+
import JSZip4 from "jszip";
|
|
6225
|
+
import { DOMParser as DOMParser3 } from "@xmldom/xmldom";
|
|
5198
6226
|
var MAX_SHEETS = 100;
|
|
5199
6227
|
var MAX_DECOMPRESS_SIZE3 = 100 * 1024 * 1024;
|
|
5200
6228
|
var MAX_ROWS2 = 1e4;
|
|
@@ -5231,7 +6259,7 @@ function getTextContent(el) {
|
|
|
5231
6259
|
return el.textContent?.trim() ?? "";
|
|
5232
6260
|
}
|
|
5233
6261
|
/**
 * Parses XML text into a DOM document after passing it through stripDtd().
 *
 * @param {string} text - Raw XML.
 * @returns {Document} Parsed document.
 */
function parseXml(text) {
  const parser = new DOMParser3();
  const sanitized = stripDtd(text);
  return parser.parseFromString(sanitized, "text/xml");
}
|
|
5236
6264
|
function parseSharedStrings(xml) {
|
|
5237
6265
|
const doc = parseXml(xml);
|
|
@@ -5384,7 +6412,7 @@ function sheetToBlocks(sheetName, grid, merges, maxRow, maxCol, sheetIndex) {
|
|
|
5384
6412
|
}
|
|
5385
6413
|
async function parseXlsxDocument(buffer, options) {
|
|
5386
6414
|
precheckZipSize(buffer, MAX_DECOMPRESS_SIZE3);
|
|
5387
|
-
const zip = await
|
|
6415
|
+
const zip = await JSZip4.loadAsync(buffer);
|
|
5388
6416
|
const warnings = [];
|
|
5389
6417
|
const workbookFile = zip.file("xl/workbook.xml");
|
|
5390
6418
|
if (!workbookFile) {
|
|
@@ -5474,24 +6502,24 @@ async function parseXlsxDocument(buffer, options) {
|
|
|
5474
6502
|
}
|
|
5475
6503
|
|
|
5476
6504
|
// src/docx/parser.ts
|
|
5477
|
-
import
|
|
5478
|
-
import { DOMParser as
|
|
6505
|
+
import JSZip5 from "jszip";
|
|
6506
|
+
import { DOMParser as DOMParser4 } from "@xmldom/xmldom";
|
|
5479
6507
|
var MAX_DECOMPRESS_SIZE4 = 100 * 1024 * 1024;
|
|
5480
|
-
function getChildElements(parent,
|
|
6508
|
+
/**
 * Collects the direct child elements of `parent` with the given local name.
 *
 * A child matches when its localName equals `localName2`, or when its tagName
 * ends with ":" + `localName2` (covers nodes without a usable localName).
 *
 * @param {{childNodes: ArrayLike<object>}} parent - Node whose children are scanned.
 * @param {string} localName2 - Local element name to match.
 * @returns {object[]} Matching element nodes in document order.
 */
function getChildElements(parent, localName2) {
  const matches = [];
  const nodes = parent.childNodes;
  const qualifiedSuffix = `:${localName2}`;
  for (let idx = 0; idx < nodes.length; idx += 1) {
    const candidate = nodes[idx];
    // nodeType 1 is an element node; everything else is ignored.
    if (candidate.nodeType !== 1) continue;
    const sameLocalName = candidate.localName === localName2;
    if (sameLocalName || candidate.tagName?.endsWith(qualifiedSuffix)) {
      matches.push(candidate);
    }
  }
  return matches;
}
|
|
5494
|
-
function findElements(parent,
|
|
6522
|
+
function findElements(parent, localName2) {
|
|
5495
6523
|
const result = [];
|
|
5496
6524
|
const walk = (node) => {
|
|
5497
6525
|
const children = node.childNodes;
|
|
@@ -5499,7 +6527,7 @@ function findElements(parent, localName) {
|
|
|
5499
6527
|
const child = children[i];
|
|
5500
6528
|
if (child.nodeType === 1) {
|
|
5501
6529
|
const el = child;
|
|
5502
|
-
if (el.localName ===
|
|
6530
|
+
if (el.localName === localName2 || el.tagName?.endsWith(`:${localName2}`)) {
|
|
5503
6531
|
result.push(el);
|
|
5504
6532
|
}
|
|
5505
6533
|
walk(el);
|
|
@@ -5509,16 +6537,16 @@ function findElements(parent, localName) {
|
|
|
5509
6537
|
walk(parent);
|
|
5510
6538
|
return result;
|
|
5511
6539
|
}
|
|
5512
|
-
function getAttr(el,
|
|
6540
|
+
/**
 * Returns the value of the first attribute whose localName or full name
 * equals `localName2`.
 *
 * @param {{attributes: ArrayLike<object>}} el - Element to inspect.
 * @param {string} localName2 - Attribute name to look up.
 * @returns {string|null} Attribute value, or null when no attribute matches.
 */
function getAttr(el, localName2) {
  const list = el.attributes;
  let idx = 0;
  while (idx < list.length) {
    const candidate = list[idx];
    if (candidate.localName === localName2 || candidate.name === localName2) {
      return candidate.value;
    }
    idx += 1;
  }
  return null;
}
|
|
5520
6548
|
/**
 * Parses XML text into a DOM document after passing it through stripDtd().
 *
 * @param {string} text - Raw XML.
 * @returns {Document} Parsed document.
 */
function parseXml2(text) {
  const parser = new DOMParser4();
  const sanitized = stripDtd(text);
  return parser.parseFromString(sanitized, "text/xml");
}
|
|
5523
6551
|
function parseStyles(xml) {
|
|
5524
6552
|
const doc = parseXml2(xml);
|
|
@@ -5812,7 +6840,7 @@ async function extractImages(zip, rels, doc) {
|
|
|
5812
6840
|
}
|
|
5813
6841
|
async function parseDocxDocument(buffer, options) {
|
|
5814
6842
|
precheckZipSize(buffer, MAX_DECOMPRESS_SIZE4);
|
|
5815
|
-
const zip = await
|
|
6843
|
+
const zip = await JSZip5.loadAsync(buffer);
|
|
5816
6844
|
const warnings = [];
|
|
5817
6845
|
const docFile = zip.file("word/document.xml");
|
|
5818
6846
|
if (!docFile) {
|
|
@@ -5860,11 +6888,11 @@ async function parseDocxDocument(buffer, options) {
|
|
|
5860
6888
|
const node = children[i];
|
|
5861
6889
|
if (node.nodeType !== 1) continue;
|
|
5862
6890
|
const el = node;
|
|
5863
|
-
const
|
|
5864
|
-
if (
|
|
6891
|
+
const localName2 = el.localName ?? el.tagName?.split(":").pop();
|
|
6892
|
+
if (localName2 === "p") {
|
|
5865
6893
|
const block = parseParagraph(el, styles, numbering, footnotes, rels);
|
|
5866
6894
|
if (block) blocks.push(block);
|
|
5867
|
-
} else if (
|
|
6895
|
+
} else if (localName2 === "tbl") {
|
|
5868
6896
|
const block = parseTable(el, styles, numbering, footnotes, rels);
|
|
5869
6897
|
if (block) blocks.push(block);
|
|
5870
6898
|
}
|
|
@@ -5902,135 +6930,6 @@ async function parseDocxDocument(buffer, options) {
|
|
|
5902
6930
|
};
|
|
5903
6931
|
}
|
|
5904
6932
|
|
|
5905
|
-
// src/form/recognize.ts
|
|
5906
|
-
// Hangul keywords that mark a table cell as a form label.
// isLabelCell() returns true for any short cell text containing one of them.
var LABEL_KEYWORDS = /* @__PURE__ */ new Set([
  "\uC131\uBA85",
  "\uC774\uB984",
  "\uC8FC\uC18C",
  "\uC804\uD654",
  "\uC804\uD654\uBC88\uD638",
  "\uD734\uB300\uD3F0",
  "\uD578\uB4DC\uD3F0",
  "\uC5F0\uB77D\uCC98",
  "\uC0DD\uB144\uC6D4\uC77C",
  "\uC8FC\uBBFC\uB4F1\uB85D\uBC88\uD638",
  "\uC18C\uC18D",
  "\uC9C1\uC704",
  "\uC9C1\uAE09",
  "\uBD80\uC11C",
  "\uC774\uBA54\uC77C",
  "\uD329\uC2A4",
  "\uD559\uAD50",
  "\uD559\uB144",
  "\uBC18",
  "\uBC88\uD638",
  "\uC2E0\uCCAD\uC778",
  "\uB300\uD45C\uC790",
  "\uB2F4\uB2F9\uC790",
  "\uC791\uC131\uC790",
  "\uD655\uC778\uC790",
  "\uC2B9\uC778\uC790",
  "\uC77C\uC2DC",
  "\uB0A0\uC9DC",
  "\uAE30\uAC04",
  "\uC7A5\uC18C",
  "\uBAA9\uC801",
  "\uC0AC\uC720",
  "\uBE44\uACE0",
  "\uAE08\uC561",
  "\uC218\uB7C9",
  "\uB2E8\uAC00",
  "\uD569\uACC4",
  "\uACC4",
  "\uC18C\uACC4"
]);
|
|
5947
|
-
/**
 * Heuristically decides whether a table cell holds a form label.
 *
 * A non-empty cell of at most 30 characters is a label when it contains a
 * LABEL_KEYWORDS entry, is a 2-8 character run of Hangul/space/()·: with no
 * digits, or is Hangul/Latin text ending in a colon.
 *
 * @param {string} text - Cell text.
 * @returns {boolean} True when the cell looks like a label.
 */
function isLabelCell(text) {
  const trimmed = text.trim();
  if (trimmed.length === 0 || trimmed.length > 30) return false;
  for (const keyword of LABEL_KEYWORDS) {
    if (trimmed.includes(keyword)) return true;
  }
  const shortHangulLabel = /^[가-힣\s()·:]{2,8}$/.test(trimmed) && !/\d/.test(trimmed);
  return shortHangulLabel || /^[가-힣A-Za-z\s]+[::]$/.test(trimmed);
}
|
|
5957
|
-
/**
 * Extracts label/value form fields from parsed document blocks.
 *
 * Table fields come first (in block order), followed by inline "label: value"
 * fields found in paragraphs. Confidence is the share of tables that yielded
 * fields; with no tables it is 0.3 when any inline field was found, else 0.
 *
 * @param {Iterable<object>} blocks - Parsed blocks.
 * @returns {{fields: object[], confidence: number}} Fields and confidence (at most 1).
 */
function extractFormFields(blocks) {
  const fields = [];
  let totalTables = 0;
  let formTables = 0;

  // First pass: tables.
  for (const block of blocks) {
    if (block.type !== "table" || !block.table) continue;
    totalTables += 1;
    const found = extractFromTable(block.table);
    if (found.length === 0) continue;
    formTables += 1;
    fields.push(...found);
  }

  // Second pass: inline fields in paragraphs.
  for (const block of blocks) {
    if (block.type !== "paragraph" || !block.text) continue;
    fields.push(...extractInlineFields(block.text));
  }

  let confidence = 0;
  if (totalTables > 0) {
    confidence = formTables / totalTables;
  } else if (fields.length > 0) {
    confidence = 0.3;
  }
  return { fields, confidence: Math.min(confidence, 1) };
}
|
|
5979
|
-
/**
 * Extracts form fields from one table.
 *
 * Strategy 1: every cell that looks like a label (isLabelCell) and has a
 * non-empty right-hand neighbour yields a field. Strategy 2 runs only when
 * strategy 1 found nothing: if every first-row cell is 1-20 characters long,
 * the first row is treated as a header and each non-empty body cell yields a
 * field.
 *
 * @param {{rows: number, cols: number, cells: Array<Array<{text: string}>>}} table
 * @returns {Array<{label: string, value: string, row: number, col: number}>}
 */
function extractFromTable(table) {
  const fields = [];
  // Both strategies need at least two columns.
  if (table.cols < 2) return fields;

  for (let r = 0; r < table.rows; r++) {
    const rowCells = table.cells[r];
    for (let c = 0; c < table.cols - 1; c++) {
      const labelCell = rowCells[c];
      const valueCell = rowCells[c + 1];
      if (!isLabelCell(labelCell.text)) continue;
      const value = valueCell.text.trim();
      if (!value) continue;
      const label = labelCell.text.trim().replace(/[::]\s*$/, "");
      fields.push({ label, value, row: r, col: c });
    }
  }
  if (fields.length > 0 || table.rows < 2) return fields;

  const headerRow = table.cells[0];
  const headerLooksLikeLabels = headerRow.every((cell) => {
    const len = cell.text.trim().length;
    return len > 0 && len <= 20;
  });
  if (!headerLooksLikeLabels) return fields;

  for (let r = 1; r < table.rows; r++) {
    for (let c = 0; c < table.cols; c++) {
      const label = headerRow[c].text.trim();
      const value = table.cells[r][c].text.trim();
      if (label && value) fields.push({ label, value, row: r, col: c });
    }
  }
  return fields;
}
|
|
6017
|
-
/**
 * Finds inline "label: value" pairs in free text.
 *
 * A label is 2-10 Hangul or Latin letters followed by a colon; the value runs
 * up to 100 characters and stops at a newline, comma or semicolon. Inline
 * fields carry row and col of -1 because they have no table position.
 *
 * @param {string} text - Paragraph text.
 * @returns {Array<{label: string, value: string, row: number, col: number}>}
 */
function extractInlineFields(text) {
  const pattern = /([가-힣A-Za-z]{2,10})\s*[::]\s*([^\n,;]{1,100})/g;
  const fields = [];
  for (const [, rawLabel, rawValue] of text.matchAll(pattern)) {
    const value = rawValue.trim();
    if (!value) continue;
    fields.push({ label: rawLabel.trim(), value, row: -1, col: -1 });
  }
  return fields;
}
|
|
6030
|
-
|
|
6031
|
-
// src/hwpx/generator.ts
|
|
6032
|
-
import JSZip4 from "jszip";
|
|
6033
|
-
|
|
6034
6933
|
// src/index.ts
|
|
6035
6934
|
async function parse(input, options) {
|
|
6036
6935
|
let buffer;
|
|
@@ -6294,8 +7193,11 @@ export {
|
|
|
6294
7193
|
extractHwpxMetadataOnly,
|
|
6295
7194
|
extractHwp5MetadataOnly,
|
|
6296
7195
|
extractPdfMetadataOnly,
|
|
6297
|
-
compare,
|
|
6298
7196
|
extractFormFields,
|
|
7197
|
+
fillFormFields,
|
|
7198
|
+
fillHwpx,
|
|
7199
|
+
markdownToHwpx,
|
|
7200
|
+
compare,
|
|
6299
7201
|
parse
|
|
6300
7202
|
};
|
|
6301
|
-
//# sourceMappingURL=chunk-
|
|
7203
|
+
//# sourceMappingURL=chunk-SY2RFVLW.js.map
|