kordoc 2.2.3 → 2.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,7 +8,7 @@ import {
8
8
  } from "./chunk-MOL7MDBG.js";
9
9
 
10
10
  // src/utils.ts
11
- var VERSION = true ? "2.2.3" : "0.0.0-dev";
11
+ var VERSION = true ? "2.2.4" : "0.0.0-dev";
12
12
  function toArrayBuffer(buf) {
13
13
  if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
14
14
  return buf.buffer;
@@ -5189,12 +5189,1040 @@ function mergeKoreanLines(text) {
5189
5189
  return result.join("\n");
5190
5190
  }
5191
5191
 
5192
+ // src/form/recognize.ts
5193
// Hangul keywords (kept as Unicode escapes) whose presence inside a cell's
// text makes isLabelCell / isKeywordLabel treat that cell as a field label.
var LABEL_KEYWORDS = /* @__PURE__ */ new Set([
  "\uC131\uBA85", "\uC774\uB984", "\uC8FC\uC18C", "\uC804\uD654",
  "\uC804\uD654\uBC88\uD638", "\uD734\uB300\uD3F0", "\uD578\uB4DC\uD3F0", "\uC5F0\uB77D\uCC98",
  "\uC0DD\uB144\uC6D4\uC77C", "\uC8FC\uBBFC\uB4F1\uB85D\uBC88\uD638",
  "\uC18C\uC18D", "\uC9C1\uC704", "\uC9C1\uAE09", "\uBD80\uC11C",
  "\uC774\uBA54\uC77C", "\uD329\uC2A4",
  "\uD559\uAD50", "\uD559\uB144", "\uBC18", "\uBC88\uD638",
  "\uC2E0\uCCAD\uC778", "\uB300\uD45C\uC790", "\uB2F4\uB2F9\uC790",
  "\uC791\uC131\uC790", "\uD655\uC778\uC790", "\uC2B9\uC778\uC790",
  "\uC77C\uC2DC", "\uB0A0\uC9DC", "\uAE30\uAC04", "\uC7A5\uC18C",
  "\uBAA9\uC801", "\uC0AC\uC720", "\uBE44\uACE0",
  "\uAE08\uC561", "\uC218\uB7C9", "\uB2E8\uAC00",
  "\uD569\uACC4", "\uACC4", "\uC18C\uACC4",
  "\uB4F1\uB85D\uAE30\uC900\uC9C0", "\uBCF8\uC801", "\uC704\uC784\uC778",
  "\uCCAD\uAD6C\uC0AC\uC720", "\uC18C\uBA85\uC790\uB8CC"
]);
5239
/**
 * Heuristically decides whether a cell's text looks like a form label.
 *
 * Trailing footnote markers (superscript digits, `*`, `※`) are stripped first.
 * Empty text and text longer than 30 characters are never labels. Otherwise the
 * text is a label when it contains any LABEL_KEYWORDS entry, is a short
 * (2-8 non-space characters) Hangul-only phrase, or is Hangul/Latin text that
 * ends with a colon.
 *
 * @param {string} text - Raw cell text.
 * @returns {boolean} True when the text looks like a label.
 */
function isLabelCell(text) {
  const candidate = text.trim().replace(/[¹²³⁴⁵⁶⁷⁸⁹⁰*※]+$/g, "").trim();
  if (candidate.length === 0 || candidate.length > 30) return false;

  if ([...LABEL_KEYWORDS].some((keyword) => candidate.includes(keyword))) return true;

  const compactLength = candidate.replace(/\s/g, "").length;
  const isShortHangulPhrase =
    /^[가-힣\s()()·::]+$/.test(candidate) &&
    compactLength >= 2 &&
    compactLength <= 8 &&
    !/\d/.test(candidate);

  return isShortHangulPhrase || /^[가-힣A-Za-z\s]+[::]$/.test(candidate);
}
5249
/**
 * Collects form fields from a list of document blocks.
 *
 * Table blocks are scanned first (via extractFromTable), then paragraph blocks
 * (via extractInlineFields), so table fields always precede inline fields in
 * the result.
 *
 * @param {Array<object>} blocks - Blocks with `type` and either `table` or `text`.
 * @returns {{fields: Array<object>, confidence: number}} The fields plus a
 *   confidence score: the share of tables that yielded fields, or 0.3 when
 *   there are no tables but some inline fields, or 0.
 */
function extractFormFields(blocks) {
  const fields = [];
  const tableBlocks = blocks.filter((block) => block.type === "table" && block.table);
  let tablesWithFields = 0;

  for (const { table } of tableBlocks) {
    const found = extractFromTable(table);
    if (found.length === 0) continue;
    tablesWithFields += 1;
    fields.push(...found);
  }

  for (const block of blocks) {
    if (block.type !== "paragraph" || !block.text) continue;
    fields.push(...extractInlineFields(block.text));
  }

  let confidence;
  if (tableBlocks.length > 0) {
    confidence = tablesWithFields / tableBlocks.length;
  } else {
    confidence = fields.length > 0 ? 0.3 : 0;
  }
  return { fields, confidence: Math.min(confidence, 1) };
}
5271
/**
 * Extracts label/value pairs from one table.
 *
 * Strategy 1: every cell recognised by isLabelCell is paired with the cell to
 * its right. Strategy 2 (only when strategy 1 found nothing): if every cell in
 * the first row is short, non-empty text, the first row is treated as headers
 * and each non-empty cell below becomes a field labelled by its column header.
 *
 * Missing rows or cells (ragged tables, rows shorter than `table.cols`) are
 * skipped instead of throwing, matching how fillFormFields/fillTable tolerate
 * absent cells.
 *
 * @param {{rows: number, cols: number, cells: Array<Array<{text: string}>>}} table
 * @returns {Array<{label: string, value: string, row: number, col: number}>}
 */
function extractFromTable(table) {
  const fields = [];
  if (table.cols >= 2) {
    for (let r = 0; r < table.rows; r++) {
      const row = table.cells[r];
      if (!row) continue;
      for (let c = 0; c < table.cols - 1; c++) {
        const labelCell = row[c];
        const valueCell = row[c + 1];
        if (!labelCell || !valueCell) continue;
        if (isLabelCell(labelCell.text)) {
          fields.push({
            label: labelCell.text.trim().replace(/[::]\s*$/, ""),
            value: valueCell.text.trim(),
            row: r,
            col: c
          });
        }
      }
    }
  }
  if (fields.length === 0 && table.rows >= 2 && table.cols >= 2) {
    const headerRow = table.cells[0];
    if (!headerRow) return fields;
    const allLabels = headerRow.every((cell) => {
      if (!cell) return false;
      const t = cell.text.trim();
      return t.length > 0 && t.length <= 20;
    });
    if (allLabels) {
      for (let r = 1; r < table.rows; r++) {
        const row = table.cells[r];
        if (!row) continue;
        for (let c = 0; c < table.cols; c++) {
          const headerCell = headerRow[c];
          const dataCell = row[c];
          // The header row may be shorter than table.cols.
          if (!headerCell || !dataCell) continue;
          const label = headerCell.text.trim();
          const value = dataCell.text.trim();
          if (label && value) {
            fields.push({ label, value, row: r, col: c });
          }
        }
      }
    }
  }
  return fields;
}
5309
/**
 * Finds inline `label: value` pairs in free text.
 *
 * A label is 2-10 Hangul/Latin letters followed by a colon; the value runs up
 * to the next newline, comma or semicolon (max 100 chars). Pairs whose value is
 * blank after trimming are dropped. Inline fields carry row/col of -1.
 *
 * @param {string} text - Paragraph text.
 * @returns {Array<{label: string, value: string, row: number, col: number}>}
 */
function extractInlineFields(text) {
  const pairPattern = /([가-힣A-Za-z]{2,10})\s*[::]\s*([^\n,;]{1,100})/g;
  const found = [];
  for (let hit = pairPattern.exec(text); hit !== null; hit = pairPattern.exec(text)) {
    const [, rawLabel, rawValue] = hit;
    const value = rawValue.trim();
    if (!value) continue;
    found.push({ label: rawLabel.trim(), value, row: -1, col: -1 });
  }
  return found;
}
5322
+
5323
+ // src/form/match.ts
5324
/**
 * Canonicalises a label for matching by removing colons, whitespace,
 * parentheses and middle dots.
 *
 * @param {string} label - Raw label text.
 * @returns {string} The label with separator characters removed.
 */
function normalizeLabel(label) {
  const separators = /[::\s()()·]/g;
  const trimmed = label.trim();
  return trimmed.replace(separators, "");
}
5327
/**
 * Finds the key in `values` that best matches a normalised cell label.
 *
 * An exact key wins immediately. Otherwise a key qualifies when one of the two
 * strings is a prefix of the other and the shorter is at least 60% of the
 * longer one's length; among qualifying keys the one with the longest shared
 * prefix wins (the first such key on ties).
 *
 * @param {string} cellLabel - Normalised label taken from the document.
 * @param {Map<string, *>} values - Normalised label -> value map.
 * @returns {string|undefined} The matching key, or undefined when none qualifies.
 */
function findMatchingKey(cellLabel, values) {
  if (values.has(cellLabel)) return cellLabel;

  let winner;
  let winnerScore = 0;
  for (const candidate of values.keys()) {
    let shorter;
    let longer;
    if (cellLabel.startsWith(candidate)) {
      shorter = candidate.length;
      longer = cellLabel.length;
    } else if (candidate.startsWith(cellLabel)) {
      shorter = cellLabel.length;
      longer = candidate.length;
    } else {
      continue;
    }
    if (shorter >= longer * 0.6 && shorter > winnerScore) {
      winnerScore = shorter;
      winner = candidate;
    }
  }
  return winner;
}
5346
/**
 * Stricter variant of isLabelCell: true only when the text (after stripping
 * trailing footnote markers) is 1-15 characters long and contains one of the
 * LABEL_KEYWORDS entries.
 *
 * @param {string} text - Raw cell text.
 * @returns {boolean}
 */
function isKeywordLabel(text) {
  const candidate = text.trim().replace(/[¹²³⁴⁵⁶⁷⁸⁹⁰*※]+$/g, "").trim();
  const hasUsableLength = candidate.length > 0 && candidate.length <= 15;
  return hasUsableLength && [...LABEL_KEYWORDS].some((keyword) => candidate.includes(keyword));
}
5354
/**
 * Fills placeholders that live inside a single cell's text.
 *
 * Three patterns are applied in order, each on the output of the previous one:
 *   1. `label(   )suffix`  -> `label(value)suffix`, keyed by label+suffix, else by label
 *   2. `□keyword`          -> `☑keyword` when the value is a truthy marker or blank
 *   3. `(keyword:   )`     -> `(keyword：value)`
 *
 * Every key used is added to `matchedLabels`.
 *
 * @param {string} cellText - Current cell text.
 * @param {Map<string, string>} values - Normalised label -> value map.
 * @param {Set<string>} matchedLabels - Mutated: receives each matched key.
 * @returns {{text: string, matches: Array<{key: string, label: string, value: string}>}|null}
 *   The rewritten text and match list, or null when nothing matched.
 */
function fillInCellPatterns(cellText, values, matchedLabels) {
  const matches = [];
  const record = (key, label, value) => {
    matchedLabels.add(key);
    matches.push({ key, label, value });
  };

  const withBlanksFilled = cellText.replace(
    /([가-힣A-Za-z]+)\(\s{1,}\)([가-힣A-Za-z]*)/g,
    (whole, prefix, suffix) => {
      const label = prefix + suffix;
      let key = normalizeLabel(label);
      if (!values.has(key)) {
        key = normalizeLabel(prefix);
        if (!values.has(key)) return whole;
      }
      const replacement = values.get(key);
      record(key, label, replacement);
      return `${prefix}(${replacement})${suffix}`;
    }
  );

  const withBoxesChecked = withBlanksFilled.replace(
    /□([가-힣A-Za-z]+)/g,
    (whole, keyword) => {
      const key = normalizeLabel(keyword);
      if (!values.has(key)) return whole;
      const answer = values.get(key).trim();
      const checked =
        ["\u2611", "\u2713", "\u2714", "v", "V", "true", "1", "yes", "o", "O"].includes(answer) ||
        answer === "";
      if (!checked) return whole;
      record(key, `\u25A1${keyword}`, "\u2611");
      return `\u2611${keyword}`;
    }
  );

  const text = withBoxesChecked.replace(
    /\(([가-힣A-Za-z]+)[::]\s{1,}\)/g,
    (whole, keyword) => {
      const key = normalizeLabel(keyword);
      if (!values.has(key)) return whole;
      const replacement = values.get(key);
      record(key, keyword, replacement);
      return `(${keyword}\uFF1A${replacement})`;
    }
  );

  return matches.length > 0 ? { text, matches } : null;
}
5398
/**
 * Converts a plain `{label: value}` object into a Map keyed by normalised
 * label. When two labels normalise to the same key the later one wins.
 *
 * @param {Object<string, *>} values - Caller-supplied label -> value object.
 * @returns {Map<string, *>}
 */
function normalizeValues(values) {
  const normalisedPairs = Object.entries(values).map(
    ([label, value]) => [normalizeLabel(label), value]
  );
  return new Map(normalisedPairs);
}
5405
/**
 * Lists the labels that were supplied but never matched, expressed in the
 * caller's original spelling where possible.
 *
 * @param {Map<string, *>} normalizedValues - Normalised label -> value map.
 * @param {Set<string>} matchedLabels - Normalised keys that were used.
 * @param {Object<string, *>} originalValues - The caller's original object.
 * @returns {string[]} For each unused key, the first original label that
 *   normalises to it, or the normalised key itself if none does.
 */
function resolveUnmatched(normalizedValues, matchedLabels, originalValues) {
  const unmatched = [];
  for (const key of normalizedValues.keys()) {
    if (matchedLabels.has(key)) continue;
    const original = Object.keys(originalValues).find(
      (candidate) => normalizeLabel(candidate) === key
    );
    unmatched.push(original === undefined ? key : original);
  }
  return unmatched;
}
5413
+
5414
+ // src/form/filler.ts
5415
/**
 * Fills form fields in a deep copy of `blocks`; the input is not mutated.
 *
 * Passes, in order:
 *   1. in-cell placeholder patterns for every table cell (fillInCellPatterns),
 *   2. label-cell / header-row filling for every table (fillTable),
 *   3. inline `label: value` replacement in paragraphs (fillInlineFields).
 *
 * Cells rewritten in pass 1 are remembered per table, so pass 2 prepends to
 * them instead of overwriting. The bookkeeping is kept separately for each
 * table: `"row,col"` coordinates repeat across tables, and a shared set would
 * make a pattern fill in one table change how the same coordinates are treated
 * in every other table.
 *
 * @param {Array<object>} blocks - Document blocks (`table` / `paragraph`).
 * @param {Object<string, string>} values - Label -> value to fill in.
 * @returns {{blocks: Array<object>, filled: Array<object>, unmatched: string[]}}
 */
function fillFormFields(blocks, values) {
  const cloned = structuredClone(blocks);
  const filled = [];
  const matchedLabels = /* @__PURE__ */ new Set();
  const normalizedValues = normalizeValues(values);
  // table object -> Set of "row,col" keys rewritten by the pattern pass
  const patternFilledByTable = /* @__PURE__ */ new Map();

  for (const block of cloned) {
    if (block.type !== "table" || !block.table) continue;
    let patternFilledCells = patternFilledByTable.get(block.table);
    if (!patternFilledCells) {
      patternFilledCells = /* @__PURE__ */ new Set();
      patternFilledByTable.set(block.table, patternFilledCells);
    }
    for (let r = 0; r < block.table.rows; r++) {
      for (let c = 0; c < block.table.cols; c++) {
        const cell = block.table.cells[r]?.[c];
        if (!cell) continue;
        const result = fillInCellPatterns(cell.text, normalizedValues, matchedLabels);
        if (result) {
          cell.text = result.text;
          patternFilledCells.add(`${r},${c}`);
          for (const m of result.matches) {
            filled.push({ label: m.label, value: m.value, row: r, col: c });
          }
        }
      }
    }
  }

  for (const block of cloned) {
    if (block.type !== "table" || !block.table) continue;
    fillTable(
      block.table,
      normalizedValues,
      filled,
      matchedLabels,
      patternFilledByTable.get(block.table)
    );
  }

  for (const block of cloned) {
    if (block.type !== "paragraph" || !block.text) continue;
    const newText = fillInlineFields(block.text, normalizedValues, filled, matchedLabels);
    if (newText !== block.text) block.text = newText;
  }

  const unmatched = resolveUnmatched(normalizedValues, matchedLabels, values);
  return { blocks: cloned, filled, unmatched };
}
5450
+ function fillTable(table, values, filled, matchedLabels, patternFilledCells) {
5451
+ if (table.cols < 2) return;
5452
+ for (let r = 0; r < table.rows; r++) {
5453
+ for (let c = 0; c < table.cols - 1; c++) {
5454
+ const labelCell = table.cells[r][c];
5455
+ const valueCell = table.cells[r][c + 1];
5456
+ if (!labelCell || !valueCell) continue;
5457
+ if (!isLabelCell(labelCell.text)) continue;
5458
+ if (isKeywordLabel(valueCell.text)) continue;
5459
+ const normalizedCellLabel = normalizeLabel(labelCell.text);
5460
+ if (!normalizedCellLabel) continue;
5461
+ const matchKey = findMatchingKey(normalizedCellLabel, values);
5462
+ if (matchKey === void 0) continue;
5463
+ const newValue = values.get(matchKey);
5464
+ if (patternFilledCells?.has(`${r},${c + 1}`)) {
5465
+ valueCell.text = newValue + " " + valueCell.text;
5466
+ } else {
5467
+ valueCell.text = newValue;
5468
+ }
5469
+ matchedLabels.add(matchKey);
5470
+ filled.push({
5471
+ label: labelCell.text.trim().replace(/[::]\s*$/, ""),
5472
+ value: newValue,
5473
+ row: r,
5474
+ col: c
5475
+ });
5476
+ }
5477
+ }
5478
+ if (table.rows >= 2 && table.cols >= 2) {
5479
+ const headerRow = table.cells[0];
5480
+ const allLabels = headerRow.every((cell) => {
5481
+ const t = cell.text.trim();
5482
+ return t.length > 0 && t.length <= 20 && isLabelCell(t);
5483
+ });
5484
+ if (!allLabels) return;
5485
+ for (let r = 1; r < table.rows; r++) {
5486
+ for (let c = 0; c < table.cols; c++) {
5487
+ const headerLabel = normalizeLabel(headerRow[c].text);
5488
+ const matchKey = findMatchingKey(headerLabel, values);
5489
+ if (matchKey === void 0) continue;
5490
+ if (matchedLabels.has(matchKey)) continue;
5491
+ const newValue = values.get(matchKey);
5492
+ table.cells[r][c].text = newValue;
5493
+ matchedLabels.add(matchKey);
5494
+ filled.push({
5495
+ label: headerRow[c].text.trim(),
5496
+ value: newValue,
5497
+ row: r,
5498
+ col: c
5499
+ });
5500
+ }
5501
+ }
5502
+ }
5503
+ }
5504
/**
 * Replaces the value part of inline `label: value` pairs in paragraph text.
 *
 * Each pair whose normalised label resolves to a key (findMatchingKey) is
 * rewritten as `label: newValue`; other pairs are left untouched.
 *
 * @param {string} text - Paragraph text.
 * @param {Map<string, string>} values - Normalised label -> value map.
 * @param {Array<object>} filled - Mutated: receives one record per replacement (row/col -1).
 * @param {Set<string>} matchedLabels - Mutated: receives each used key.
 * @returns {string} The rewritten text.
 */
function fillInlineFields(text, values, filled, matchedLabels) {
  const fieldPattern = /([가-힣A-Za-z]{2,10})\s*[::]\s*([^\n,;]{0,100})/g;
  const substitute = (whole, rawLabel) => {
    const key = findMatchingKey(normalizeLabel(rawLabel), values);
    if (key === void 0) return whole;
    const replacement = values.get(key);
    matchedLabels.add(key);
    filled.push({ label: rawLabel.trim(), value: replacement, row: -1, col: -1 });
    return `${rawLabel}: ${replacement}`;
  };
  return text.replace(fieldPattern, substitute);
}
5523
+
5524
+ // src/form/filler-hwpx.ts
5525
+ import JSZip2 from "jszip";
5526
+ import { DOMParser as DOMParser2, XMLSerializer } from "@xmldom/xmldom";
5527
/**
 * Fills form fields directly inside an HWPX archive and returns the re-zipped result.
 *
 * The buffer is opened with JSZip; every entry whose name matches
 * `section<N>.xml` is parsed, filled, and written back only if it was modified.
 * Per section the passes run in this order:
 *   1. in-cell placeholder patterns (fillInCellPatterns) on every `tc` of every `tbl`,
 *   2. label cell -> adjacent value cell, per table row,
 *   3. header row -> data rows, for tables whose first row is all labels,
 *   4. inline `label: value` pairs in paragraphs that are not inside a table.
 * `matchedLabels` is shared by all passes and all sections.
 *
 * @param hwpxBuffer - Archive data, passed as-is to JSZip.loadAsync.
 * @param values - Plain object mapping labels to the values to fill in.
 * @returns {Promise<{buffer: ArrayBuffer, filled: Array, unmatched: Array}>}
 *   The regenerated archive, one record per filled field, and the supplied
 *   labels that were never used.
 * @throws {KordocError} When the archive contains no section XML entry.
 */
+ async function fillHwpx(hwpxBuffer, values) {
5528
+ const zip = await JSZip2.loadAsync(hwpxBuffer);
5529
+ const filled = [];
5530
+ const matchedLabels = /* @__PURE__ */ new Set();
5531
+ const normalizedValues = normalizeValues(values);
5532
+ const sectionFiles = Object.keys(zip.files).filter((name) => /[Ss]ection\d+\.xml$/i.test(name)).sort();
5533
+ if (sectionFiles.length === 0) {
5534
+ throw new KordocError("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
5535
+ }
5536
+ const xmlParser = new DOMParser2();
5537
+ const xmlSerializer = new XMLSerializer();
5538
+ for (const sectionPath of sectionFiles) {
5539
+ const zipEntry = zip.file(sectionPath);
5540
+ if (!zipEntry) continue;
5541
+ const rawXml = await zipEntry.async("text");
5542
+ const doc = xmlParser.parseFromString(stripDtd(rawXml), "text/xml");
5543
+ if (!doc.documentElement) continue;
5544
+ let modified = false;
5545
+ const tables = findAllElements(doc.documentElement, "tbl");
// Cells rewritten by pass 1, tracked by element identity so pass 2 can prepend instead of replace.
5546
+ const cellPatternApplied = /* @__PURE__ */ new Set();
// Pass 1: in-cell placeholder patterns, applied to the concatenated text of each cell.
5547
+ for (const tblEl of tables) {
5548
+ const allCells = findAllElements(tblEl, "tc");
5549
+ for (const tcEl of allCells) {
5550
+ const tNodes = collectCellTextNodes(tcEl);
5551
+ const fullText = tNodes.map((n) => n.text).join("");
5552
+ const result = fillInCellPatterns(fullText, normalizedValues, matchedLabels);
5553
+ if (!result) continue;
5554
+ applyTextReplacements(tNodes, fullText, result.text);
5555
+ cellPatternApplied.add(tcEl);
5556
+ for (const m of result.matches) {
5557
+ filled.push({ label: m.label, value: m.value, row: -1, col: -1 });
5558
+ }
5559
+ modified = true;
5560
+ }
5561
+ }
// Pass 2: a label cell fills the cell immediately to its right in the same row.
5562
+ for (const tblEl of tables) {
5563
+ const rows = findDirectChildren(tblEl, "tr");
5564
+ for (let rowIdx = 0; rowIdx < rows.length; rowIdx++) {
5565
+ const trEl = rows[rowIdx];
5566
+ const cells = findDirectChildren(trEl, "tc");
5567
+ for (let colIdx = 0; colIdx < cells.length - 1; colIdx++) {
5568
+ const labelText = extractCellText(cells[colIdx]);
5569
+ if (!isLabelCell(labelText)) continue;
5570
+ const valueCell = cells[colIdx + 1];
5571
+ const valueText = extractCellText(valueCell);
// Skip when the neighbour is itself a keyword label rather than a value slot.
5572
+ if (isKeywordLabel(valueText)) continue;
5573
+ const normalizedCellLabel = normalizeLabel(labelText);
5574
+ if (!normalizedCellLabel) continue;
5575
+ const matchKey = findMatchingKey(normalizedCellLabel, normalizedValues);
5576
+ if (matchKey === void 0) continue;
5577
+ const newValue = normalizedValues.get(matchKey);
5578
+ if (cellPatternApplied.has(valueCell)) {
5579
+ prependCellText(valueCell, newValue);
5580
+ } else {
5581
+ replaceCellText(valueCell, newValue);
5582
+ }
5583
+ matchedLabels.add(matchKey);
5584
+ filled.push({
5585
+ label: labelText.trim().replace(/[::]\s*$/, ""),
5586
+ value: newValue,
5587
+ row: rowIdx,
5588
+ col: colIdx
5589
+ });
5590
+ modified = true;
5591
+ }
5592
+ }
// Pass 3: header-row tables. Keys already matched are skipped, so each key fills at most one data cell here.
5593
+ if (rows.length >= 2) {
5594
+ const headerCells = findDirectChildren(rows[0], "tc");
5595
+ const allLabels = headerCells.every((cell) => {
5596
+ const t = extractCellText(cell).trim();
5597
+ return t.length > 0 && t.length <= 20 && isLabelCell(t);
5598
+ });
5599
+ if (allLabels) {
5600
+ for (let rowIdx = 1; rowIdx < rows.length; rowIdx++) {
5601
+ const dataCells = findDirectChildren(rows[rowIdx], "tc");
5602
+ for (let colIdx = 0; colIdx < Math.min(headerCells.length, dataCells.length); colIdx++) {
5603
+ const headerLabel = normalizeLabel(extractCellText(headerCells[colIdx]));
5604
+ const matchKey = findMatchingKey(headerLabel, normalizedValues);
5605
+ if (matchKey === void 0) continue;
5606
+ if (matchedLabels.has(matchKey)) continue;
5607
+ const newValue = normalizedValues.get(matchKey);
5608
+ replaceCellText(dataCells[colIdx], newValue);
5609
+ matchedLabels.add(matchKey);
5610
+ filled.push({
5611
+ label: extractCellText(headerCells[colIdx]).trim(),
5612
+ value: newValue,
5613
+ row: rowIdx,
5614
+ col: colIdx
5615
+ });
5616
+ modified = true;
5617
+ }
5618
+ }
5619
+ }
5620
+ }
5621
+ }
// Pass 4: inline "label: value" pairs in paragraphs outside tables.
5622
+ const allParagraphs = findAllElements(doc.documentElement, "p");
5623
+ for (const pEl of allParagraphs) {
5624
+ if (isInsideTable(pEl)) continue;
5625
+ const tNodes = collectTextNodes(pEl);
5626
+ const fullText = tNodes.map((n) => n.text).join("");
5627
+ const pattern = /([가-힣A-Za-z]{2,10})\s*[::]\s*([^\n,;]{0,100})/g;
5628
+ let match;
5629
+ while ((match = pattern.exec(fullText)) !== null) {
5630
+ const rawLabel = match[1];
5631
+ const normalized = normalizeLabel(rawLabel);
5632
+ const matchKey = findMatchingKey(normalized, normalizedValues);
5633
+ if (matchKey === void 0) continue;
5634
+ const newValue = normalizedValues.get(matchKey);
// The value group is the tail of the match, so its span is [end - value length, end).
5635
+ const valueStart = match.index + match[0].length - match[2].length;
5636
+ const valueEnd = match.index + match[0].length;
5637
+ replaceTextRange(tNodes, valueStart, valueEnd, newValue);
5638
+ matchedLabels.add(matchKey);
5639
+ filled.push({ label: rawLabel.trim(), value: newValue, row: -1, col: -1 });
5640
+ modified = true;
// Only the first matching pair per paragraph is replaced: tNodes/fullText are not recomputed after the edit.
5641
+ break;
5642
+ }
5643
+ }
// Untouched sections keep their original bytes in the archive.
5644
+ if (modified) {
5645
+ const newXml = xmlSerializer.serializeToString(doc);
5646
+ zip.file(sectionPath, newXml);
5647
+ }
5648
+ }
5649
+ const unmatched = resolveUnmatched(normalizedValues, matchedLabels, values);
5650
+ const buffer = await zip.generateAsync({ type: "arraybuffer" });
5651
+ return { buffer, filled, unmatched };
5652
+ }
5653
/**
 * Returns an element's tag name without its namespace prefix
 * (e.g. `hp:tbl` -> `tbl`). Falls back to `localName`, then to "".
 *
 * @param {{tagName?: string, localName?: string}} el
 * @returns {string}
 */
function localName(el) {
  const qualifiedName = el.tagName || el.localName || "";
  return qualifiedName.replace(/^[^:]+:/, "");
}
5656
/**
 * Collects every descendant element of `node` whose prefix-less tag name
 * equals `tagLocalName`, in document (pre-order) order. `node` itself is never
 * included; matching elements are still descended into.
 *
 * @param {object} node - DOM-like node exposing `childNodes`.
 * @param {string} tagLocalName - Tag name without namespace prefix.
 * @returns {Array<object>}
 */
function findAllElements(node, tagLocalName) {
  const found = [];
  (function visit(current) {
    const kids = current.childNodes;
    if (!kids) return;
    for (let idx = 0; idx < kids.length; idx += 1) {
      const kid = kids[idx];
      if (kid.nodeType === 1) {
        if (localName(kid) === tagLocalName) found.push(kid);
        visit(kid);
      }
    }
  })(node);
  return found;
}
5671
/**
 * Returns the immediate child elements of `parent` whose prefix-less tag name
 * equals `tagLocalName` (no recursion).
 *
 * @param {object} parent - DOM-like node exposing `childNodes`.
 * @param {string} tagLocalName - Tag name without namespace prefix.
 * @returns {Array<object>}
 */
function findDirectChildren(parent, tagLocalName) {
  const matches = [];
  const kids = parent.childNodes;
  if (kids) {
    for (let idx = 0; idx < kids.length; idx += 1) {
      const kid = kids[idx];
      if (kid.nodeType !== 1) continue;
      if (localName(kid) === tagLocalName) matches.push(kid);
    }
  }
  return matches;
}
5683
/**
 * Reports whether any ancestor of `el` is a `tbl` element.
 *
 * @param {object} el - DOM-like node exposing `parentNode`.
 * @returns {boolean}
 */
function isInsideTable(el) {
  for (let ancestor = el.parentNode; ancestor; ancestor = ancestor.parentNode) {
    if (ancestor.nodeType === 1 && localName(ancestor) === "tbl") return true;
  }
  return false;
}
5691
/**
 * Concatenates the text of a table cell.
 *
 * Text nodes are taken verbatim; `t`, `run`, `r`, `p` and `subList` elements
 * are descended into; `tab` becomes a space and `br` a newline. All other
 * elements are ignored together with their contents.
 *
 * @param {object} tcEl - The cell element.
 * @returns {string}
 */
function extractCellText(tcEl) {
  const containers = new Set(["t", "run", "r", "p", "subList"]);
  const pieces = [];
  const visit = (node) => {
    const kids = node.childNodes;
    if (!kids) return;
    for (let idx = 0; idx < kids.length; idx += 1) {
      const kid = kids[idx];
      if (kid.nodeType === 3) {
        pieces.push(kid.textContent || "");
        continue;
      }
      if (kid.nodeType !== 1) continue;
      const tag = localName(kid);
      if (containers.has(tag)) {
        visit(kid);
      } else if (tag === "tab") {
        pieces.push(" ");
      } else if (tag === "br") {
        pieces.push("\n");
      }
    }
  };
  visit(tcEl);
  return pieces.join("");
}
5712
/**
 * Prepends `text` plus a space to the first `t` element found in the cell.
 * Does nothing when the cell contains no `t` element.
 *
 * @param {object} tcEl - The cell element.
 * @param {string} text - Text to put in front of the existing content.
 * @returns {void}
 */
function prependCellText(tcEl, text) {
  const [firstT] = findAllElements(tcEl, "t");
  if (firstT === undefined) return;
  const previous = firstT.textContent || "";
  clearChildren(firstT);
  const merged = firstT.ownerDocument.createTextNode(text + " " + previous);
  firstT.appendChild(merged);
}
5720
/**
 * Replaces the whole text of a cell with `newValue`.
 *
 * The value is written into the first run of the first paragraph (or, when the
 * paragraph has no runs, into its first `t`); every other run / `t` in the cell
 * is emptied. Paragraph elements themselves are kept. Does nothing when the
 * cell has no paragraph.
 *
 * @param {object} tcEl - The cell element.
 * @param {string} newValue - Replacement text.
 * @returns {void}
 */
function replaceCellText(tcEl, newValue) {
  const runsOf = (el) => findAllElements(el, "run").concat(findAllElements(el, "r"));

  const [firstParagraph, ...otherParagraphs] = findAllElements(tcEl, "p");
  if (!firstParagraph) return;

  const runs = runsOf(firstParagraph);
  if (runs.length > 0) {
    runs.forEach((run, idx) => setRunText(run, idx === 0 ? newValue : ""));
  } else {
    const [firstT, ...otherTs] = findAllElements(firstParagraph, "t");
    if (firstT) {
      clearChildren(firstT);
      firstT.appendChild(firstT.ownerDocument.createTextNode(newValue));
      for (const t of otherTs) clearChildren(t);
    }
  }

  for (const paragraph of otherParagraphs) {
    if (!paragraph.parentNode) continue;
    for (const run of runsOf(paragraph)) setRunText(run, "");
    for (const t of findAllElements(paragraph, "t")) clearChildren(t);
  }
}
5750
/**
 * Sets the text of a run: the first `t` element receives `text`, every other
 * `t` in the run is emptied. Does nothing when the run has no `t` element.
 *
 * @param {object} runEl - The run element.
 * @param {string} text - New text (may be empty).
 * @returns {void}
 */
function setRunText(runEl, text) {
  const [head, ...rest] = findAllElements(runEl, "t");
  if (!head) return;
  clearChildren(head);
  head.appendChild(head.ownerDocument.createTextNode(text));
  for (const extra of rest) clearChildren(extra);
}
5760
/**
 * Removes every child node of `el`.
 *
 * @param {object} el - DOM-like node with `firstChild` / `removeChild`.
 * @returns {void}
 */
function clearChildren(el) {
  for (let child = el.firstChild; child; child = el.firstChild) {
    el.removeChild(child);
  }
}
5763
/**
 * Lists the `t` elements under a paragraph together with their text and the
 * offset each one starts at in the concatenated paragraph text.
 *
 * @param {object} pEl - The paragraph element.
 * @returns {Array<{element: object, text: string, offset: number}>}
 */
function collectTextNodes(pEl) {
  let cursor = 0;
  return findAllElements(pEl, "t").map((element) => {
    const text = element.textContent || "";
    const entry = { element, text, offset: cursor };
    cursor += text.length;
    return entry;
  });
}
5774
/**
 * Replaces the characters in [globalStart, globalEnd) of the concatenated text
 * described by `tNodes` with `newValue`, rewriting the affected `t` elements.
 *
 * For a non-empty range the first overlapping node receives `newValue` in
 * place of its part of the range; later overlapping nodes just lose their part.
 *
 * An empty range (globalStart === globalEnd) is a pure insertion. It is
 * handled explicitly because an insertion point that sits exactly on a node
 * boundary, or at the very end of the text, overlaps no node, so the value
 * would otherwise be dropped silently (e.g. filling a blank `label: ` at the
 * end of a paragraph). The value goes into the first node whose span contains
 * the insertion point, end boundary included.
 *
 * @param {Array<{element: object, text: string, offset: number}>} tNodes -
 *   Text nodes with offsets as produced by collectTextNodes. The entries'
 *   `text`/`offset` are not updated by this call.
 * @param {number} globalStart - Inclusive start offset.
 * @param {number} globalEnd - Exclusive end offset.
 * @param {string} newValue - Replacement text.
 * @returns {void}
 */
function replaceTextRange(tNodes, globalStart, globalEnd, newValue) {
  const writeText = (node, text) => {
    clearChildren(node.element);
    node.element.appendChild(node.element.ownerDocument.createTextNode(text));
  };

  if (globalStart === globalEnd) {
    const target = tNodes.find(
      (node) => node.offset <= globalStart && globalStart <= node.offset + node.text.length
    );
    if (target) {
      const at = globalStart - target.offset;
      writeText(target, target.text.slice(0, at) + newValue + target.text.slice(at));
    }
    return;
  }

  let replaced = false;
  for (const node of tNodes) {
    const nodeStart = node.offset;
    const nodeEnd = node.offset + node.text.length;
    if (nodeEnd <= globalStart || nodeStart >= globalEnd) continue;
    const localStart = Math.max(0, globalStart - nodeStart);
    const localEnd = Math.min(node.text.length, globalEnd - nodeStart);
    const before = node.text.slice(0, localStart);
    const after = node.text.slice(localEnd);
    // Only the first overlapping node carries the new value.
    writeText(node, replaced ? before + after : before + newValue + after);
    replaced = true;
  }
}
5798
/**
 * Lists the `t` elements under a table cell together with their text and the
 * offset each one starts at in the concatenated cell text.
 *
 * The walk is the same for a cell as for a paragraph, so this delegates to
 * collectTextNodes instead of keeping a second copy of the same loop.
 *
 * @param {object} tcEl - The cell element.
 * @returns {Array<{element: object, text: string, offset: number}>}
 */
function collectCellTextNodes(tcEl) {
  return collectTextNodes(tcEl);
}
5809
/**
 * Writes a rewritten cell text back into the cell's `t` elements.
 *
 * With a single `t` the new text is stored whole. With several, the common
 * prefix and suffix of the old and new text are kept and only the differing
 * middle is replaced through replaceTextRange.
 *
 * @param {Array<{element: object, text: string, offset: number}>} tNodes
 * @param {string} originalFull - Concatenated text before the rewrite.
 * @param {string} replacedFull - Concatenated text after the rewrite.
 * @returns {void}
 */
function applyTextReplacements(tNodes, originalFull, replacedFull) {
  if (originalFull === replacedFull) return;

  if (tNodes.length === 1) {
    const { element } = tNodes[0];
    clearChildren(element);
    element.appendChild(element.ownerDocument.createTextNode(replacedFull));
    return;
  }

  const shortest = Math.min(originalFull.length, replacedFull.length);

  let prefix = 0;
  while (prefix < shortest && originalFull[prefix] === replacedFull[prefix]) {
    prefix += 1;
  }

  // The suffix may not overlap the prefix in either string.
  let suffix = 0;
  while (
    suffix < shortest - prefix &&
    originalFull[originalFull.length - 1 - suffix] === replacedFull[replacedFull.length - 1 - suffix]
  ) {
    suffix += 1;
  }

  const changed = replacedFull.slice(prefix, replacedFull.length - suffix);
  replaceTextRange(tNodes, prefix, originalFull.length - suffix, changed);
}
5831
+
5832
+ // src/hwpx/generator.ts
5833
+ import JSZip3 from "jszip";
5834
// XML namespace URIs. NS_OCF, NS_HPF, NS_OPF and NS_HEAD are interpolated into
// the container.xml / content.hpf templates generated further down this file.
+ var NS_SECTION = "http://www.hancom.co.kr/hwpml/2011/section";
5835
+ var NS_PARA = "http://www.hancom.co.kr/hwpml/2011/paragraph";
5836
+ var NS_HEAD = "http://www.hancom.co.kr/hwpml/2011/head";
5837
+ var NS_OPF = "http://www.idpf.org/2007/opf/";
5838
+ var NS_HPF = "http://www.hancom.co.kr/schema/2011/hpf";
5839
+ var NS_OCF = "urn:oasis:names:tc:opendocument:xmlns:container";
// Character-property ids: returned by spanToCharPrId / headingCharPrId and
// written into the charPrIDRef attribute of generated <hp:run> elements.
5840
+ var CHAR_NORMAL = 0;
5841
+ var CHAR_BOLD = 1;
5842
+ var CHAR_ITALIC = 2;
5843
+ var CHAR_BOLD_ITALIC = 3;
5844
+ var CHAR_CODE = 4;
5845
+ var CHAR_H1 = 5;
5846
+ var CHAR_H2 = 6;
5847
+ var CHAR_H3 = 7;
5848
+ var CHAR_H4 = 8;
// Paragraph-property ids: written into the paraPrIDRef attribute of generated
// <hp:p> elements (headingParaPrId selects among the heading ones).
5849
+ var PARA_NORMAL = 0;
5850
+ var PARA_H1 = 1;
5851
+ var PARA_H2 = 2;
5852
+ var PARA_H3 = 3;
5853
+ var PARA_H4 = 4;
5854
+ var PARA_CODE = 5;
5855
+ var PARA_QUOTE = 6;
5856
+ var PARA_LIST = 7;
5857
/**
 * Converts a Markdown string into an HWPX archive.
 *
 * The Markdown is parsed into blocks, rendered to a single section XML, and
 * zipped together with the fixed mimetype, container, manifest and header
 * entries. The `mimetype` entry is stored uncompressed.
 *
 * @param {string} markdown - Markdown source.
 * @returns {Promise<ArrayBuffer>} The generated archive.
 */
async function markdownToHwpx(markdown) {
  const parsedBlocks = parseMarkdownToBlocks(markdown);
  const sectionXml = blocksToSectionXml(parsedBlocks);

  const archive = new JSZip3();
  archive.file("mimetype", "application/hwp+zip", { compression: "STORE" });
  archive.file("META-INF/container.xml", generateContainerXml());
  archive.file("Contents/content.hpf", generateManifest());
  archive.file("Contents/header.xml", generateHeaderXml());
  archive.file("Contents/section0.xml", sectionXml);

  const buffer = await archive.generateAsync({ type: "arraybuffer" });
  return buffer;
}
5868
/**
 * Splits Markdown source into a flat list of block descriptors.
 *
 * Recognised, in priority order per line: fenced code blocks, horizontal
 * rules, ATX headings, pipe tables, blockquotes, list items; anything else
 * becomes a one-line paragraph. Blank lines are skipped.
 *
 * Handling notes:
 *   - Lines are split on LF or CRLF. A trailing `\r` would otherwise stop the
 *     `$`-anchored fence and heading patterns from matching, turning headings
 *     and fences in Windows-style files into plain paragraphs.
 *   - A blockquote starts at any line whose first non-space character is `>`
 *     (a following space is optional) and the marker is stripped even when the
 *     line is indented.
 *   - Table rows may omit the trailing `|`; the last cell is kept.
 *
 * @param {string} md - Markdown source.
 * @returns {Array<object>} Blocks shaped as one of:
 *   `{type: "code_block", text, lang}`, `{type: "hr"}`,
 *   `{type: "heading", text, level}`, `{type: "table", rows}`,
 *   `{type: "blockquote", text}`,
 *   `{type: "list_item", text, ordered, indent}`, `{type: "paragraph", text}`.
 */
function parseMarkdownToBlocks(md) {
  const lines = md.split(/\r?\n/);
  const blocks = [];
  let i = 0;
  while (i < lines.length) {
    const line = lines[i];
    if (!line.trim()) {
      i++;
      continue;
    }

    const fenceMatch = line.match(/^(`{3,}|~{3,})(.*)$/);
    if (fenceMatch) {
      const fence = fenceMatch[1];
      const lang = fenceMatch[2].trim();
      const codeLines = [];
      i++;
      while (i < lines.length && !lines[i].startsWith(fence)) {
        codeLines.push(lines[i]);
        i++;
      }
      // Step over the closing fence; an unterminated block runs to end of input.
      if (i < lines.length) i++;
      blocks.push({ type: "code_block", text: codeLines.join("\n"), lang });
      continue;
    }

    if (/^(\*{3,}|-{3,}|_{3,})\s*$/.test(line.trim())) {
      blocks.push({ type: "hr" });
      i++;
      continue;
    }

    const headingMatch = line.match(/^(#{1,6})\s+(.+)$/);
    if (headingMatch) {
      blocks.push({ type: "heading", text: headingMatch[2].trim(), level: headingMatch[1].length });
      i++;
      continue;
    }

    if (line.trimStart().startsWith("|")) {
      const tableRows = [];
      while (i < lines.length && lines[i].trimStart().startsWith("|")) {
        const row = lines[i].trim();
        // Delimiter rows such as |---|:--:| carry no cell content.
        if (/^[\s|:\-]+$/.test(row)) {
          i++;
          continue;
        }
        // Drop the leading pipe and the trailing one only if it is present.
        const inner = row.endsWith("|") ? row.slice(1, -1) : row.slice(1);
        tableRows.push(inner.split("|").map((c) => c.trim()));
        i++;
      }
      if (tableRows.length > 0) blocks.push({ type: "table", rows: tableRows });
      continue;
    }

    if (line.trimStart().startsWith(">")) {
      while (i < lines.length && lines[i].trimStart().startsWith(">")) {
        blocks.push({ type: "blockquote", text: lines[i].replace(/^\s*>\s?/, "").trim() });
        i++;
      }
      continue;
    }

    const listMatch = line.match(/^(\s*)([-*+]|\d+[.)]) (.+)$/);
    if (listMatch) {
      // Two spaces of leading whitespace count as one nesting level.
      const indent = Math.floor(listMatch[1].length / 2);
      const ordered = /\d/.test(listMatch[2]);
      blocks.push({ type: "list_item", text: listMatch[3].trim(), ordered, indent });
      i++;
      continue;
    }

    blocks.push({ type: "paragraph", text: line.trim() });
    i++;
  }
  return blocks;
}
5942
/**
 * Splits one line of Markdown into styled spans.
 *
 * Images and links are reduced to their text (a link with empty text falls
 * back to its URL) and strikethrough markers are dropped. The remainder is cut
 * at inline code, `***`, `**`/`__` and `*`/`_` emphasis; text between markers
 * becomes plain spans.
 *
 * @param {string} text - Inline Markdown.
 * @returns {Array<{text: string, bold: boolean, italic: boolean, code: boolean}>}
 *   At least one span; input without markers yields a single plain span.
 */
function parseInlineMarkdown(text) {
  const plain = (value) => ({ text: value, bold: false, italic: false, code: false });

  const source = text
    .replace(/!\[([^\]]*)\]\([^)]*\)/g, "$1")
    .replace(/\[([^\]]*)\]\(([^)]*)\)/g, (_, label, url) => label || url)
    .replace(/~~([^~]+)~~/g, "$1");

  const tokenPattern = /(`[^`]+`|\*{3}[^*]+\*{3}|\*{2}[^*]+\*{2}|\*[^*]+\*|_{2}[^_]+_{2}|_[^_]+_)/g;
  const spans = [];
  let cursor = 0;

  for (let hit = tokenPattern.exec(source); hit !== null; hit = tokenPattern.exec(source)) {
    const token = hit[0];
    if (hit.index > cursor) spans.push(plain(source.slice(cursor, hit.index)));

    if (token.startsWith("`")) {
      spans.push({ text: token.slice(1, -1), bold: false, italic: false, code: true });
    } else {
      // Delimiter width decides the style: 3 = bold+italic, 2 = bold, 1 = italic.
      let width = 1;
      if (token.startsWith("***") || token.startsWith("___")) width = 3;
      else if (token.startsWith("**") || token.startsWith("__")) width = 2;
      spans.push({
        text: token.slice(width, -width),
        bold: width >= 2,
        italic: width !== 2,
        code: false
      });
    }
    cursor = hit.index + token.length;
  }

  if (cursor < source.length) spans.push(plain(source.slice(cursor)));
  if (spans.length === 0) spans.push(plain(source));
  return spans;
}
5974
/**
 * Maps a span's style flags to a character-property id. Code takes precedence
 * over bold/italic.
 *
 * @param {{code: boolean, bold: boolean, italic: boolean}} span
 * @returns {number} One of the CHAR_* ids.
 */
function spanToCharPrId(span) {
  if (span.code) return CHAR_CODE;
  if (span.bold) return span.italic ? CHAR_BOLD_ITALIC : CHAR_BOLD;
  return span.italic ? CHAR_ITALIC : CHAR_NORMAL;
}
5981
/**
 * Escapes `&`, `<`, `>` and `"` so the text can be embedded in XML.
 *
 * @param {string} text
 * @returns {string}
 */
function escapeXml(text) {
  const entities = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;" };
  return text.replace(/[&<>"]/g, (ch) => entities[ch]);
}
5984
/**
 * Renders inline Markdown as a sequence of `<hp:run>` elements.
 *
 * Styled spans (code, bold or italic) use the id from spanToCharPrId; plain
 * spans use `defaultCharPr`.
 *
 * @param {string} text - Inline Markdown.
 * @param {number} [defaultCharPr=CHAR_NORMAL] - Id for unstyled spans.
 * @returns {string} Concatenated run XML.
 */
function generateRuns(text, defaultCharPr = CHAR_NORMAL) {
  let xml = "";
  for (const span of parseInlineMarkdown(text)) {
    const isStyled = span.code || span.bold || span.italic;
    const charId = isStyled ? spanToCharPrId(span) : defaultCharPr;
    xml += `<hp:run charPrIDRef="${charId}"><hp:t>${escapeXml(span.text)}</hp:t></hp:run>`;
  }
  return xml;
}
5991
/**
 * Renders one `<hp:p>` element.
 *
 * Code paragraphs (PARA_CODE) emit the text verbatim in a single CHAR_CODE run
 * without inline-Markdown parsing; every other paragraph goes through
 * generateRuns.
 *
 * @param {string} text - Paragraph text.
 * @param {number} [paraPrId=PARA_NORMAL] - Paragraph-property id.
 * @param {number} [charPrId=CHAR_NORMAL] - Default character-property id.
 * @returns {string} Paragraph XML.
 */
function generateParagraph(text, paraPrId = PARA_NORMAL, charPrId = CHAR_NORMAL) {
  const inner =
    paraPrId === PARA_CODE
      ? `<hp:run charPrIDRef="${CHAR_CODE}"><hp:t>${escapeXml(text)}</hp:t></hp:run>`
      : generateRuns(text, charPrId);
  return `<hp:p paraPrIDRef="${paraPrId}" styleIDRef="0">${inner}</hp:p>`;
}
5998
/**
 * Picks the paragraph-property id for a heading level; any level other than
 * 1, 2 or 3 uses PARA_H4.
 *
 * @param {number} level - Heading level.
 * @returns {number}
 */
function headingParaPrId(level) {
  switch (level) {
    case 1:
      return PARA_H1;
    case 2:
      return PARA_H2;
    case 3:
      return PARA_H3;
    default:
      return PARA_H4;
  }
}
6004
/**
 * Picks the character-property id for a heading level; any level other than
 * 1, 2 or 3 uses CHAR_H4.
 *
 * @param {number} level - Heading level.
 * @returns {number}
 */
function headingCharPrId(level) {
  switch (level) {
    case 1:
      return CHAR_H1;
    case 2:
      return CHAR_H2;
    case 3:
      return CHAR_H3;
    default:
      return CHAR_H4;
  }
}
6010
/**
 * Returns the XML stored as META-INF/container.xml by markdownToHwpx: a
 * container document whose single rootfile entry points at
 * Contents/content.hpf. NS_OCF and NS_HPF supply the namespace declarations.
 *
 * @returns {string} The container XML text.
 */
+ function generateContainerXml() {
6011
+ return `<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
6012
+ <ocf:container xmlns:ocf="${NS_OCF}" xmlns:hpf="${NS_HPF}">
6013
+ <ocf:rootfiles>
6014
+ <ocf:rootfile full-path="Contents/content.hpf" media-type="application/hwpml-package+xml"/>
6015
+ </ocf:rootfiles>
6016
+ </ocf:container>`;
6017
+ }
6018
/**
 * Build the OPF package document: a manifest listing the header and the single
 * section part, and a spine that marks only the section as linear content.
 *
 * @returns {string} The package XML, one element per line.
 */
function generateManifest() {
  const manifestItems = [
    `<opf:item id="header" href="Contents/header.xml" media-type="application/xml"/>`,
    `<opf:item id="section0" href="Contents/section0.xml" media-type="application/xml"/>`
  ];
  const spineItems = [
    `<opf:itemref idref="header" linear="no"/>`,
    `<opf:itemref idref="section0" linear="yes"/>`
  ];
  const lines = [
    `<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>`,
    `<opf:package xmlns:opf="${NS_OPF}" xmlns:hpf="${NS_HPF}" xmlns:hh="${NS_HEAD}">`,
    `<opf:manifest>`,
    ...manifestItems,
    `</opf:manifest>`,
    `<opf:spine>`,
    ...spineItems,
    `</opf:spine>`,
    `</opf:package>`
  ];
  return lines.join("\n");
}
6031
/**
 * Build one `<hh:charPr>` (character property) entry for the header.
 *
 * Each attribute of `<hh:fontRef>` refers to a font id inside the font-face group
 * of the same script. The font table emitted by generateHeaderXml() declares a
 * second font (id 1) only for HANGUL and LATIN; the HANJA, JAPANESE, OTHER, SYMBOL
 * and USER groups contain id 0 only. Writing `fontId` into all seven slots
 * therefore produced dangling references whenever `fontId` was 1. Only the hangul
 * and latin slots take `fontId` now; the remaining slots use `fallbackFontId`.
 *
 * @param {number} id - Character-property id.
 * @param {number} height - Value written to the `height` attribute.
 * @param {boolean} bold - Adds `bold="1"` when truthy.
 * @param {boolean} italic - Adds `italic="1"` when truthy.
 * @param {number} [fontId=0] - Font id for the hangul and latin slots.
 * @param {number} [fallbackFontId=0] - Font id for the scripts that declare a single font.
 * @returns {string} The `<hh:charPr>` element.
 */
function charPr(id, height, bold, italic, fontId = 0, fallbackFontId = 0) {
  const boldAttr = bold ? ` bold="1"` : "";
  const italicAttr = italic ? ` italic="1"` : "";
  return [
    ` <hh:charPr id="${id}" height="${height}" textColor="#000000" shadeColor="none" useFontSpace="0" useKerning="0" symMark="NONE" borderFillIDRef="0"${boldAttr}${italicAttr}>`,
    `<hh:fontRef hangul="${fontId}" latin="${fontId}" hanja="${fallbackFontId}" japanese="${fallbackFontId}" other="${fallbackFontId}" symbol="${fallbackFontId}" user="${fallbackFontId}"/>`,
    `<hh:ratio hangul="100" latin="100" hanja="100" japanese="100" other="100" symbol="100" user="100"/>`,
    `<hh:spacing hangul="0" latin="0" hanja="0" japanese="0" other="0" symbol="0" user="0"/>`,
    `<hh:relSz hangul="100" latin="100" hanja="100" japanese="100" other="100" symbol="100" user="100"/>`,
    `<hh:offset hangul="0" latin="0" hanja="0" japanese="0" other="0" symbol="0" user="0"/>`,
    `</hh:charPr>`
  ].join("\n");
}
6042
/**
 * Build one `<hh:paraPr>` (paragraph property) entry for the header.
 *
 * @param {number} id - Paragraph-property id.
 * @param {object} [opts] - Optional overrides; a key left `undefined` uses its default.
 * @param {string} [opts.align="JUSTIFY"] - Horizontal alignment.
 * @param {number} [opts.spaceBefore=0] - Written to the margin `prev` attribute.
 * @param {number} [opts.spaceAfter=0] - Written to the margin `next` attribute.
 * @param {number} [opts.lineSpacing=160] - Line spacing, emitted with type PERCENT.
 * @param {number} [opts.indent=0] - Written to the margin `indent` attribute.
 * @returns {string} The `<hh:paraPr>` element.
 */
function paraPr(id, opts = {}) {
  const pick = (value, fallback) => (value === undefined ? fallback : value);
  const align = pick(opts.align, "JUSTIFY");
  const spaceBefore = pick(opts.spaceBefore, 0);
  const spaceAfter = pick(opts.spaceAfter, 0);
  const lineSpacing = pick(opts.lineSpacing, 160);
  const indent = pick(opts.indent, 0);
  const lines = [
    ` <hh:paraPr id="${id}" tabPrIDRef="0" condense="0" fontLineHeight="0" snapToGrid="1" suppressLineNumbers="0" checked="0" textDir="AUTO">`,
    `<hh:align horizontal="${align}" vertical="BASELINE"/>`,
    `<hh:heading type="NONE" idRef="0" level="0"/>`,
    `<hh:breakSetting breakLatinWord="KEEP_WORD" breakNonLatinWord="BREAK_WORD" widowOrphan="0" keepWithNext="0" keepLines="0" pageBreakBefore="0" lineWrap="BREAK"/>`,
    `<hh:autoSpacing eAsianEng="0" eAsianNum="0"/>`,
    `<hh:margin indent="${indent}" left="0" right="0" prev="${spaceBefore}" next="${spaceAfter}"/>`,
    `<hh:lineSpacing type="PERCENT" value="${lineSpacing}"/>`,
    `<hh:border borderFillIDRef="0" offsetLeft="0" offsetRight="0" offsetTop="0" offsetBottom="0" connect="0" ignoreMargin="0"/>`,
    `</hh:paraPr>`
  ];
  return lines.join("\n");
}
6054
// generateHeaderXml(): returns the complete header XML document as one template-literal string.
// Takes no arguments; reads NS_HEAD and NS_PARA for the namespace declarations on <hh:head>.
// The <hh:refList> it emits declares, in order:
//   - 7 font-face groups (HANGUL, LATIN, HANJA, JAPANESE, OTHER, SYMBOL, USER). Only HANGUL and
//     LATIN list a second font (id 1); every other group lists id 0 only.
//   - 1 border fill (id 0) whose borders are all of type NONE.
//   - 9 character properties (ids 0-8) built by charPr(id, height, bold, italic, fontId).
//   - empty tab, numbering and bullet tables.
//   - 8 paragraph properties (ids 0-7) built by paraPr(id, opts).
//   - 1 paragraph style (id 0, engName "Normal") pointing at paraPr 0 / charPr 0.
// The itemCnt / fontCnt attributes are hard-coded, so they have to be updated by hand whenever
// an entry is added to or removed from the corresponding list.
// NOTE(review): charPr ids 0-8 and paraPr ids 0-7 presumably line up with the CHAR_* and PARA_*
// constants defined elsewhere in this file - confirm before renumbering anything here.
+ function generateHeaderXml() {
6055
+ return `<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
6056
+ <hh:head xmlns:hh="${NS_HEAD}" xmlns:hp="${NS_PARA}" version="1.4" secCnt="1">
6057
+ <hh:beginNum page="1" footnote="1" endnote="1" pic="1" tbl="1" equation="1"/>
6058
+ <hh:refList>
6059
+ <hh:fontfaces itemCnt="7">
6060
+ <hh:fontface lang="HANGUL" fontCnt="2">
6061
+ <hh:font id="0" face="\uD568\uCD08\uB86C\uBC14\uD0D5" type="TTF" isEmbedded="0">
6062
+ <hh:typeInfo familyType="FCAT_GOTHIC" weight="6" proportion="4" contrast="0" strokeVariation="1" armStyle="1" letterform="1" midline="1" xHeight="1"/>
6063
+ </hh:font>
6064
+ <hh:font id="1" face="\uD568\uCD08\uB86C\uB3CB\uC6C0" type="TTF" isEmbedded="0">
6065
+ <hh:typeInfo familyType="FCAT_GOTHIC" weight="6" proportion="4" contrast="0" strokeVariation="1" armStyle="1" letterform="1" midline="1" xHeight="1"/>
6066
+ </hh:font>
6067
+ </hh:fontface>
6068
+ <hh:fontface lang="LATIN" fontCnt="2">
6069
+ <hh:font id="0" face="Times New Roman" type="TTF" isEmbedded="0">
6070
+ <hh:typeInfo familyType="FCAT_OLDSTYLE" weight="5" proportion="4" contrast="2" strokeVariation="0" armStyle="0" letterform="0" midline="0" xHeight="4"/>
6071
+ </hh:font>
6072
+ <hh:font id="1" face="Consolas" type="TTF" isEmbedded="0">
6073
+ <hh:typeInfo familyType="FCAT_MODERN" weight="5" proportion="0" contrast="0" strokeVariation="0" armStyle="0" letterform="0" midline="0" xHeight="0"/>
6074
+ </hh:font>
6075
+ </hh:fontface>
6076
+ <hh:fontface lang="HANJA" fontCnt="1">
6077
+ <hh:font id="0" face="\uD568\uCD08\uB86C\uBC14\uD0D5" type="TTF" isEmbedded="0">
6078
+ <hh:typeInfo familyType="FCAT_GOTHIC" weight="6" proportion="4" contrast="0" strokeVariation="1" armStyle="1" letterform="1" midline="1" xHeight="1"/>
6079
+ </hh:font>
6080
+ </hh:fontface>
6081
+ <hh:fontface lang="JAPANESE" fontCnt="1">
6082
+ <hh:font id="0" face="\uAD74\uB9BC" type="TTF" isEmbedded="0">
6083
+ <hh:typeInfo familyType="FCAT_GOTHIC" weight="6" proportion="0" contrast="0" strokeVariation="1" armStyle="1" letterform="1" midline="1" xHeight="1"/>
6084
+ </hh:font>
6085
+ </hh:fontface>
6086
+ <hh:fontface lang="OTHER" fontCnt="1">
6087
+ <hh:font id="0" face="\uAD74\uB9BC" type="TTF" isEmbedded="0">
6088
+ <hh:typeInfo familyType="FCAT_GOTHIC" weight="6" proportion="0" contrast="0" strokeVariation="1" armStyle="1" letterform="1" midline="1" xHeight="1"/>
6089
+ </hh:font>
6090
+ </hh:fontface>
6091
+ <hh:fontface lang="SYMBOL" fontCnt="1">
6092
+ <hh:font id="0" face="Symbol" type="TTF" isEmbedded="0">
6093
+ <hh:typeInfo familyType="FCAT_GOTHIC" weight="6" proportion="0" contrast="0" strokeVariation="1" armStyle="1" letterform="1" midline="1" xHeight="1"/>
6094
+ </hh:font>
6095
+ </hh:fontface>
6096
+ <hh:fontface lang="USER" fontCnt="1">
6097
+ <hh:font id="0" face="\uAD74\uB9BC" type="TTF" isEmbedded="0">
6098
+ <hh:typeInfo familyType="FCAT_GOTHIC" weight="6" proportion="0" contrast="0" strokeVariation="1" armStyle="1" letterform="1" midline="1" xHeight="1"/>
6099
+ </hh:font>
6100
+ </hh:fontface>
6101
+ </hh:fontfaces>
6102
+ <hh:borderFills itemCnt="1">
6103
+ <hh:borderFill id="0" threeD="0" shadow="0" centerLine="0" breakCellSeparateLine="0">
6104
+ <hh:slash type="NONE" Crooked="0" isCounter="0"/>
6105
+ <hh:backSlash type="NONE" Crooked="0" isCounter="0"/>
6106
+ <hh:leftBorder type="NONE" width="0.1mm" color="#000000"/>
6107
+ <hh:rightBorder type="NONE" width="0.1mm" color="#000000"/>
6108
+ <hh:topBorder type="NONE" width="0.1mm" color="#000000"/>
6109
+ <hh:bottomBorder type="NONE" width="0.1mm" color="#000000"/>
6110
+ <hh:diagonal type="NONE" width="0.1mm" color="#000000"/>
6111
+ <hh:fillInfo/>
6112
+ </hh:borderFill>
6113
+ </hh:borderFills>
6114
+ <hh:charProperties itemCnt="9">
6115
+ ${charPr(0, 1e3, false, false)}
6116
+ ${charPr(1, 1e3, true, false)}
6117
+ ${charPr(2, 1e3, false, true)}
6118
+ ${charPr(3, 1e3, true, true)}
6119
+ ${charPr(4, 900, false, false, 1)}
6120
+ ${charPr(5, 1800, true, false, 1)}
6121
+ ${charPr(6, 1400, true, false, 1)}
6122
+ ${charPr(7, 1200, true, false, 1)}
6123
+ ${charPr(8, 1100, true, false, 1)}
6124
+ </hh:charProperties>
6125
+ <hh:tabProperties itemCnt="0"/>
6126
+ <hh:numberings itemCnt="0"/>
6127
+ <hh:bullets itemCnt="0"/>
6128
+ <hh:paraProperties itemCnt="8">
6129
+ ${paraPr(0)}
6130
+ ${paraPr(1, { align: "LEFT", spaceBefore: 800, spaceAfter: 200, lineSpacing: 180 })}
6131
+ ${paraPr(2, { align: "LEFT", spaceBefore: 600, spaceAfter: 150, lineSpacing: 170 })}
6132
+ ${paraPr(3, { align: "LEFT", spaceBefore: 400, spaceAfter: 100, lineSpacing: 160 })}
6133
+ ${paraPr(4, { align: "LEFT", spaceBefore: 300, spaceAfter: 100, lineSpacing: 160 })}
6134
+ ${paraPr(5, { align: "LEFT", lineSpacing: 130, indent: 400 })}
6135
+ ${paraPr(6, { align: "LEFT", lineSpacing: 150, indent: 600 })}
6136
+ ${paraPr(7, { align: "LEFT", lineSpacing: 160, indent: 600 })}
6137
+ </hh:paraProperties>
6138
+ <hh:styles itemCnt="1">
6139
+ <hh:style id="0" type="PARA" name="\uBC14\uD0D5\uAE00" engName="Normal" paraPrIDRef="0" charPrIDRef="0" nextStyleIDRef="0" langIDRef="1042" lockForm="0"/>
6140
+ </hh:styles>
6141
+ </hh:refList>
6142
+ <hh:compatibleDocument targetProgram="HWP2018"/>
6143
+ </hh:head>`;
6144
+ }
6145
/**
 * Build the fixed `<hp:secPr>` (section properties) element: grid, start
 * numbers, visibility flags, page size and margins, and the footnote/endnote
 * settings. The result is a single line with no whitespace between elements.
 *
 * @returns {string} The `<hp:secPr>` element.
 */
function generateSecPr() {
  const open = `<hp:secPr textDirection="HORIZONTAL" spaceColumns="1134" tabStop="8000" outlineShapeIDRef="0" memoShapeIDRef="0" textVerticalWidthHead="0" masterPageCnt="0">`;
  const grid = `<hp:grid lineGrid="0" charGrid="0" wonggojiFormat="0"/>`;
  const startNum = `<hp:startNum pageStartsOn="BOTH" page="0" pic="0" tbl="0" equation="0"/>`;
  const visibility = `<hp:visibility hideFirstHeader="0" hideFirstFooter="0" hideFirstMasterPage="0" border="SHOW_ALL" fill="SHOW_ALL" hideFirstPageNum="0" hideFirstEmptyLine="0" showLineNumber="0"/>`;
  const page = `<hp:pagePr landscape="WIDELY" width="59528" height="84188" gutterType="LEFT_ONLY"><hp:margin header="2835" footer="2835" gutter="0" left="5670" right="4252" top="8504" bottom="4252"/></hp:pagePr>`;
  const footNote = `<hp:footNotePr><hp:autoNumFormat type="DIGIT" userChar="" prefixChar="" suffixChar=")" supscript="0"/><hp:noteLine length="-1" type="SOLID" width="0.12 mm" color="#000000"/><hp:noteSpacing betweenNotes="283" belowLine="567" aboveLine="850"/><hp:numbering type="CONTINUOUS" newNum="1"/><hp:placement place="EACH_COLUMN" beneathText="0"/></hp:footNotePr>`;
  const endNote = `<hp:endNotePr><hp:autoNumFormat type="DIGIT" userChar="" prefixChar="" suffixChar=")" supscript="0"/><hp:noteLine length="14692344" type="SOLID" width="0.12 mm" color="#000000"/><hp:noteSpacing betweenNotes="0" belowLine="567" aboveLine="850"/><hp:numbering type="CONTINUOUS" newNum="1"/><hp:placement place="END_OF_DOCUMENT" beneathText="0"/></hp:endNotePr>`;
  const close = `</hp:secPr>`;
  return [open, grid, startNum, visibility, page, footNote, endNote, close].join("");
}
6148
/**
 * Render a grid of cell strings as an `<hp:tbl>` element.
 *
 * Each cell becomes an `<hp:tc>` with a 1x1 span holding one paragraph whose
 * runs come from generateRuns(), so inline markdown inside cells is honoured.
 *
 * @param {string[][]} rows - Table rows, each an array of cell texts.
 * @returns {string} The table element.
 */
function generateTable(rows) {
  const renderCell = (cellXml, cell) =>
    `${cellXml}<hp:tc><hp:cellSpan colSpan="1" rowSpan="1"/><hp:p paraPrIDRef="0" styleIDRef="0">${generateRuns(cell)}</hp:p></hp:tc>`;
  const renderRow = (rowXml, row) => `${rowXml}<hp:tr>${row.reduce(renderCell, "")}</hp:tr>`;
  return `<hp:tbl>${rows.reduce(renderRow, "")}</hp:tbl>`;
}
6158
/**
 * Render parsed blocks into the section XML document (`<hs:sec>`).
 *
 * Block handling:
 *  - heading:    one paragraph using the heading paragraph/character ids for
 *                `block.level` (a missing level is treated as 1)
 *  - paragraph:  one normal paragraph
 *  - code_block: one PARA_CODE paragraph per source line; an empty line is
 *                emitted as a single space
 *  - blockquote: one PARA_QUOTE paragraph
 *  - list_item:  one PARA_LIST paragraph prefixed with indentation and a marker
 *  - hr:         a paragraph holding a run of box-drawing characters
 *  - table:      `<hp:tbl>` built from `block.rows`; skipped when `rows` is missing
 *  - any other type is ignored
 *
 * The section properties from generateSecPr() are emitted exactly once. They are
 * spliced into the first run of the first paragraph produced; if the first block
 * is a table, or nothing is produced at all, they go into a dedicated empty
 * paragraph instead.
 *
 * Ordered list items are numbered by position. The marker used to be
 * `indent + 1`, which gave every item at the same depth the same number (all
 * top-level items were "1."). A running counter is now kept per depth. It
 * restarts when a shallower item, a bullet at the same depth, or any non-list
 * block interrupts the list.
 *
 * @param {Array<object>} blocks - Blocks with a `type` plus type-specific fields
 *   (`text`, `level`, `ordered`, `indent`, `rows`).
 * @returns {string} The section XML document.
 */
function blocksToSectionXml(blocks) {
  const paraXmls = [];
  let isFirst = true;
  // depth -> number of the last ordered item emitted at that depth
  const orderedCounters = new Map();
  for (const block of blocks) {
    let xml = "";
    if (block.type !== "list_item") {
      // Any other block ends every list that was in progress.
      orderedCounters.clear();
    }
    switch (block.type) {
      case "heading": {
        const pId = headingParaPrId(block.level || 1);
        const cId = headingCharPrId(block.level || 1);
        xml = generateParagraph(block.text || "", pId, cId);
        break;
      }
      case "paragraph":
        xml = generateParagraph(block.text || "");
        break;
      case "code_block": {
        const codeLines = (block.text || "").split("\n");
        xml = codeLines.map((line) => generateParagraph(line || " ", PARA_CODE)).join("\n ");
        break;
      }
      case "blockquote":
        xml = generateParagraph(block.text || "", PARA_QUOTE);
        break;
      case "list_item": {
        const depth = block.indent || 0;
        // Deeper lists end when we come back up; a bullet also ends the numbered
        // list at its own depth.
        for (const level of [...orderedCounters.keys()]) {
          if (level > depth || (!block.ordered && level === depth)) {
            orderedCounters.delete(level);
          }
        }
        let marker = "\xB7 ";
        if (block.ordered) {
          const itemNumber = (orderedCounters.get(depth) || 0) + 1;
          orderedCounters.set(depth, itemNumber);
          marker = `${itemNumber}. `;
        }
        const indentPrefix = " ".repeat(depth);
        xml = generateParagraph(indentPrefix + marker + (block.text || ""), PARA_LIST);
        break;
      }
      case "hr":
        xml = `<hp:p paraPrIDRef="0" styleIDRef="0"><hp:run charPrIDRef="0"><hp:t>\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500</hp:t></hp:run></hp:p>`;
        break;
      case "table":
        if (block.rows) {
          if (isFirst) {
            // A table has no run of its own to host the section properties, so
            // give them an empty paragraph in front of it.
            const secRun = `<hp:run charPrIDRef="0">${generateSecPr()}<hp:t></hp:t></hp:run>`;
            paraXmls.push(`<hp:p paraPrIDRef="0" styleIDRef="0">${secRun}</hp:p>`);
            isFirst = false;
          }
          xml = generateTable(block.rows);
        }
        break;
    }
    if (!xml) continue;
    if (isFirst && block.type !== "table") {
      // Non-global regex: only the first run of the first paragraph is touched.
      xml = xml.replace(
        /<hp:run charPrIDRef="(\d+)">/,
        `<hp:run charPrIDRef="$1">${generateSecPr()}`
      );
      isFirst = false;
    }
    paraXmls.push(xml);
  }
  if (paraXmls.length === 0) {
    // Empty input still needs one paragraph to carry the section properties.
    paraXmls.push(`<hp:p paraPrIDRef="0" styleIDRef="0"><hp:run charPrIDRef="0">${generateSecPr()}<hp:t></hp:t></hp:run></hp:p>`);
  }
  return [
    `<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>`,
    `<hs:sec xmlns:hs="${NS_SECTION}" xmlns:hp="${NS_PARA}">`,
    `${paraXmls.join("\n ")}`,
    `</hs:sec>`
  ].join("\n");
}
6219
+
5192
6220
  // src/index.ts
5193
6221
  import { readFile } from "fs/promises";
5194
6222
 
5195
6223
  // src/xlsx/parser.ts
5196
- import JSZip2 from "jszip";
5197
- import { DOMParser as DOMParser2 } from "@xmldom/xmldom";
6224
+ import JSZip4 from "jszip";
6225
+ import { DOMParser as DOMParser3 } from "@xmldom/xmldom";
5198
6226
  var MAX_SHEETS = 100;
5199
6227
  var MAX_DECOMPRESS_SIZE3 = 100 * 1024 * 1024;
5200
6228
  var MAX_ROWS2 = 1e4;
@@ -5231,7 +6259,7 @@ function getTextContent(el) {
5231
6259
  return el.textContent?.trim() ?? "";
5232
6260
  }
5233
6261
  // parseXml(text): passes `text` through stripDtd() and parses the result as "text/xml" with the
  // @xmldom/xmldom DOMParser, returning whatever parseFromString() returns.
  // The -/+ pair below differs only in the bundler-assigned alias of the DOMParser import.
  // NOTE(review): stripDtd is defined outside this view; presumably it removes DOCTYPE
  // declarations before parsing - confirm.
  function parseXml(text) {
5234
- return new DOMParser2().parseFromString(stripDtd(text), "text/xml");
6262
+ return new DOMParser3().parseFromString(stripDtd(text), "text/xml");
5235
6263
  }
5236
6264
  function parseSharedStrings(xml) {
5237
6265
  const doc = parseXml(xml);
@@ -5384,7 +6412,7 @@ function sheetToBlocks(sheetName, grid, merges, maxRow, maxCol, sheetIndex) {
5384
6412
  }
5385
6413
  async function parseXlsxDocument(buffer, options) {
5386
6414
  precheckZipSize(buffer, MAX_DECOMPRESS_SIZE3);
5387
- const zip = await JSZip2.loadAsync(buffer);
6415
+ const zip = await JSZip4.loadAsync(buffer);
5388
6416
  const warnings = [];
5389
6417
  const workbookFile = zip.file("xl/workbook.xml");
5390
6418
  if (!workbookFile) {
@@ -5474,24 +6502,24 @@ async function parseXlsxDocument(buffer, options) {
5474
6502
  }
5475
6503
 
5476
6504
  // src/docx/parser.ts
5477
- import JSZip3 from "jszip";
5478
- import { DOMParser as DOMParser3 } from "@xmldom/xmldom";
6505
+ import JSZip5 from "jszip";
6506
+ import { DOMParser as DOMParser4 } from "@xmldom/xmldom";
5479
6507
  var MAX_DECOMPRESS_SIZE4 = 100 * 1024 * 1024;
5480
// getChildElements(parent, name): collects the DIRECT children of `parent` that are element nodes
// (nodeType === 1) and whose name matches. A child matches when its `localName` equals the
// requested name, or when its `tagName` ends with ":" + name (a prefixed tag). Descendants
// further down are not visited. Returns an array in document order; empty when nothing matches.
// The -/+ pairs below only rename the second parameter (localName -> localName2).
- function getChildElements(parent, localName) {
6508
+ function getChildElements(parent, localName2) {
5481
6509
  const result = [];
5482
6510
  const children = parent.childNodes;
5483
6511
  for (let i = 0; i < children.length; i++) {
5484
6512
  const node = children[i];
5485
6513
  if (node.nodeType === 1) {
5486
6514
  const el = node;
5487
- if (el.localName === localName || el.tagName?.endsWith(`:${localName}`)) {
6515
+ if (el.localName === localName2 || el.tagName?.endsWith(`:${localName2}`)) {
5488
6516
  result.push(el);
5489
6517
  }
5490
6518
  }
5491
6519
  }
5492
6520
  return result;
5493
6521
  }
5494
- function findElements(parent, localName) {
6522
+ function findElements(parent, localName2) {
5495
6523
  const result = [];
5496
6524
  const walk = (node) => {
5497
6525
  const children = node.childNodes;
@@ -5499,7 +6527,7 @@ function findElements(parent, localName) {
5499
6527
  const child = children[i];
5500
6528
  if (child.nodeType === 1) {
5501
6529
  const el = child;
5502
- if (el.localName === localName || el.tagName?.endsWith(`:${localName}`)) {
6530
+ if (el.localName === localName2 || el.tagName?.endsWith(`:${localName2}`)) {
5503
6531
  result.push(el);
5504
6532
  }
5505
6533
  walk(el);
@@ -5509,16 +6537,16 @@ function findElements(parent, localName) {
5509
6537
  walk(parent);
5510
6538
  return result;
5511
6539
  }
5512
// getAttr(el, name): scans `el.attributes` in order and returns the value of the first attribute
// whose `localName` or full `name` equals the requested name. Returns null when no attribute
// matches. Matching on `localName` means a prefixed attribute is found by its unprefixed name.
// The -/+ pairs below only rename the second parameter (localName -> localName2).
- function getAttr(el, localName) {
6540
+ function getAttr(el, localName2) {
5513
6541
  const attrs = el.attributes;
5514
6542
  for (let i = 0; i < attrs.length; i++) {
5515
6543
  const attr = attrs[i];
5516
- if (attr.localName === localName || attr.name === localName) return attr.value;
6544
+ if (attr.localName === localName2 || attr.name === localName2) return attr.value;
5517
6545
  }
5518
6546
  return null;
5519
6547
  }
5520
6548
  // parseXml2(text): passes `text` through stripDtd() and parses the result as "text/xml" with the
  // @xmldom/xmldom DOMParser, returning whatever parseFromString() returns.
  // The -/+ pair below differs only in the bundler-assigned alias of the DOMParser import.
  // NOTE(review): stripDtd is defined outside this view; presumably it removes DOCTYPE
  // declarations before parsing - confirm.
  function parseXml2(text) {
5521
- return new DOMParser3().parseFromString(stripDtd(text), "text/xml");
6549
+ return new DOMParser4().parseFromString(stripDtd(text), "text/xml");
5522
6550
  }
5523
6551
  function parseStyles(xml) {
5524
6552
  const doc = parseXml2(xml);
@@ -5812,7 +6840,7 @@ async function extractImages(zip, rels, doc) {
5812
6840
  }
5813
6841
  async function parseDocxDocument(buffer, options) {
5814
6842
  precheckZipSize(buffer, MAX_DECOMPRESS_SIZE4);
5815
- const zip = await JSZip3.loadAsync(buffer);
6843
+ const zip = await JSZip5.loadAsync(buffer);
5816
6844
  const warnings = [];
5817
6845
  const docFile = zip.file("word/document.xml");
5818
6846
  if (!docFile) {
@@ -5860,11 +6888,11 @@ async function parseDocxDocument(buffer, options) {
5860
6888
  const node = children[i];
5861
6889
  if (node.nodeType !== 1) continue;
5862
6890
  const el = node;
5863
- const localName = el.localName ?? el.tagName?.split(":").pop();
5864
- if (localName === "p") {
6891
+ const localName2 = el.localName ?? el.tagName?.split(":").pop();
6892
+ if (localName2 === "p") {
5865
6893
  const block = parseParagraph(el, styles, numbering, footnotes, rels);
5866
6894
  if (block) blocks.push(block);
5867
- } else if (localName === "tbl") {
6895
+ } else if (localName2 === "tbl") {
5868
6896
  const block = parseTable(el, styles, numbering, footnotes, rels);
5869
6897
  if (block) blocks.push(block);
5870
6898
  }
@@ -5902,135 +6930,6 @@ async function parseDocxDocument(buffer, options) {
5902
6930
  };
5903
6931
  }
5904
6932
 
5905
- // src/form/recognize.ts
5906
- var LABEL_KEYWORDS = /* @__PURE__ */ new Set([
5907
- "\uC131\uBA85",
5908
- "\uC774\uB984",
5909
- "\uC8FC\uC18C",
5910
- "\uC804\uD654",
5911
- "\uC804\uD654\uBC88\uD638",
5912
- "\uD734\uB300\uD3F0",
5913
- "\uD578\uB4DC\uD3F0",
5914
- "\uC5F0\uB77D\uCC98",
5915
- "\uC0DD\uB144\uC6D4\uC77C",
5916
- "\uC8FC\uBBFC\uB4F1\uB85D\uBC88\uD638",
5917
- "\uC18C\uC18D",
5918
- "\uC9C1\uC704",
5919
- "\uC9C1\uAE09",
5920
- "\uBD80\uC11C",
5921
- "\uC774\uBA54\uC77C",
5922
- "\uD329\uC2A4",
5923
- "\uD559\uAD50",
5924
- "\uD559\uB144",
5925
- "\uBC18",
5926
- "\uBC88\uD638",
5927
- "\uC2E0\uCCAD\uC778",
5928
- "\uB300\uD45C\uC790",
5929
- "\uB2F4\uB2F9\uC790",
5930
- "\uC791\uC131\uC790",
5931
- "\uD655\uC778\uC790",
5932
- "\uC2B9\uC778\uC790",
5933
- "\uC77C\uC2DC",
5934
- "\uB0A0\uC9DC",
5935
- "\uAE30\uAC04",
5936
- "\uC7A5\uC18C",
5937
- "\uBAA9\uC801",
5938
- "\uC0AC\uC720",
5939
- "\uBE44\uACE0",
5940
- "\uAE08\uC561",
5941
- "\uC218\uB7C9",
5942
- "\uB2E8\uAC00",
5943
- "\uD569\uACC4",
5944
- "\uACC4",
5945
- "\uC18C\uACC4"
5946
- ]);
5947
- function isLabelCell(text) {
5948
- const trimmed = text.trim();
5949
- if (!trimmed || trimmed.length > 30) return false;
5950
- for (const kw of LABEL_KEYWORDS) {
5951
- if (trimmed.includes(kw)) return true;
5952
- }
5953
- if (/^[가-힣\s()·:]{2,8}$/.test(trimmed) && !/\d/.test(trimmed)) return true;
5954
- if (/^[가-힣A-Za-z\s]+[::]$/.test(trimmed)) return true;
5955
- return false;
5956
- }
5957
- function extractFormFields(blocks) {
5958
- const fields = [];
5959
- let totalTables = 0;
5960
- let formTables = 0;
5961
- for (const block of blocks) {
5962
- if (block.type !== "table" || !block.table) continue;
5963
- totalTables++;
5964
- const tableFields = extractFromTable(block.table);
5965
- if (tableFields.length > 0) {
5966
- formTables++;
5967
- fields.push(...tableFields);
5968
- }
5969
- }
5970
- for (const block of blocks) {
5971
- if (block.type === "paragraph" && block.text) {
5972
- const inlineFields = extractInlineFields(block.text);
5973
- fields.push(...inlineFields);
5974
- }
5975
- }
5976
- const confidence = totalTables > 0 ? formTables / totalTables : fields.length > 0 ? 0.3 : 0;
5977
- return { fields, confidence: Math.min(confidence, 1) };
5978
- }
5979
- function extractFromTable(table) {
5980
- const fields = [];
5981
- if (table.cols >= 2) {
5982
- for (let r = 0; r < table.rows; r++) {
5983
- for (let c = 0; c < table.cols - 1; c++) {
5984
- const labelCell = table.cells[r][c];
5985
- const valueCell = table.cells[r][c + 1];
5986
- if (isLabelCell(labelCell.text) && valueCell.text.trim()) {
5987
- fields.push({
5988
- label: labelCell.text.trim().replace(/[::]\s*$/, ""),
5989
- value: valueCell.text.trim(),
5990
- row: r,
5991
- col: c
5992
- });
5993
- }
5994
- }
5995
- }
5996
- }
5997
- if (fields.length === 0 && table.rows >= 2 && table.cols >= 2) {
5998
- const headerRow = table.cells[0];
5999
- const allLabels = headerRow.every((cell) => {
6000
- const t = cell.text.trim();
6001
- return t.length > 0 && t.length <= 20;
6002
- });
6003
- if (allLabels) {
6004
- for (let r = 1; r < table.rows; r++) {
6005
- for (let c = 0; c < table.cols; c++) {
6006
- const label = headerRow[c].text.trim();
6007
- const value = table.cells[r][c].text.trim();
6008
- if (label && value) {
6009
- fields.push({ label, value, row: r, col: c });
6010
- }
6011
- }
6012
- }
6013
- }
6014
- }
6015
- return fields;
6016
- }
6017
- function extractInlineFields(text) {
6018
- const fields = [];
6019
- const pattern = /([가-힣A-Za-z]{2,10})\s*[::]\s*([^\n,;]{1,100})/g;
6020
- let match;
6021
- while ((match = pattern.exec(text)) !== null) {
6022
- const label = match[1].trim();
6023
- const value = match[2].trim();
6024
- if (value) {
6025
- fields.push({ label, value, row: -1, col: -1 });
6026
- }
6027
- }
6028
- return fields;
6029
- }
6030
-
6031
- // src/hwpx/generator.ts
6032
- import JSZip4 from "jszip";
6033
-
6034
6933
  // src/index.ts
6035
6934
  async function parse(input, options) {
6036
6935
  let buffer;
@@ -6294,8 +7193,11 @@ export {
6294
7193
  extractHwpxMetadataOnly,
6295
7194
  extractHwp5MetadataOnly,
6296
7195
  extractPdfMetadataOnly,
6297
- compare,
6298
7196
  extractFormFields,
7197
+ fillFormFields,
7198
+ fillHwpx,
7199
+ markdownToHwpx,
7200
+ compare,
6299
7201
  parse
6300
7202
  };
6301
- //# sourceMappingURL=chunk-AIG7SDWU.js.map
7203
+ //# sourceMappingURL=chunk-SY2RFVLW.js.map