kordoc 2.2.2 → 2.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,7 +8,7 @@ import {
8
8
  } from "./chunk-MOL7MDBG.js";
9
9
 
10
10
  // src/utils.ts
11
- var VERSION = true ? "2.2.2" : "0.0.0-dev";
11
+ var VERSION = true ? "2.2.4" : "0.0.0-dev";
12
12
  function toArrayBuffer(buf) {
13
13
  if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
14
14
  return buf.buffer;
@@ -330,9 +330,47 @@ function blocksToMarkdown(blocks) {
330
330
  }
331
331
  return lines.join("\n").trim();
332
332
  }
333
/**
 * Reports whether any cell in the table spans more than one column or row.
 *
 * @param {{cells: Iterable<Iterable<{colSpan: number, rowSpan: number}>>}} table
 * @returns {boolean} true as soon as one spanning cell is found.
 */
function hasMergedCells(table) {
  const spansMultiple = (cell) => cell.colSpan > 1 || cell.rowSpan > 1;
  for (const rowCells of table.cells) {
    for (const cell of rowCells) {
      if (spansMultiple(cell)) return true;
    }
  }
  return false;
}
341
/**
 * Renders a table as an HTML `<table>` so that merged cells keep their
 * colspan/rowspan. Row 0 is emitted with `<th>`, every other row with `<td>`.
 * Positions covered by an earlier spanning cell are skipped, and a row whose
 * cells are all skipped or missing produces no `<tr>`.
 *
 * @param {{cells: Array, rows: number, cols: number}} table
 * @returns {string} HTML with one line per row.
 */
function tableToHtml(table) {
  const { cells, rows: numRows, cols: numCols } = table;
  const covered = /* @__PURE__ */ new Set();
  const coordKey = (row, col) => `${row},${col}`;
  const out = ["<table>"];
  for (let r = 0; r < numRows; r++) {
    const tag = r === 0 ? "th" : "td";
    const rendered = [];
    for (let c = 0; c < numCols; c++) {
      if (covered.has(coordKey(r, c))) continue;
      const cell = cells[r]?.[c];
      if (!cell) continue;
      // Reserve every in-bounds position this cell spans, except its own anchor.
      for (let dr = 0; dr < cell.rowSpan; dr++) {
        for (let dc = 0; dc < cell.colSpan; dc++) {
          const isAnchor = dr === 0 && dc === 0;
          const inBounds = r + dr < numRows && c + dc < numCols;
          if (!isAnchor && inBounds) covered.add(coordKey(r + dr, c + dc));
        }
      }
      const body = sanitizeText(cell.text).replace(/\n/g, "<br>");
      let attrs = "";
      if (cell.colSpan > 1) attrs += ` colspan="${cell.colSpan}"`;
      if (cell.rowSpan > 1) attrs += ` rowspan="${cell.rowSpan}"`;
      rendered.push(`<${tag}${attrs}>${body}</${tag}>`);
    }
    if (rendered.length > 0) out.push(`<tr>${rendered.join("")}</tr>`);
  }
  out.push("</table>");
  return out.join("\n");
}
333
370
  function tableToMarkdown(table) {
334
371
  if (table.rows === 0 || table.cols === 0) return "";
335
372
  const { cells, rows: numRows, cols: numCols } = table;
373
+ if (hasMergedCells(table)) return tableToHtml(table);
336
374
  if (numRows === 1 && numCols === 1) {
337
375
  const content = sanitizeText(cells[0][0].text);
338
376
  if (!content) return "";
@@ -5151,12 +5189,1040 @@ function mergeKoreanLines(text) {
5151
5189
  return result.join("\n");
5152
5190
  }
5153
5191
 
5192
+ // src/form/recognize.ts
5193
// Korean keywords that mark a cell as a form label. Matching is by substring
// (see isLabelCell / isKeywordLabel). The trailing comment on each line shows
// the decoded Hangul of the escapes on that line, in order.
var LABEL_KEYWORDS = /* @__PURE__ */ new Set([
  // person and contact
  "\uC131\uBA85", "\uC774\uB984", "\uC8FC\uC18C", // 성명 이름 주소
  "\uC804\uD654", "\uC804\uD654\uBC88\uD638", // 전화 전화번호
  "\uD734\uB300\uD3F0", "\uD578\uB4DC\uD3F0", "\uC5F0\uB77D\uCC98", // 휴대폰 핸드폰 연락처
  "\uC0DD\uB144\uC6D4\uC77C", "\uC8FC\uBBFC\uB4F1\uB85D\uBC88\uD638", // 생년월일 주민등록번호
  // organisation
  "\uC18C\uC18D", "\uC9C1\uC704", "\uC9C1\uAE09", "\uBD80\uC11C", // 소속 직위 직급 부서
  "\uC774\uBA54\uC77C", "\uD329\uC2A4", // 이메일 팩스
  // school
  "\uD559\uAD50", "\uD559\uB144", "\uBC18", "\uBC88\uD638", // 학교 학년 반 번호
  // roles
  "\uC2E0\uCCAD\uC778", "\uB300\uD45C\uC790", "\uB2F4\uB2F9\uC790", // 신청인 대표자 담당자
  "\uC791\uC131\uC790", "\uD655\uC778\uC790", "\uC2B9\uC778\uC790", // 작성자 확인자 승인자
  // time, place, reason
  "\uC77C\uC2DC", "\uB0A0\uC9DC", "\uAE30\uAC04", "\uC7A5\uC18C", // 일시 날짜 기간 장소
  "\uBAA9\uC801", "\uC0AC\uC720", "\uBE44\uACE0", // 목적 사유 비고
  // amounts
  "\uAE08\uC561", "\uC218\uB7C9", "\uB2E8\uAC00", // 금액 수량 단가
  "\uD569\uACC4", "\uACC4", "\uC18C\uACC4", // 합계 계 소계
  // civil-document terms
  "\uB4F1\uB85D\uAE30\uC900\uC9C0", "\uBCF8\uC801", "\uC704\uC784\uC778", // 등록기준지 본적 위임인
  "\uCCAD\uAD6C\uC0AC\uC720", "\uC18C\uBA85\uC790\uB8CC" // 청구사유 소명자료
]);
5239
/**
 * Heuristically decides whether a cell's text is a form label.
 *
 * A cell is a label when, after trimming and dropping trailing footnote
 * markers, it is non-empty and at most 30 characters and either
 *   1. contains one of LABEL_KEYWORDS,
 *   2. consists only of Hangul, whitespace, parentheses, middle dot and colon,
 *      with 2 to 8 non-whitespace characters, or
 *   3. is Hangul/Latin text ending in a colon.
 *
 * Both ASCII and full-width (U+FF1A, U+FF08, U+FF09) punctuation are accepted.
 * The character classes previously listed ":" and "()" twice, so the
 * full-width forms were never matched.
 *
 * @param {string} text Raw cell text.
 * @returns {boolean}
 */
function isLabelCell(text) {
  const trimmed = text.trim().replace(/[¹²³⁴⁵⁶⁷⁸⁹⁰*※]+$/g, "").trim();
  if (!trimmed || trimmed.length > 30) return false;
  for (const kw of LABEL_KEYWORDS) {
    if (trimmed.includes(kw)) return true;
  }
  // The class below cannot match digits, so no separate digit check is needed.
  const compactLength = trimmed.replace(/\s/g, "").length;
  if (
    /^[가-힣\s()\uFF08\uFF09·:\uFF1A]+$/.test(trimmed) &&
    compactLength >= 2 &&
    compactLength <= 8
  ) {
    return true;
  }
  return /^[가-힣A-Za-z\s]+[:\uFF1A]$/.test(trimmed);
}
5249
/**
 * Collects form fields from a block list: table-derived fields first, then
 * inline `label: value` fields found in paragraphs.
 *
 * Confidence is the share of tables that yielded at least one field. With no
 * tables it is 0.3 when any field was found and 0 otherwise.
 *
 * @param {Iterable<object>} blocks Document blocks (`type`, `table`, `text`).
 * @returns {{fields: object[], confidence: number}}
 */
function extractFormFields(blocks) {
  const collected = [];
  let tableCount = 0;
  let tablesWithFields = 0;

  for (const block of blocks) {
    if (block.type !== "table" || !block.table) continue;
    tableCount += 1;
    const fromTable = extractFromTable(block.table);
    if (fromTable.length === 0) continue;
    tablesWithFields += 1;
    for (const field of fromTable) collected.push(field);
  }

  for (const block of blocks) {
    if (block.type !== "paragraph" || !block.text) continue;
    for (const field of extractInlineFields(block.text)) collected.push(field);
  }

  let confidence;
  if (tableCount > 0) {
    confidence = tablesWithFields / tableCount;
  } else {
    confidence = collected.length > 0 ? 0.3 : 0;
  }
  return { fields: collected, confidence: Math.min(confidence, 1) };
}
5271
/**
 * Extracts label/value fields from a single table.
 *
 * Strategy 1 (label to the left of value): for each adjacent pair of cells in
 * a row, if the left cell is a label (isLabelCell) the right cell's text
 * becomes its value.
 * Strategy 2 (header row): used only when strategy 1 found nothing. If every
 * cell in row 0 holds 1 to 20 characters, each non-empty data cell is paired
 * with the header text of its column.
 *
 * Rows or cells missing from `table.cells` are skipped instead of throwing,
 * matching the guards in fillTable and tableToHtml. A trailing ASCII or
 * full-width colon is stripped from labels.
 *
 * @param {{rows: number, cols: number, cells: Array}} table
 * @returns {Array<{label: string, value: string, row: number, col: number}>}
 */
function extractFromTable(table) {
  const fields = [];
  const cellAt = (r, c) => table.cells[r]?.[c];
  const trimmedText = (cell) => (cell?.text ?? "").trim();

  if (table.cols >= 2) {
    for (let r = 0; r < table.rows; r++) {
      for (let c = 0; c < table.cols - 1; c++) {
        const labelCell = cellAt(r, c);
        const valueCell = cellAt(r, c + 1);
        if (!labelCell || !valueCell) continue;
        if (!isLabelCell(labelCell.text)) continue;
        fields.push({
          label: labelCell.text.trim().replace(/[:\uFF1A]\s*$/, ""),
          value: valueCell.text.trim(),
          row: r,
          col: c
        });
      }
    }
  }

  if (fields.length === 0 && table.rows >= 2 && table.cols >= 2) {
    const headerRow = table.cells[0] ?? [];
    const allLabels = headerRow.every((cell) => {
      const t = trimmedText(cell);
      return t.length > 0 && t.length <= 20;
    });
    if (allLabels) {
      for (let r = 1; r < table.rows; r++) {
        for (let c = 0; c < table.cols; c++) {
          const label = trimmedText(headerRow[c]);
          const value = trimmedText(cellAt(r, c));
          if (label && value) {
            fields.push({ label, value, row: r, col: c });
          }
        }
      }
    }
  }
  return fields;
}
5309
/**
 * Finds inline `label: value` pairs in free text.
 *
 * A label is 2 to 10 Hangul/Latin letters, followed by an ASCII or full-width
 * (U+FF1A) colon. The value runs up to the next newline, comma or semicolon
 * (at most 100 characters). The separator class previously listed ":" twice,
 * so the full-width colon was never matched.
 *
 * @param {string} text Paragraph text.
 * @returns {Array<{label: string, value: string, row: number, col: number}>}
 *   Fields with row/col set to -1; pairs whose value trims to empty are dropped.
 */
function extractInlineFields(text) {
  const pattern = /([가-힣A-Za-z]{2,10})\s*[:\uFF1A]\s*([^\n,;]{1,100})/g;
  const fields = [];
  for (const [, rawLabel, rawValue] of text.matchAll(pattern)) {
    const value = rawValue.trim();
    if (value) {
      fields.push({ label: rawLabel.trim(), value, row: -1, col: -1 });
    }
  }
  return fields;
}
5322
+
5323
+ // src/form/match.ts
5324
/**
 * Canonicalises a label for lookup by removing whitespace, colons,
 * parentheses and middle dots.
 *
 * Both ASCII and full-width (U+FF1A, U+FF08, U+FF09) forms are removed. The
 * class previously listed the ASCII characters twice, so the full-width
 * forms stayed in the key and prevented matches.
 *
 * @param {string} label
 * @returns {string}
 */
function normalizeLabel(label) {
  return label.trim().replace(/[:\uFF1A\s()\uFF08\uFF09·]/g, "");
}
5327
/**
 * Finds the key in `values` that best matches a normalised cell label.
 *
 * An exact match wins. Otherwise a key qualifies when one string is a prefix
 * of the other and the shorter is at least 60% of the longer's length; among
 * those, the key with the longest shared prefix is returned.
 *
 * @param {string} cellLabel Normalised label text.
 * @param {Map<string, *>} values Map keyed by normalised label.
 * @returns {string|undefined} The matching key, or undefined.
 */
function findMatchingKey(cellLabel, values) {
  if (values.has(cellLabel)) return cellLabel;
  let winner;
  let winnerLen = 0;
  for (const key of values.keys()) {
    let shorter;
    let longer;
    if (cellLabel.startsWith(key)) {
      shorter = key;
      longer = cellLabel;
    } else if (key.startsWith(cellLabel)) {
      shorter = cellLabel;
      longer = key;
    } else {
      continue;
    }
    if (shorter.length >= longer.length * 0.6 && shorter.length > winnerLen) {
      winnerLen = shorter.length;
      winner = key;
    }
  }
  return winner;
}
5346
/**
 * Stricter variant of the label test: true only when the text (trimmed,
 * trailing footnote markers removed, at most 15 characters) contains one of
 * LABEL_KEYWORDS.
 *
 * @param {string} text
 * @returns {boolean}
 */
function isKeywordLabel(text) {
  const core = text.trim().replace(/[¹²³⁴⁵⁶⁷⁸⁹⁰*※]+$/g, "").trim();
  if (core.length === 0 || core.length > 15) return false;
  for (const keyword of LABEL_KEYWORDS) {
    if (core.includes(keyword)) return true;
  }
  return false;
}
5354
/**
 * Fills placeholder patterns that live inside a single cell's text.
 *
 * Three patterns are handled, in this order:
 *   1. `label(   )suffix` – blank parentheses after a word. The key is the
 *      normalised `label + suffix`, falling back to `label` alone.
 *   2. `□keyword` – an empty checkbox, ticked (☑) when the supplied value is
 *      empty or one of the accepted "true" markers.
 *   3. `(keyword:   )` – a keyword, a colon and blanks inside parentheses. The
 *      filled form is written with a full-width colon (U+FF1A).
 *
 * Pattern 3 now accepts a full-width colon as well as ASCII. The class listed
 * ":" twice, so text in the form this function itself writes could never
 * match. Checkbox values are coerced with String() so a non-string value no
 * longer throws on `.trim()`.
 *
 * @param {string} cellText Original cell text.
 * @param {Map<string, *>} values Map keyed by normalised label.
 * @param {Set<string>} matchedLabels Receives every key that was used.
 * @returns {{text: string, matches: Array<{key: string, label: string, value: *}>}|null}
 *   The rewritten text and what was filled, or null when nothing matched.
 */
function fillInCellPatterns(cellText, values, matchedLabels) {
  const matches = [];
  const record = (key, label, value) => {
    matchedLabels.add(key);
    matches.push({ key, label, value });
  };
  const CHECK_MARKERS = ["\u2611", "\u2713", "\u2714", "v", "V", "true", "1", "yes", "o", "O"];

  let text = cellText.replace(
    /([가-힣A-Za-z]+)\(\s{1,}\)([가-힣A-Za-z]*)/g,
    (match, prefix, suffix) => {
      const label = prefix + suffix;
      const fullKey = normalizeLabel(label);
      const prefixKey = normalizeLabel(prefix);
      const matchKey = values.has(fullKey) ? fullKey : values.has(prefixKey) ? prefixKey : void 0;
      if (matchKey === void 0) return match;
      const newValue = values.get(matchKey);
      record(matchKey, label, newValue);
      return `${prefix}(${newValue})${suffix}`;
    }
  );

  text = text.replace(
    /□([가-힣A-Za-z]+)/g,
    (match, keyword) => {
      const matchKey = normalizeLabel(keyword);
      if (!values.has(matchKey)) return match;
      const marker = String(values.get(matchKey)).trim();
      // An empty value counts as "tick it": supplying the key at all selects the box.
      if (marker !== "" && !CHECK_MARKERS.includes(marker)) return match;
      record(matchKey, `\u25A1${keyword}`, "\u2611");
      return `\u2611${keyword}`;
    }
  );

  text = text.replace(
    /\(([가-힣A-Za-z]+)[:\uFF1A]\s{1,}\)/g,
    (match, keyword) => {
      const matchKey = normalizeLabel(keyword);
      if (!values.has(matchKey)) return match;
      const newValue = values.get(matchKey);
      record(matchKey, keyword, newValue);
      return `(${keyword}\uFF1A${newValue})`;
    }
  );

  return matches.length > 0 ? { text, matches } : null;
}
5398
/**
 * Converts a plain `{label: value}` object into a Map keyed by the
 * normalised label. When two labels normalise to the same key the later
 * entry's value wins.
 *
 * @param {Record<string, *>} values
 * @returns {Map<string, *>}
 */
function normalizeValues(values) {
  const pairs = Object.entries(values).map(([label, value]) => [normalizeLabel(label), value]);
  return new Map(pairs);
}
5405
/**
 * Lists the supplied labels that were never used. Each unused normalised key
 * is mapped back to the first original label that normalises to it; the
 * normalised key is returned when no such original is found.
 *
 * @param {Map<string, *>} normalizedValues
 * @param {Set<string>} matchedLabels Keys that were filled.
 * @param {Record<string, *>} originalValues The caller's original object.
 * @returns {string[]}
 */
function resolveUnmatched(normalizedValues, matchedLabels, originalValues) {
  const unresolved = [];
  for (const key of normalizedValues.keys()) {
    if (matchedLabels.has(key)) continue;
    let display = key;
    for (const original of Object.keys(originalValues)) {
      if (normalizeLabel(original) === key) {
        display = original;
        break;
      }
    }
    unresolved.push(display);
  }
  return unresolved;
}
5413
+
5414
+ // src/form/filler.ts
5415
/**
 * Fills form fields in a deep copy of `blocks` and reports what was filled.
 *
 * Three passes run in order, sharing one `matchedLabels` set:
 *   1. in-cell placeholder patterns in every table cell (fillInCellPatterns),
 *   2. label/value and header-based filling per table (fillTable),
 *   3. inline `label: value` pairs in paragraphs (fillInlineFields).
 *
 * The set of pattern-filled cell coordinates is tracked per table. It used to
 * be one set keyed only by "row,col", so a pattern hit at (r, c) in one table
 * made fillTable prepend instead of replace at the same coordinates in every
 * other table.
 *
 * @param {Array<object>} blocks Document blocks; not mutated.
 * @param {Record<string, *>} values Label-to-value map supplied by the caller.
 * @returns {{blocks: Array<object>, filled: Array<object>, unmatched: string[]}}
 */
function fillFormFields(blocks, values) {
  const cloned = structuredClone(blocks);
  const filled = [];
  const matchedLabels = /* @__PURE__ */ new Set();
  const normalizedValues = normalizeValues(values);

  // One coordinate set per table object; a table referenced by several blocks keeps one set.
  const patternFilledByTable = /* @__PURE__ */ new Map();
  const patternCellsFor = (table) => {
    let cellsSet = patternFilledByTable.get(table);
    if (!cellsSet) {
      cellsSet = /* @__PURE__ */ new Set();
      patternFilledByTable.set(table, cellsSet);
    }
    return cellsSet;
  };

  for (const block of cloned) {
    if (block.type !== "table" || !block.table) continue;
    const patternFilledCells = patternCellsFor(block.table);
    for (let r = 0; r < block.table.rows; r++) {
      for (let c = 0; c < block.table.cols; c++) {
        const cell = block.table.cells[r]?.[c];
        if (!cell) continue;
        const result = fillInCellPatterns(cell.text, normalizedValues, matchedLabels);
        if (!result) continue;
        cell.text = result.text;
        patternFilledCells.add(`${r},${c}`);
        for (const m of result.matches) {
          filled.push({ label: m.label, value: m.value, row: r, col: c });
        }
      }
    }
  }

  for (const block of cloned) {
    if (block.type !== "table" || !block.table) continue;
    fillTable(block.table, normalizedValues, filled, matchedLabels, patternCellsFor(block.table));
  }

  for (const block of cloned) {
    if (block.type !== "paragraph" || !block.text) continue;
    const newText = fillInlineFields(block.text, normalizedValues, filled, matchedLabels);
    if (newText !== block.text) block.text = newText;
  }

  const unmatched = resolveUnmatched(normalizedValues, matchedLabels, values);
  return { blocks: cloned, filled, unmatched };
}
5450
/**
 * Fills one table in place.
 *
 * Pass 1 (label to the left of value): when a cell is a label (isLabelCell),
 * its right neighbour is not itself a keyword label, and the label matches a
 * key in `values`, the neighbour's text is replaced. If that neighbour was
 * already rewritten by the in-cell pattern pass, the value is prepended so the
 * pattern result is kept.
 * Pass 2 (header row): when every cell of row 0 is a label of 1 to 20
 * characters, each data cell whose column header matches a not-yet-used key
 * is replaced. A key is consumed by its first use.
 *
 * Rows and cells missing from `table.cells` are skipped rather than
 * dereferenced, as fillFormFields already does. A trailing ASCII or
 * full-width colon is stripped from reported labels.
 *
 * @param {{rows: number, cols: number, cells: Array}} table Mutated in place.
 * @param {Map<string, *>} values Map keyed by normalised label.
 * @param {Array<object>} filled Receives one record per filled cell.
 * @param {Set<string>} matchedLabels Receives every key that was used.
 * @param {Set<string>} [patternFilledCells] "row,col" keys already rewritten by the pattern pass.
 * @returns {void}
 */
function fillTable(table, values, filled, matchedLabels, patternFilledCells) {
  if (table.cols < 2) return;

  for (let r = 0; r < table.rows; r++) {
    const rowCells = table.cells[r];
    if (!rowCells) continue;
    for (let c = 0; c < table.cols - 1; c++) {
      const labelCell = rowCells[c];
      const valueCell = rowCells[c + 1];
      if (!labelCell || !valueCell) continue;
      if (!isLabelCell(labelCell.text)) continue;
      if (isKeywordLabel(valueCell.text)) continue;
      const normalizedCellLabel = normalizeLabel(labelCell.text);
      if (!normalizedCellLabel) continue;
      const matchKey = findMatchingKey(normalizedCellLabel, values);
      if (matchKey === void 0) continue;
      const newValue = values.get(matchKey);
      if (patternFilledCells?.has(`${r},${c + 1}`)) {
        valueCell.text = newValue + " " + valueCell.text;
      } else {
        valueCell.text = newValue;
      }
      matchedLabels.add(matchKey);
      filled.push({
        label: labelCell.text.trim().replace(/[:\uFF1A]\s*$/, ""),
        value: newValue,
        row: r,
        col: c
      });
    }
  }

  if (table.rows < 2) return;
  const headerRow = table.cells[0];
  if (!headerRow) return;
  const allLabels = headerRow.every((cell) => {
    if (!cell) return false;
    const t = cell.text.trim();
    return t.length > 0 && t.length <= 20 && isLabelCell(t);
  });
  if (!allLabels) return;

  for (let r = 1; r < table.rows; r++) {
    for (let c = 0; c < table.cols; c++) {
      const headerCell = headerRow[c];
      const target = table.cells[r]?.[c];
      if (!headerCell || !target) continue;
      const matchKey = findMatchingKey(normalizeLabel(headerCell.text), values);
      if (matchKey === void 0) continue;
      if (matchedLabels.has(matchKey)) continue;
      const newValue = values.get(matchKey);
      target.text = newValue;
      matchedLabels.add(matchKey);
      filled.push({
        label: headerCell.text.trim(),
        value: newValue,
        row: r,
        col: c
      });
    }
  }
}
5504
/**
 * Rewrites inline `label: value` pairs in paragraph text with supplied values.
 *
 * A label is 2 to 10 Hangul/Latin letters followed by an ASCII or full-width
 * (U+FF1A) colon; the old value runs up to the next newline, comma or
 * semicolon and may be empty. Each matched pair is rewritten as
 * `label: newValue`. The separator class previously listed ":" twice, so
 * full-width colons were never matched.
 *
 * @param {string} text Paragraph text.
 * @param {Map<string, *>} values Map keyed by normalised label.
 * @param {Array<object>} filled Receives one record (row/col = -1) per replacement.
 * @param {Set<string>} matchedLabels Receives every key that was used.
 * @returns {string} The rewritten text.
 */
function fillInlineFields(text, values, filled, matchedLabels) {
  return text.replace(
    /([가-힣A-Za-z]{2,10})\s*[:\uFF1A]\s*([^\n,;]{0,100})/g,
    (match, rawLabel) => {
      const matchKey = findMatchingKey(normalizeLabel(rawLabel), values);
      if (matchKey === void 0) return match;
      const newValue = values.get(matchKey);
      matchedLabels.add(matchKey);
      filled.push({
        label: rawLabel.trim(),
        value: newValue,
        row: -1,
        col: -1
      });
      return `${rawLabel}: ${newValue}`;
    }
  );
}
5523
+
5524
+ // src/form/filler-hwpx.ts
5525
+ import JSZip2 from "jszip";
5526
+ import { DOMParser as DOMParser2, XMLSerializer } from "@xmldom/xmldom";
5527
/**
 * Fills form fields directly inside an HWPX archive and returns the re-packed
 * archive.
 *
 * For every `sectionN.xml` entry, in numeric order of N:
 *   1. in-cell placeholder patterns are filled in every table cell,
 *   2. label/value pairs (label cell to the left of value cell) are filled,
 *   3. header-row tables are filled column by column,
 *   4. the first matching inline `label: value` pair of each paragraph outside
 *      tables is rewritten.
 * A section is re-serialised only when something in it changed.
 *
 * Sections used to be ordered with a plain string sort, which puts section10
 * before section2; they are now ordered by their number. Inline and label
 * colons accept the full-width form (U+FF1A) as well as ASCII.
 *
 * @param {ArrayBuffer|Uint8Array|Buffer} hwpxBuffer HWPX file contents (anything JSZip.loadAsync accepts).
 * @param {Record<string, *>} values Label-to-value map supplied by the caller.
 * @returns {Promise<{buffer: ArrayBuffer, filled: Array<object>, unmatched: string[]}>}
 * @throws {KordocError} When the archive contains no section XML entry.
 */
async function fillHwpx(hwpxBuffer, values) {
  const zip = await JSZip2.loadAsync(hwpxBuffer);
  const filled = [];
  const matchedLabels = /* @__PURE__ */ new Set();
  const normalizedValues = normalizeValues(values);
  const sectionNumber = (name) => Number(/(\d+)\.xml$/i.exec(name)[1]);
  const sectionFiles = Object.keys(zip.files)
    .filter((name) => /section\d+\.xml$/i.test(name))
    // Numeric order first; the path itself breaks ties so the order is deterministic.
    .sort((a, b) => sectionNumber(a) - sectionNumber(b) || (a < b ? -1 : a > b ? 1 : 0));
  if (sectionFiles.length === 0) {
    throw new KordocError("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
  }
  const xmlParser = new DOMParser2();
  const xmlSerializer = new XMLSerializer();
  for (const sectionPath of sectionFiles) {
    const zipEntry = zip.file(sectionPath);
    if (!zipEntry) continue;
    const rawXml = await zipEntry.async("text");
    const doc = xmlParser.parseFromString(stripDtd(rawXml), "text/xml");
    if (!doc.documentElement) continue;
    let modified = false;
    const tables = findAllElements(doc.documentElement, "tbl");

    // Pass 1: placeholder patterns inside individual cells.
    const cellPatternApplied = /* @__PURE__ */ new Set();
    for (const tblEl of tables) {
      const allCells = findAllElements(tblEl, "tc");
      for (const tcEl of allCells) {
        const tNodes = collectCellTextNodes(tcEl);
        const fullText = tNodes.map((n) => n.text).join("");
        const result = fillInCellPatterns(fullText, normalizedValues, matchedLabels);
        if (!result) continue;
        applyTextReplacements(tNodes, fullText, result.text);
        cellPatternApplied.add(tcEl);
        for (const m of result.matches) {
          filled.push({ label: m.label, value: m.value, row: -1, col: -1 });
        }
        modified = true;
      }
    }

    for (const tblEl of tables) {
      const rows = findDirectChildren(tblEl, "tr");

      // Pass 2: label cell immediately to the left of its value cell.
      for (let rowIdx = 0; rowIdx < rows.length; rowIdx++) {
        const trEl = rows[rowIdx];
        const cells = findDirectChildren(trEl, "tc");
        for (let colIdx = 0; colIdx < cells.length - 1; colIdx++) {
          const labelText = extractCellText(cells[colIdx]);
          if (!isLabelCell(labelText)) continue;
          const valueCell = cells[colIdx + 1];
          const valueText = extractCellText(valueCell);
          if (isKeywordLabel(valueText)) continue;
          const normalizedCellLabel = normalizeLabel(labelText);
          if (!normalizedCellLabel) continue;
          const matchKey = findMatchingKey(normalizedCellLabel, normalizedValues);
          if (matchKey === void 0) continue;
          const newValue = normalizedValues.get(matchKey);
          if (cellPatternApplied.has(valueCell)) {
            // Keep what the pattern pass wrote; put the value in front of it.
            prependCellText(valueCell, newValue);
          } else {
            replaceCellText(valueCell, newValue);
          }
          matchedLabels.add(matchKey);
          filled.push({
            label: labelText.trim().replace(/[:\uFF1A]\s*$/, ""),
            value: newValue,
            row: rowIdx,
            col: colIdx
          });
          modified = true;
        }
      }

      // Pass 3: header-row tables; each key is used at most once.
      if (rows.length >= 2) {
        const headerCells = findDirectChildren(rows[0], "tc");
        const allLabels = headerCells.every((cell) => {
          const t = extractCellText(cell).trim();
          return t.length > 0 && t.length <= 20 && isLabelCell(t);
        });
        if (allLabels) {
          for (let rowIdx = 1; rowIdx < rows.length; rowIdx++) {
            const dataCells = findDirectChildren(rows[rowIdx], "tc");
            for (let colIdx = 0; colIdx < Math.min(headerCells.length, dataCells.length); colIdx++) {
              const headerLabel = normalizeLabel(extractCellText(headerCells[colIdx]));
              const matchKey = findMatchingKey(headerLabel, normalizedValues);
              if (matchKey === void 0) continue;
              if (matchedLabels.has(matchKey)) continue;
              const newValue = normalizedValues.get(matchKey);
              replaceCellText(dataCells[colIdx], newValue);
              matchedLabels.add(matchKey);
              filled.push({
                label: extractCellText(headerCells[colIdx]).trim(),
                value: newValue,
                row: rowIdx,
                col: colIdx
              });
              modified = true;
            }
          }
        }
      }
    }

    // Pass 4: inline "label: value" pairs in paragraphs outside tables.
    const allParagraphs = findAllElements(doc.documentElement, "p");
    for (const pEl of allParagraphs) {
      if (isInsideTable(pEl)) continue;
      const tNodes = collectTextNodes(pEl);
      const fullText = tNodes.map((n) => n.text).join("");
      const pattern = /([가-힣A-Za-z]{2,10})\s*[:\uFF1A]\s*([^\n,;]{0,100})/g;
      let match;
      while ((match = pattern.exec(fullText)) !== null) {
        const rawLabel = match[1];
        const normalized = normalizeLabel(rawLabel);
        const matchKey = findMatchingKey(normalized, normalizedValues);
        if (matchKey === void 0) continue;
        const newValue = normalizedValues.get(matchKey);
        const valueStart = match.index + match[0].length - match[2].length;
        const valueEnd = match.index + match[0].length;
        replaceTextRange(tNodes, valueStart, valueEnd, newValue);
        matchedLabels.add(matchKey);
        filled.push({ label: rawLabel.trim(), value: newValue, row: -1, col: -1 });
        modified = true;
        // tNodes offsets describe the pre-edit text, so stop after one replacement per paragraph.
        break;
      }
    }

    if (modified) {
      const newXml = xmlSerializer.serializeToString(doc);
      zip.file(sectionPath, newXml);
    }
  }
  const unmatched = resolveUnmatched(normalizedValues, matchedLabels, values);
  const buffer = await zip.generateAsync({ type: "arraybuffer" });
  return { buffer, filled, unmatched };
}
5653
/**
 * Returns an element's tag name with any namespace prefix removed
 * (`hp:tbl` becomes `tbl`). Uses `tagName`, then `localName`, then "".
 *
 * @param {{tagName?: string, localName?: string}} el
 * @returns {string}
 */
function localName(el) {
  const qualified = el.tagName || el.localName || "";
  return qualified.replace(/^[^:]+:/, "");
}
5656
/**
 * Collects every descendant element of `node` whose prefix-less tag name
 * equals `tagLocalName`, in document (pre-order) order. `node` itself is never
 * included, and only element nodes (nodeType 1) are visited.
 *
 * @param {object} node DOM node to search under.
 * @param {string} tagLocalName Tag name without namespace prefix.
 * @returns {object[]}
 */
function findAllElements(node, tagLocalName) {
  const found = [];
  // Explicit stack; children are pushed in reverse so they pop in document order.
  const pending = [node];
  while (pending.length > 0) {
    const current = pending.pop();
    if (current !== node && localName(current) === tagLocalName) found.push(current);
    const kids = current.childNodes;
    if (!kids) continue;
    for (let i = kids.length - 1; i >= 0; i--) {
      if (kids[i].nodeType === 1) pending.push(kids[i]);
    }
  }
  return found;
}
5671
/**
 * Returns the direct child elements of `parent` whose prefix-less tag name
 * equals `tagLocalName`, in order.
 *
 * @param {object} parent
 * @param {string} tagLocalName
 * @returns {object[]}
 */
function findDirectChildren(parent, tagLocalName) {
  const kids = parent.childNodes;
  if (!kids) return [];
  const matching = [];
  let index = 0;
  while (index < kids.length) {
    const kid = kids[index++];
    const isWanted = kid.nodeType === 1 && localName(kid) === tagLocalName;
    if (isWanted) matching.push(kid);
  }
  return matching;
}
5683
/**
 * Reports whether `el` has an ancestor element whose prefix-less tag name is
 * `tbl`.
 *
 * @param {object} el
 * @returns {boolean}
 */
function isInsideTable(el) {
  for (let ancestor = el.parentNode; ancestor; ancestor = ancestor.parentNode) {
    if (ancestor.nodeType === 1 && localName(ancestor) === "tbl") return true;
  }
  return false;
}
5691
/**
 * Concatenates the visible text of a table cell.
 *
 * Text nodes are taken as-is; `t`, `run`, `r`, `p` and `subList` elements are
 * descended into; `tab` contributes a space and `br` a newline. Any other
 * element is ignored together with its content.
 *
 * @param {object} tcEl Cell element.
 * @returns {string}
 */
function extractCellText(tcEl) {
  const DESCEND_INTO = /* @__PURE__ */ new Set(["t", "run", "r", "p", "subList"]);
  const textOf = (node) => {
    const kids = node.childNodes;
    if (!kids) return "";
    let collected = "";
    for (let i = 0; i < kids.length; i++) {
      const kid = kids[i];
      if (kid.nodeType === 3) {
        collected += kid.textContent || "";
        continue;
      }
      if (kid.nodeType !== 1) continue;
      const tag = localName(kid);
      if (DESCEND_INTO.has(tag)) collected += textOf(kid);
      else if (tag === "tab") collected += " ";
      else if (tag === "br") collected += "\n";
    }
    return collected;
  };
  return textOf(tcEl);
}
5712
/**
 * Puts `text` plus a single space in front of the existing content of the
 * first `t` element inside the cell. Does nothing when the cell has no `t`.
 *
 * @param {object} tcEl Cell element.
 * @param {string} text Text to prepend.
 * @returns {void}
 */
function prependCellText(tcEl, text) {
  const [leadT] = findAllElements(tcEl, "t");
  if (!leadT) return;
  const previous = leadT.textContent || "";
  clearChildren(leadT);
  const combined = leadT.ownerDocument.createTextNode(text + " " + previous);
  leadT.appendChild(combined);
}
5720
/**
 * Replaces the whole text of a cell with `newValue`.
 *
 * The value goes into the first run of the first paragraph (or, when that
 * paragraph has no `run`/`r` elements, into its first `t`). All other runs
 * and `t` elements of that paragraph, and all text in later paragraphs, are
 * emptied. Elements themselves are kept. Does nothing when the cell has no
 * paragraph.
 *
 * @param {object} tcEl Cell element.
 * @param {string} newValue
 * @returns {void}
 */
function replaceCellText(tcEl, newValue) {
  const [leadParagraph, ...laterParagraphs] = findAllElements(tcEl, "p");
  if (!leadParagraph) return;
  const runsOf = (p) => findAllElements(p, "run").concat(findAllElements(p, "r"));

  const leadRuns = runsOf(leadParagraph);
  if (leadRuns.length > 0) {
    leadRuns.forEach((run, index) => {
      setRunText(run, index === 0 ? newValue : "");
    });
  } else {
    const [leadT, ...otherTs] = findAllElements(leadParagraph, "t");
    if (leadT) {
      clearChildren(leadT);
      leadT.appendChild(leadT.ownerDocument.createTextNode(newValue));
      for (const extra of otherTs) clearChildren(extra);
    }
  }

  for (const paragraph of laterParagraphs) {
    if (!paragraph.parentNode) continue;
    for (const run of runsOf(paragraph)) setRunText(run, "");
    for (const t of findAllElements(paragraph, "t")) clearChildren(t);
  }
}
5750
/**
 * Sets the text of a run: the first `t` element receives `text`, every other
 * `t` inside the run is emptied. Does nothing when the run has no `t`.
 *
 * @param {object} runEl Run element.
 * @param {string} text
 * @returns {void}
 */
function setRunText(runEl, text) {
  const [target, ...extras] = findAllElements(runEl, "t");
  if (!target) return;
  clearChildren(target);
  target.appendChild(target.ownerDocument.createTextNode(text));
  for (const extra of extras) clearChildren(extra);
}
5760
/**
 * Removes every child node from `el`.
 *
 * @param {object} el
 * @returns {void}
 */
function clearChildren(el) {
  for (let child = el.firstChild; child; child = el.firstChild) {
    el.removeChild(child);
  }
}
5763
/**
 * Lists the `t` elements under a paragraph together with their text and the
 * offset of that text within the concatenation of all of them.
 *
 * @param {object} pEl Paragraph element.
 * @returns {Array<{element: object, text: string, offset: number}>}
 */
function collectTextNodes(pEl) {
  let cursor = 0;
  return findAllElements(pEl, "t").map((element) => {
    const text = element.textContent || "";
    const entry = { element, text, offset: cursor };
    cursor += text.length;
    return entry;
  });
}
5774
/**
 * Replaces the characters in `[globalStart, globalEnd)` of the concatenated
 * text of `tNodes` with `newValue`, rewriting the underlying `t` elements.
 *
 * `newValue` is written into the first node the range touches; the covered
 * part of every further node is simply removed. The `text`/`offset` fields of
 * `tNodes` are NOT updated, so the list is stale after one call.
 *
 * An empty range (globalStart === globalEnd) is a pure insertion. Previously
 * an insertion point lying exactly on a node boundary (including the very end
 * of the text) was skipped by every node, so nothing was written. Now the
 * value is inserted into the first node that contains or touches the position.
 *
 * @param {Array<{element: object, text: string, offset: number}>} tNodes
 * @param {number} globalStart Inclusive start offset.
 * @param {number} globalEnd Exclusive end offset.
 * @param {string} newValue Replacement text.
 * @returns {void}
 */
function replaceTextRange(tNodes, globalStart, globalEnd, newValue) {
  const writeText = (node, value) => {
    clearChildren(node.element);
    node.element.appendChild(node.element.ownerDocument.createTextNode(value));
  };

  if (globalStart === globalEnd) {
    const host = tNodes.find(
      (node) => node.offset <= globalStart && globalStart <= node.offset + node.text.length
    );
    if (host) {
      const at = globalStart - host.offset;
      writeText(host, host.text.slice(0, at) + newValue + host.text.slice(at));
    }
    return;
  }

  let inserted = false;
  for (const node of tNodes) {
    const nodeStart = node.offset;
    const nodeEnd = node.offset + node.text.length;
    if (nodeEnd <= globalStart || nodeStart >= globalEnd) continue;
    const localStart = Math.max(0, globalStart - nodeStart);
    const localEnd = Math.min(node.text.length, globalEnd - nodeStart);
    const before = node.text.slice(0, localStart);
    const after = node.text.slice(localEnd);
    // Only the first touched node receives the new value.
    writeText(node, before + (inserted ? "" : newValue) + after);
    inserted = true;
  }
}
5798
/**
 * Lists the `t` elements under a table cell with their text and running
 * offset.
 *
 * The body was a verbatim copy of collectTextNodes, which only depends on the
 * `t` descendants of whatever element it is given; it now delegates so the
 * two cannot drift apart.
 *
 * @param {object} tcEl Cell element.
 * @returns {Array<{element: object, text: string, offset: number}>}
 */
function collectCellTextNodes(tcEl) {
  return collectTextNodes(tcEl);
}
5809
/**
 * Applies the difference between `originalFull` and `replacedFull` to the `t`
 * elements behind `tNodes`.
 *
 * With a single node the whole text is overwritten. Otherwise the common
 * prefix and suffix of the two strings are trimmed away and only the
 * differing middle is replaced through replaceTextRange.
 *
 * @param {Array<{element: object, text: string, offset: number}>} tNodes
 * @param {string} originalFull Concatenated text before the change.
 * @param {string} replacedFull Concatenated text after the change.
 * @returns {void}
 */
function applyTextReplacements(tNodes, originalFull, replacedFull) {
  if (originalFull === replacedFull) return;

  if (tNodes.length === 1) {
    const [{ element }] = tNodes;
    clearChildren(element);
    element.appendChild(element.ownerDocument.createTextNode(replacedFull));
    return;
  }

  const shorterLength = Math.min(originalFull.length, replacedFull.length);
  let prefix = 0;
  while (prefix < shorterLength && originalFull[prefix] === replacedFull[prefix]) {
    prefix += 1;
  }

  // The suffix may not overlap the prefix in either string.
  const suffixLimit = shorterLength - prefix;
  let suffix = 0;
  while (
    suffix < suffixLimit &&
    originalFull[originalFull.length - 1 - suffix] === replacedFull[replacedFull.length - 1 - suffix]
  ) {
    suffix += 1;
  }

  const changed = replacedFull.slice(prefix, replacedFull.length - suffix);
  replaceTextRange(tNodes, prefix, originalFull.length - suffix, changed);
}
5831
+
5832
+ // src/hwpx/generator.ts
5833
+ import JSZip3 from "jszip";
5834
// XML namespace URIs used by the generated HWPX parts.
var NS_SECTION = "http://www.hancom.co.kr/hwpml/2011/section";
var NS_PARA = "http://www.hancom.co.kr/hwpml/2011/paragraph";
var NS_HEAD = "http://www.hancom.co.kr/hwpml/2011/head";
var NS_OPF = "http://www.idpf.org/2007/opf/";
var NS_HPF = "http://www.hancom.co.kr/schema/2011/hpf";
var NS_OCF = "urn:oasis:names:tc:opendocument:xmlns:container";

// Character-property ids written to `charPrIDRef`.
// NOTE(review): presumably these index the charPr entries emitted by generateHeaderXml - confirm.
var CHAR_NORMAL = 0;
var CHAR_BOLD = 1;
var CHAR_ITALIC = 2;
var CHAR_BOLD_ITALIC = 3;
var CHAR_CODE = 4;
var CHAR_H1 = 5;
var CHAR_H2 = 6;
var CHAR_H3 = 7;
var CHAR_H4 = 8;

// Paragraph-property ids written to `paraPrIDRef`.
var PARA_NORMAL = 0;
var PARA_H1 = 1;
var PARA_H2 = 2;
var PARA_H3 = 3;
var PARA_H4 = 4;
var PARA_CODE = 5;
var PARA_QUOTE = 6;
var PARA_LIST = 7;
5857
/**
 * Converts Markdown text into an HWPX archive.
 *
 * The Markdown is parsed into blocks, rendered as one section, and packed
 * with a mimetype entry (stored uncompressed), the container file, the
 * manifest, the header and `Contents/section0.xml`.
 *
 * @param {string} markdown
 * @returns {Promise<ArrayBuffer>} The zipped HWPX document.
 */
async function markdownToHwpx(markdown) {
  const sectionXml = blocksToSectionXml(parseMarkdownToBlocks(markdown));
  const archive = new JSZip3();
  archive.file("mimetype", "application/hwp+zip", { compression: "STORE" });
  archive.file("META-INF/container.xml", generateContainerXml());
  archive.file("Contents/content.hpf", generateManifest());
  archive.file("Contents/header.xml", generateHeaderXml());
  archive.file("Contents/section0.xml", sectionXml);
  const packed = await archive.generateAsync({ type: "arraybuffer" });
  return packed;
}
5868
/**
 * Splits Markdown into a flat list of block descriptors.
 *
 * Recognised, in this order of precedence per line: fenced code blocks, thematic
 * breaks, ATX headings, pipe tables, blockquotes, list items; anything else
 * is a paragraph (one per non-blank line).
 *
 * Fixes over the previous version:
 *   - input is split on `\r?\n`; with CRLF input the stray `\r` made the
 *     heading and list patterns fail (`.` does not match `\r`),
 *   - a table row without a trailing pipe (`| a | b`) keeps its last cell,
 *   - an indented blockquote marker (`  > text`) is stripped from the text.
 *
 * @param {string} md Markdown source.
 * @returns {Array<object>} Blocks of type `code_block`, `hr`, `heading`,
 *   `table`, `blockquote`, `list_item` or `paragraph`.
 */
function parseMarkdownToBlocks(md) {
  // Cells are the text between pipes. The text before the first pipe is
  // dropped, and the trailing piece only when it is blank (row ended with "|").
  const splitTableRow = (row) => {
    const parts = row.split("|").slice(1);
    if (parts.length > 0 && parts[parts.length - 1].trim() === "") parts.pop();
    return parts.map((c) => c.trim());
  };

  const lines = md.split(/\r?\n/);
  const blocks = [];
  let i = 0;
  while (i < lines.length) {
    const line = lines[i];
    if (!line.trim()) {
      i++;
      continue;
    }

    const fenceMatch = line.match(/^(`{3,}|~{3,})(.*)$/);
    if (fenceMatch) {
      const fence = fenceMatch[1];
      const lang = fenceMatch[2].trim();
      const codeLines = [];
      i++;
      while (i < lines.length && !lines[i].startsWith(fence)) {
        codeLines.push(lines[i]);
        i++;
      }
      // Skip the closing fence if there is one; an unclosed fence runs to the end.
      if (i < lines.length) i++;
      blocks.push({ type: "code_block", text: codeLines.join("\n"), lang });
      continue;
    }

    if (/^(\*{3,}|-{3,}|_{3,})\s*$/.test(line.trim())) {
      blocks.push({ type: "hr" });
      i++;
      continue;
    }

    const headingMatch = line.match(/^(#{1,6})\s+(.+)$/);
    if (headingMatch) {
      blocks.push({ type: "heading", text: headingMatch[2].trim(), level: headingMatch[1].length });
      i++;
      continue;
    }

    if (line.trimStart().startsWith("|")) {
      const tableRows = [];
      while (i < lines.length && lines[i].trimStart().startsWith("|")) {
        const row = lines[i];
        i++;
        // Delimiter rows such as |---|:-:| carry no data.
        if (/^[\s|:\-]+$/.test(row)) continue;
        const cells = splitTableRow(row);
        if (cells.length > 0) tableRows.push(cells);
      }
      if (tableRows.length > 0) blocks.push({ type: "table", rows: tableRows });
      continue;
    }

    if (line.trimStart().startsWith("> ")) {
      while (i < lines.length && lines[i].trimStart().startsWith(">")) {
        const quoted = lines[i].trimStart().replace(/^>\s?/, "");
        blocks.push({ type: "blockquote", text: quoted.trim() });
        i++;
      }
      continue;
    }

    const listMatch = line.match(/^(\s*)([-*+]|\d+[.)]) (.+)$/);
    if (listMatch) {
      const indent = Math.floor(listMatch[1].length / 2);
      const ordered = /\d/.test(listMatch[2]);
      blocks.push({ type: "list_item", text: listMatch[3].trim(), ordered, indent });
      i++;
      continue;
    }

    blocks.push({ type: "paragraph", text: line.trim() });
    i++;
  }
  return blocks;
}
5942
/**
 * Splits one line of Markdown into styled spans.
 *
 * Images and links are first reduced to their text (a link with empty text
 * falls back to its URL) and `~~strike~~` markers are dropped. The remaining
 * text is cut at inline code, bold+italic, bold and italic markers. The
 * result always contains at least one span.
 *
 * @param {string} text
 * @returns {Array<{text: string, bold: boolean, italic: boolean, code: boolean}>}
 */
function parseInlineMarkdown(text) {
  const stripped = text
    .replace(/!\[([^\]]*)\]\([^)]*\)/g, "$1")
    .replace(/\[([^\]]*)\]\(([^)]*)\)/g, (_, t, u) => t || u)
    .replace(/~~([^~]+)~~/g, "$1");

  const makeSpan = (content, style = {}) => ({
    text: content,
    bold: false,
    italic: false,
    code: false,
    ...style
  });

  const tokenPattern = /(`[^`]+`|\*{3}[^*]+\*{3}|\*{2}[^*]+\*{2}|\*[^*]+\*|_{2}[^_]+_{2}|_[^_]+_)/g;
  const spans = [];
  let cursor = 0;
  let token;
  while ((token = tokenPattern.exec(stripped)) !== null) {
    const raw = token[0];
    const start = token.index;
    if (start > cursor) spans.push(makeSpan(stripped.slice(cursor, start)));
    if (raw.startsWith("`")) {
      spans.push(makeSpan(raw.slice(1, -1), { code: true }));
    } else if (raw.startsWith("***") || raw.startsWith("___")) {
      spans.push(makeSpan(raw.slice(3, -3), { bold: true, italic: true }));
    } else if (raw.startsWith("**") || raw.startsWith("__")) {
      spans.push(makeSpan(raw.slice(2, -2), { bold: true }));
    } else {
      spans.push(makeSpan(raw.slice(1, -1), { italic: true }));
    }
    cursor = start + raw.length;
  }
  if (cursor < stripped.length) spans.push(makeSpan(stripped.slice(cursor)));
  if (spans.length === 0) spans.push(makeSpan(stripped));
  return spans;
}
5974
/**
 * Picks the character-property id for a styled span.
 *
 * Code wins over every other flag; otherwise the bold/italic combination
 * selects one of the four text styles.
 *
 * @param {{code: boolean, bold: boolean, italic: boolean}} span
 * @returns {*} One of the CHAR_* constants.
 */
function spanToCharPrId(span) {
  if (span.code) {
    return CHAR_CODE;
  }
  if (span.bold) {
    return span.italic ? CHAR_BOLD_ITALIC : CHAR_BOLD;
  }
  return span.italic ? CHAR_ITALIC : CHAR_NORMAL;
}
5981
/**
 * Makes a string safe to embed in XML element content or a double-quoted
 * attribute value.
 *
 * Characters that XML 1.0 does not allow at all (C0 controls other than tab,
 * LF and CR, plus U+FFFE/U+FFFF) are removed first, because they cannot be
 * represented even as character references and would make the generated
 * document ill-formed. The markup characters `&`, `<`, `>` and `"` are then
 * replaced by their predefined entities; `&` is handled first so the
 * entities themselves are not double-escaped.
 *
 * @param {string} text
 * @returns {string}
 */
function escapeXml(text) {
  return text
    .replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\uFFFE\uFFFF]/g, "")
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;");
}
5984
/**
 * Renders inline Markdown as a sequence of `<hp:run>` elements.
 *
 * Each span produced by parseInlineMarkdown becomes one run. Spans carrying
 * any style flag use the id chosen by spanToCharPrId; unstyled spans use
 * `defaultCharPr`.
 *
 * @param {string} text - Inline Markdown source.
 * @param {*} [defaultCharPr=CHAR_NORMAL] - Character-property id for plain spans.
 * @returns {string} Concatenated run markup.
 */
function generateRuns(text, defaultCharPr = CHAR_NORMAL) {
  let runsXml = "";
  for (const span of parseInlineMarkdown(text)) {
    const isStyled = span.code || span.bold || span.italic;
    const charId = isStyled ? spanToCharPrId(span) : defaultCharPr;
    runsXml += `<hp:run charPrIDRef="${charId}"><hp:t>${escapeXml(span.text)}</hp:t></hp:run>`;
  }
  return runsXml;
}
5991
/**
 * Wraps text in a single `<hp:p>` paragraph element.
 *
 * Code paragraphs (paraPrId === PARA_CODE) are emitted verbatim as one run
 * in the code character style, with no inline Markdown processing. Every
 * other paragraph has its text converted to styled runs by generateRuns.
 *
 * @param {string} text - Paragraph text.
 * @param {*} [paraPrId=PARA_NORMAL] - Paragraph-property id.
 * @param {*} [charPrId=CHAR_NORMAL] - Character-property id for unstyled text.
 * @returns {string} Paragraph markup.
 */
function generateParagraph(text, paraPrId = PARA_NORMAL, charPrId = CHAR_NORMAL) {
  const content = paraPrId === PARA_CODE
    ? `<hp:run charPrIDRef="${CHAR_CODE}"><hp:t>${escapeXml(text)}</hp:t></hp:run>`
    : generateRuns(text, charPrId);
  return `<hp:p paraPrIDRef="${paraPrId}" styleIDRef="0">${content}</hp:p>`;
}
5998
/**
 * Maps a heading level to its paragraph-property id.
 *
 * Levels 1-3 have dedicated ids; every other value (level 4 and deeper, or
 * anything that is not strictly 1, 2 or 3) shares PARA_H4.
 *
 * @param {number} level
 * @returns {*} One of PARA_H1..PARA_H4.
 */
function headingParaPrId(level) {
  switch (level) {
    case 1:
      return PARA_H1;
    case 2:
      return PARA_H2;
    case 3:
      return PARA_H3;
    default:
      return PARA_H4;
  }
}
6004
/**
 * Maps a heading level to its character-property id.
 *
 * Levels 1-3 have dedicated ids; every other value (level 4 and deeper, or
 * anything that is not strictly 1, 2 or 3) shares CHAR_H4.
 *
 * @param {number} level
 * @returns {*} One of CHAR_H1..CHAR_H4.
 */
function headingCharPrId(level) {
  switch (level) {
    case 1:
      return CHAR_H1;
    case 2:
      return CHAR_H2;
    case 3:
      return CHAR_H3;
    default:
      return CHAR_H4;
  }
}
6010
/**
 * Builds the OCF container document, which names `Contents/content.hpf` as
 * the package root file.
 *
 * @returns {string} XML text.
 */
function generateContainerXml() {
  const xmlLines = [
    `<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>`,
    `<ocf:container xmlns:ocf="${NS_OCF}" xmlns:hpf="${NS_HPF}">`,
    `<ocf:rootfiles>`,
    `<ocf:rootfile full-path="Contents/content.hpf" media-type="application/hwpml-package+xml"/>`,
    `</ocf:rootfiles>`,
    `</ocf:container>`
  ];
  return xmlLines.join("\n");
}
6018
/**
 * Builds the OPF package document: a manifest listing the header and the
 * single section part, and a spine that marks only the section as linear.
 *
 * @returns {string} XML text.
 */
function generateManifest() {
  const xmlLines = [
    `<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>`,
    `<opf:package xmlns:opf="${NS_OPF}" xmlns:hpf="${NS_HPF}" xmlns:hh="${NS_HEAD}">`,
    `<opf:manifest>`,
    `<opf:item id="header" href="Contents/header.xml" media-type="application/xml"/>`,
    `<opf:item id="section0" href="Contents/section0.xml" media-type="application/xml"/>`,
    `</opf:manifest>`,
    `<opf:spine>`,
    `<opf:itemref idref="header" linear="no"/>`,
    `<opf:itemref idref="section0" linear="yes"/>`,
    `</opf:spine>`,
    `</opf:package>`
  ];
  return xmlLines.join("\n");
}
6031
/**
 * Builds one `<hh:charPr>` (character property) entry for the header.
 *
 * @param {number} id - Id of the entry, referenced by `charPrIDRef`.
 * @param {number} height - Value written to the `height` attribute.
 * @param {boolean} bold - Adds `bold="1"` when truthy.
 * @param {boolean} italic - Adds `italic="1"` when truthy.
 * @param {number} [fontId=0] - Requested font id within each language group.
 * @returns {string} XML fragment.
 */
function charPr(id, height, bold, italic, fontId = 0) {
  const boldAttr = bold ? ` bold="1"` : "";
  const italicAttr = italic ? ` italic="1"` : "";
  // Highest font id that generateHeaderXml declares per language group:
  // HANGUL and LATIN list two fonts (ids 0 and 1), the others only id 0.
  // Clamping keeps every fontRef pointing at a font that exists; previously
  // fontId 1 was written to all seven slots and five of them dangled.
  // NOTE(review): keep this table in sync with the <hh:fontfaces> list.
  const maxFontId = { hangul: 1, latin: 1, hanja: 0, japanese: 0, other: 0, symbol: 0, user: 0 };
  const fontRefAttrs = Object.entries(maxFontId)
    .map(([lang, max]) => `${lang}="${Math.min(fontId, max)}"`)
    .join(" ");
  return ` <hh:charPr id="${id}" height="${height}" textColor="#000000" shadeColor="none" useFontSpace="0" useKerning="0" symMark="NONE" borderFillIDRef="0"${boldAttr}${italicAttr}>
<hh:fontRef ${fontRefAttrs}/>
<hh:ratio hangul="100" latin="100" hanja="100" japanese="100" other="100" symbol="100" user="100"/>
<hh:spacing hangul="0" latin="0" hanja="0" japanese="0" other="0" symbol="0" user="0"/>
<hh:relSz hangul="100" latin="100" hanja="100" japanese="100" other="100" symbol="100" user="100"/>
<hh:offset hangul="0" latin="0" hanja="0" japanese="0" other="0" symbol="0" user="0"/>
</hh:charPr>`;
}
6042
/**
 * Builds one `<hh:paraPr>` (paragraph property) entry for the header.
 *
 * @param {number} id - Id of the entry, referenced by `paraPrIDRef`.
 * @param {object} [opts]
 * @param {string} [opts.align="JUSTIFY"] - Horizontal alignment.
 * @param {number} [opts.spaceBefore=0] - Written to the margin `prev` attribute.
 * @param {number} [opts.spaceAfter=0] - Written to the margin `next` attribute.
 * @param {number} [opts.lineSpacing=160] - Percent line spacing.
 * @param {number} [opts.indent=0] - Written to the margin `indent` attribute.
 * @returns {string} XML fragment.
 */
function paraPr(id, opts = {}) {
  const {
    align: horizontal = "JUSTIFY",
    spaceBefore: marginPrev = 0,
    spaceAfter: marginNext = 0,
    lineSpacing: spacingPercent = 160,
    indent: marginIndent = 0
  } = opts;
  const xmlLines = [
    ` <hh:paraPr id="${id}" tabPrIDRef="0" condense="0" fontLineHeight="0" snapToGrid="1" suppressLineNumbers="0" checked="0" textDir="AUTO">`,
    `<hh:align horizontal="${horizontal}" vertical="BASELINE"/>`,
    `<hh:heading type="NONE" idRef="0" level="0"/>`,
    `<hh:breakSetting breakLatinWord="KEEP_WORD" breakNonLatinWord="BREAK_WORD" widowOrphan="0" keepWithNext="0" keepLines="0" pageBreakBefore="0" lineWrap="BREAK"/>`,
    `<hh:autoSpacing eAsianEng="0" eAsianNum="0"/>`,
    `<hh:margin indent="${marginIndent}" left="0" right="0" prev="${marginPrev}" next="${marginNext}"/>`,
    `<hh:lineSpacing type="PERCENT" value="${spacingPercent}"/>`,
    `<hh:border borderFillIDRef="0" offsetLeft="0" offsetRight="0" offsetTop="0" offsetBottom="0" connect="0" ignoreMargin="0"/>`,
    `</hh:paraPr>`
  ];
  return xmlLines.join("\n");
}
6054
/**
 * Builds the header document for the generated package.
 *
 * The header declares, in order: the font faces for seven language groups,
 * a single border fill with all borders set to NONE, nine character
 * properties (ids 0-8), eight paragraph properties (ids 0-7) and one
 * paragraph style named "Normal". Character ids 0-3 are the regular, bold,
 * italic and bold-italic variants at height 1000; ids 4-8 use font id 1 —
 * presumably the code and heading styles that the CHAR_* constants refer to.
 *
 * @returns {string} XML text.
 */
function generateHeaderXml() {
  // typeInfo variants used by the font table below.
  const gothic = (proportion) =>
    `<hh:typeInfo familyType="FCAT_GOTHIC" weight="6" proportion="${proportion}" contrast="0" strokeVariation="1" armStyle="1" letterform="1" midline="1" xHeight="1"/>`;
  const oldStyle = `<hh:typeInfo familyType="FCAT_OLDSTYLE" weight="5" proportion="4" contrast="2" strokeVariation="0" armStyle="0" letterform="0" midline="0" xHeight="4"/>`;
  const modern = `<hh:typeInfo familyType="FCAT_MODERN" weight="5" proportion="0" contrast="0" strokeVariation="0" armStyle="0" letterform="0" midline="0" xHeight="0"/>`;

  // Korean face names, kept as escapes exactly as in the bundled output.
  const hcrBatang = "\uD568\uCD08\uB86C\uBC14\uD0D5";
  const hcrDotum = "\uD568\uCD08\uB86C\uB3CB\uC6C0";
  const gulim = "\uAD74\uB9BC";

  // [language group, [[face, typeInfo], ...]]; the array index is the font id.
  const fontTable = [
    ["HANGUL", [[hcrBatang, gothic(4)], [hcrDotum, gothic(4)]]],
    ["LATIN", [["Times New Roman", oldStyle], ["Consolas", modern]]],
    ["HANJA", [[hcrBatang, gothic(4)]]],
    ["JAPANESE", [[gulim, gothic(0)]]],
    ["OTHER", [[gulim, gothic(0)]]],
    ["SYMBOL", [["Symbol", gothic(0)]]],
    ["USER", [[gulim, gothic(0)]]]
  ];
  const fontFaceLines = ([lang, fonts]) => [
    `<hh:fontface lang="${lang}" fontCnt="${fonts.length}">`,
    ...fonts.flatMap(([face, typeInfo], fontId) => [
      `<hh:font id="${fontId}" face="${face}" type="TTF" isEmbedded="0">`,
      typeInfo,
      `</hh:font>`
    ]),
    `</hh:fontface>`
  ];

  // Argument lists for charPr(id, height, bold, italic[, fontId]).
  const charPrArgs = [
    [0, 1e3, false, false],
    [1, 1e3, true, false],
    [2, 1e3, false, true],
    [3, 1e3, true, true],
    [4, 900, false, false, 1],
    [5, 1800, true, false, 1],
    [6, 1400, true, false, 1],
    [7, 1200, true, false, 1],
    [8, 1100, true, false, 1]
  ];
  // Argument lists for paraPr(id[, opts]).
  const paraPrArgs = [
    [0],
    [1, { align: "LEFT", spaceBefore: 800, spaceAfter: 200, lineSpacing: 180 }],
    [2, { align: "LEFT", spaceBefore: 600, spaceAfter: 150, lineSpacing: 170 }],
    [3, { align: "LEFT", spaceBefore: 400, spaceAfter: 100, lineSpacing: 160 }],
    [4, { align: "LEFT", spaceBefore: 300, spaceAfter: 100, lineSpacing: 160 }],
    [5, { align: "LEFT", lineSpacing: 130, indent: 400 }],
    [6, { align: "LEFT", lineSpacing: 150, indent: 600 }],
    [7, { align: "LEFT", lineSpacing: 160, indent: 600 }]
  ];

  const xmlLines = [
    `<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>`,
    `<hh:head xmlns:hh="${NS_HEAD}" xmlns:hp="${NS_PARA}" version="1.4" secCnt="1">`,
    `<hh:beginNum page="1" footnote="1" endnote="1" pic="1" tbl="1" equation="1"/>`,
    `<hh:refList>`,
    `<hh:fontfaces itemCnt="${fontTable.length}">`,
    ...fontTable.flatMap(fontFaceLines),
    `</hh:fontfaces>`,
    `<hh:borderFills itemCnt="1">`,
    `<hh:borderFill id="0" threeD="0" shadow="0" centerLine="0" breakCellSeparateLine="0">`,
    `<hh:slash type="NONE" Crooked="0" isCounter="0"/>`,
    `<hh:backSlash type="NONE" Crooked="0" isCounter="0"/>`,
    `<hh:leftBorder type="NONE" width="0.1mm" color="#000000"/>`,
    `<hh:rightBorder type="NONE" width="0.1mm" color="#000000"/>`,
    `<hh:topBorder type="NONE" width="0.1mm" color="#000000"/>`,
    `<hh:bottomBorder type="NONE" width="0.1mm" color="#000000"/>`,
    `<hh:diagonal type="NONE" width="0.1mm" color="#000000"/>`,
    `<hh:fillInfo/>`,
    `</hh:borderFill>`,
    `</hh:borderFills>`,
    `<hh:charProperties itemCnt="${charPrArgs.length}">`,
    ...charPrArgs.map((args) => charPr(...args)),
    `</hh:charProperties>`,
    `<hh:tabProperties itemCnt="0"/>`,
    `<hh:numberings itemCnt="0"/>`,
    `<hh:bullets itemCnt="0"/>`,
    `<hh:paraProperties itemCnt="${paraPrArgs.length}">`,
    ...paraPrArgs.map((args) => paraPr(...args)),
    `</hh:paraProperties>`,
    `<hh:styles itemCnt="1">`,
    `<hh:style id="0" type="PARA" name="\uBC14\uD0D5\uAE00" engName="Normal" paraPrIDRef="0" charPrIDRef="0" nextStyleIDRef="0" langIDRef="1042" lockForm="0"/>`,
    `</hh:styles>`,
    `</hh:refList>`,
    `<hh:compatibleDocument targetProgram="HWP2018"/>`,
    `</hh:head>`
  ];
  return xmlLines.join("\n");
}
6145
/**
 * Returns the fixed `<hp:secPr>` (section properties) element: grid, start
 * numbering, visibility flags, page dimensions and margins, and the
 * footnote / endnote settings. The markup is a constant with no inputs.
 *
 * @returns {string} XML fragment on a single line.
 */
function generateSecPr() {
  // Shared by the footnote and endnote blocks.
  const autoNumFormat = '<hp:autoNumFormat type="DIGIT" userChar="" prefixChar="" suffixChar=")" supscript="0"/>';
  const numbering = '<hp:numbering type="CONTINUOUS" newNum="1"/>';
  const parts = [
    '<hp:secPr textDirection="HORIZONTAL" spaceColumns="1134" tabStop="8000" outlineShapeIDRef="0" memoShapeIDRef="0" textVerticalWidthHead="0" masterPageCnt="0">',
    '<hp:grid lineGrid="0" charGrid="0" wonggojiFormat="0"/>',
    '<hp:startNum pageStartsOn="BOTH" page="0" pic="0" tbl="0" equation="0"/>',
    '<hp:visibility hideFirstHeader="0" hideFirstFooter="0" hideFirstMasterPage="0" border="SHOW_ALL" fill="SHOW_ALL" hideFirstPageNum="0" hideFirstEmptyLine="0" showLineNumber="0"/>',
    '<hp:pagePr landscape="WIDELY" width="59528" height="84188" gutterType="LEFT_ONLY">',
    '<hp:margin header="2835" footer="2835" gutter="0" left="5670" right="4252" top="8504" bottom="4252"/>',
    '</hp:pagePr>',
    '<hp:footNotePr>',
    autoNumFormat,
    '<hp:noteLine length="-1" type="SOLID" width="0.12 mm" color="#000000"/>',
    '<hp:noteSpacing betweenNotes="283" belowLine="567" aboveLine="850"/>',
    numbering,
    '<hp:placement place="EACH_COLUMN" beneathText="0"/>',
    '</hp:footNotePr>',
    '<hp:endNotePr>',
    autoNumFormat,
    '<hp:noteLine length="14692344" type="SOLID" width="0.12 mm" color="#000000"/>',
    '<hp:noteSpacing betweenNotes="0" belowLine="567" aboveLine="850"/>',
    numbering,
    '<hp:placement place="END_OF_DOCUMENT" beneathText="0"/>',
    '</hp:endNotePr>',
    '</hp:secPr>'
  ];
  return parts.join("");
}
6148
/**
 * Renders a grid of Markdown cell strings as an `<hp:tbl>` element.
 *
 * Every cell is emitted with a 1x1 span and a single default-style
 * paragraph whose content comes from generateRuns.
 *
 * @param {string[][]} rows - Table rows, each an array of cell texts.
 * @returns {string} Table markup.
 */
function generateTable(rows) {
  const renderCell = (cell) =>
    `<hp:tc><hp:cellSpan colSpan="1" rowSpan="1"/><hp:p paraPrIDRef="0" styleIDRef="0">${generateRuns(cell)}</hp:p></hp:tc>`;
  const renderRow = (row) =>
    `<hp:tr>${row.reduce((cellsXml, cell) => cellsXml + renderCell(cell), "")}</hp:tr>`;
  const body = rows.reduce((rowsXml, row) => rowsXml + renderRow(row), "");
  return `<hp:tbl>${body}</hp:tbl>`;
}
6158
/**
 * Renders parsed Markdown blocks as the section XML document.
 *
 * Each block becomes one paragraph (a code block becomes one paragraph per
 * line, a table becomes an `<hp:tbl>`). The section properties element from
 * generateSecPr must appear exactly once, inside the first run of the
 * section: it is spliced into the first emitted paragraph, or, when the
 * document starts with a table, carried by an empty paragraph placed before
 * that table. An input that produces no output still yields one empty
 * paragraph carrying the section properties.
 *
 * Ordered list items are numbered with a running counter per nesting depth.
 * The counter restarts when a list is interrupted by any non-list block or
 * by an unordered item at the same or a shallower depth. (Previously the
 * marker was derived from the indent level, so every top-level item was
 * rendered as "1.".)
 *
 * @param {Array<object>} blocks - Blocks with a `type` of "heading",
 *   "paragraph", "code_block", "blockquote", "list_item", "hr" or "table";
 *   blocks of any other type are ignored.
 * @returns {string} XML text.
 */
function blocksToSectionXml(blocks) {
  const paraXmls = [];
  // listCounters[depth] is the last number handed out at that depth.
  const listCounters = [];
  let isFirst = true;
  for (const block of blocks) {
    if (block.type !== "list_item") {
      // Any other block ends the current list, so numbering restarts.
      listCounters.length = 0;
    }
    let xml = "";
    switch (block.type) {
      case "heading": {
        const pId = headingParaPrId(block.level || 1);
        const cId = headingCharPrId(block.level || 1);
        xml = generateParagraph(block.text || "", pId, cId);
        break;
      }
      case "paragraph":
        xml = generateParagraph(block.text || "");
        break;
      case "code_block": {
        const codeLines = (block.text || "").split("\n");
        // An empty line is rendered as a single space.
        xml = codeLines.map((line) => generateParagraph(line || " ", PARA_CODE)).join("\n ");
        break;
      }
      case "blockquote":
        xml = generateParagraph(block.text || "", PARA_QUOTE);
        break;
      case "list_item": {
        const depth = block.indent || 0;
        let marker = "\xB7 ";
        if (block.ordered) {
          // Leaving a nested list discards the counters of deeper levels.
          if (listCounters.length > depth + 1) listCounters.length = depth + 1;
          listCounters[depth] = (listCounters[depth] ?? 0) + 1;
          marker = `${listCounters[depth]}. `;
        } else if (listCounters.length > depth) {
          // A bullet at this depth interrupts ordered numbering here and below.
          listCounters.length = depth;
        }
        const indentPrefix = " ".repeat(depth);
        xml = generateParagraph(indentPrefix + marker + (block.text || ""), PARA_LIST);
        break;
      }
      case "hr":
        xml = `<hp:p paraPrIDRef="0" styleIDRef="0"><hp:run charPrIDRef="0"><hp:t>\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500</hp:t></hp:run></hp:p>`;
        break;
      case "table":
        if (block.rows) {
          if (isFirst) {
            // A table has no leading run to host the section properties,
            // so an empty carrier paragraph is emitted before it.
            const secRun = `<hp:run charPrIDRef="0">${generateSecPr()}<hp:t></hp:t></hp:run>`;
            paraXmls.push(`<hp:p paraPrIDRef="0" styleIDRef="0">${secRun}</hp:p>`);
            isFirst = false;
          }
          xml = generateTable(block.rows);
        }
        break;
    }
    if (!xml) continue;
    if (isFirst && block.type !== "table") {
      // Splice the section properties right after the first run's opening
      // tag. A replacer function is used so the inserted markup is never
      // interpreted as a `$`-pattern.
      xml = xml.replace(
        /<hp:run charPrIDRef="(\d+)">/,
        (runOpenTag) => `${runOpenTag}${generateSecPr()}`
      );
      isFirst = false;
    }
    paraXmls.push(xml);
  }
  if (paraXmls.length === 0) {
    paraXmls.push(`<hp:p paraPrIDRef="0" styleIDRef="0"><hp:run charPrIDRef="0">${generateSecPr()}<hp:t></hp:t></hp:run></hp:p>`);
  }
  return `<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
<hs:sec xmlns:hs="${NS_SECTION}" xmlns:hp="${NS_PARA}">
${paraXmls.join("\n ")}
</hs:sec>`;
}
6219
+
5154
6220
  // src/index.ts
5155
6221
  import { readFile } from "fs/promises";
5156
6222
 
5157
6223
  // src/xlsx/parser.ts
5158
- import JSZip2 from "jszip";
5159
- import { DOMParser as DOMParser2 } from "@xmldom/xmldom";
6224
+ import JSZip4 from "jszip";
6225
+ import { DOMParser as DOMParser3 } from "@xmldom/xmldom";
5160
6226
  var MAX_SHEETS = 100;
5161
6227
  var MAX_DECOMPRESS_SIZE3 = 100 * 1024 * 1024;
5162
6228
  var MAX_ROWS2 = 1e4;
@@ -5193,7 +6259,7 @@ function getTextContent(el) {
5193
6259
  return el.textContent?.trim() ?? "";
5194
6260
  }
5195
6261
  function parseXml(text) {
5196
- return new DOMParser2().parseFromString(stripDtd(text), "text/xml");
6262
+ return new DOMParser3().parseFromString(stripDtd(text), "text/xml");
5197
6263
  }
5198
6264
  function parseSharedStrings(xml) {
5199
6265
  const doc = parseXml(xml);
@@ -5346,7 +6412,7 @@ function sheetToBlocks(sheetName, grid, merges, maxRow, maxCol, sheetIndex) {
5346
6412
  }
5347
6413
  async function parseXlsxDocument(buffer, options) {
5348
6414
  precheckZipSize(buffer, MAX_DECOMPRESS_SIZE3);
5349
- const zip = await JSZip2.loadAsync(buffer);
6415
+ const zip = await JSZip4.loadAsync(buffer);
5350
6416
  const warnings = [];
5351
6417
  const workbookFile = zip.file("xl/workbook.xml");
5352
6418
  if (!workbookFile) {
@@ -5436,24 +6502,24 @@ async function parseXlsxDocument(buffer, options) {
5436
6502
  }
5437
6503
 
5438
6504
  // src/docx/parser.ts
5439
- import JSZip3 from "jszip";
5440
- import { DOMParser as DOMParser3 } from "@xmldom/xmldom";
6505
+ import JSZip5 from "jszip";
6506
+ import { DOMParser as DOMParser4 } from "@xmldom/xmldom";
5441
6507
  var MAX_DECOMPRESS_SIZE4 = 100 * 1024 * 1024;
5442
- function getChildElements(parent, localName) {
6508
+ function getChildElements(parent, localName2) {
5443
6509
  const result = [];
5444
6510
  const children = parent.childNodes;
5445
6511
  for (let i = 0; i < children.length; i++) {
5446
6512
  const node = children[i];
5447
6513
  if (node.nodeType === 1) {
5448
6514
  const el = node;
5449
- if (el.localName === localName || el.tagName?.endsWith(`:${localName}`)) {
6515
+ if (el.localName === localName2 || el.tagName?.endsWith(`:${localName2}`)) {
5450
6516
  result.push(el);
5451
6517
  }
5452
6518
  }
5453
6519
  }
5454
6520
  return result;
5455
6521
  }
5456
- function findElements(parent, localName) {
6522
+ function findElements(parent, localName2) {
5457
6523
  const result = [];
5458
6524
  const walk = (node) => {
5459
6525
  const children = node.childNodes;
@@ -5461,7 +6527,7 @@ function findElements(parent, localName) {
5461
6527
  const child = children[i];
5462
6528
  if (child.nodeType === 1) {
5463
6529
  const el = child;
5464
- if (el.localName === localName || el.tagName?.endsWith(`:${localName}`)) {
6530
+ if (el.localName === localName2 || el.tagName?.endsWith(`:${localName2}`)) {
5465
6531
  result.push(el);
5466
6532
  }
5467
6533
  walk(el);
@@ -5471,16 +6537,16 @@ function findElements(parent, localName) {
5471
6537
  walk(parent);
5472
6538
  return result;
5473
6539
  }
5474
- function getAttr(el, localName) {
6540
+ function getAttr(el, localName2) {
5475
6541
  const attrs = el.attributes;
5476
6542
  for (let i = 0; i < attrs.length; i++) {
5477
6543
  const attr = attrs[i];
5478
- if (attr.localName === localName || attr.name === localName) return attr.value;
6544
+ if (attr.localName === localName2 || attr.name === localName2) return attr.value;
5479
6545
  }
5480
6546
  return null;
5481
6547
  }
5482
6548
  function parseXml2(text) {
5483
- return new DOMParser3().parseFromString(stripDtd(text), "text/xml");
6549
+ return new DOMParser4().parseFromString(stripDtd(text), "text/xml");
5484
6550
  }
5485
6551
  function parseStyles(xml) {
5486
6552
  const doc = parseXml2(xml);
@@ -5774,7 +6840,7 @@ async function extractImages(zip, rels, doc) {
5774
6840
  }
5775
6841
  async function parseDocxDocument(buffer, options) {
5776
6842
  precheckZipSize(buffer, MAX_DECOMPRESS_SIZE4);
5777
- const zip = await JSZip3.loadAsync(buffer);
6843
+ const zip = await JSZip5.loadAsync(buffer);
5778
6844
  const warnings = [];
5779
6845
  const docFile = zip.file("word/document.xml");
5780
6846
  if (!docFile) {
@@ -5822,11 +6888,11 @@ async function parseDocxDocument(buffer, options) {
5822
6888
  const node = children[i];
5823
6889
  if (node.nodeType !== 1) continue;
5824
6890
  const el = node;
5825
- const localName = el.localName ?? el.tagName?.split(":").pop();
5826
- if (localName === "p") {
6891
+ const localName2 = el.localName ?? el.tagName?.split(":").pop();
6892
+ if (localName2 === "p") {
5827
6893
  const block = parseParagraph(el, styles, numbering, footnotes, rels);
5828
6894
  if (block) blocks.push(block);
5829
- } else if (localName === "tbl") {
6895
+ } else if (localName2 === "tbl") {
5830
6896
  const block = parseTable(el, styles, numbering, footnotes, rels);
5831
6897
  if (block) blocks.push(block);
5832
6898
  }
@@ -5864,135 +6930,6 @@ async function parseDocxDocument(buffer, options) {
5864
6930
  };
5865
6931
  }
5866
6932
 
5867
- // src/form/recognize.ts
5868
- var LABEL_KEYWORDS = /* @__PURE__ */ new Set([
5869
- "\uC131\uBA85",
5870
- "\uC774\uB984",
5871
- "\uC8FC\uC18C",
5872
- "\uC804\uD654",
5873
- "\uC804\uD654\uBC88\uD638",
5874
- "\uD734\uB300\uD3F0",
5875
- "\uD578\uB4DC\uD3F0",
5876
- "\uC5F0\uB77D\uCC98",
5877
- "\uC0DD\uB144\uC6D4\uC77C",
5878
- "\uC8FC\uBBFC\uB4F1\uB85D\uBC88\uD638",
5879
- "\uC18C\uC18D",
5880
- "\uC9C1\uC704",
5881
- "\uC9C1\uAE09",
5882
- "\uBD80\uC11C",
5883
- "\uC774\uBA54\uC77C",
5884
- "\uD329\uC2A4",
5885
- "\uD559\uAD50",
5886
- "\uD559\uB144",
5887
- "\uBC18",
5888
- "\uBC88\uD638",
5889
- "\uC2E0\uCCAD\uC778",
5890
- "\uB300\uD45C\uC790",
5891
- "\uB2F4\uB2F9\uC790",
5892
- "\uC791\uC131\uC790",
5893
- "\uD655\uC778\uC790",
5894
- "\uC2B9\uC778\uC790",
5895
- "\uC77C\uC2DC",
5896
- "\uB0A0\uC9DC",
5897
- "\uAE30\uAC04",
5898
- "\uC7A5\uC18C",
5899
- "\uBAA9\uC801",
5900
- "\uC0AC\uC720",
5901
- "\uBE44\uACE0",
5902
- "\uAE08\uC561",
5903
- "\uC218\uB7C9",
5904
- "\uB2E8\uAC00",
5905
- "\uD569\uACC4",
5906
- "\uACC4",
5907
- "\uC18C\uACC4"
5908
- ]);
5909
- function isLabelCell(text) {
5910
- const trimmed = text.trim();
5911
- if (!trimmed || trimmed.length > 30) return false;
5912
- for (const kw of LABEL_KEYWORDS) {
5913
- if (trimmed.includes(kw)) return true;
5914
- }
5915
- if (/^[가-힣\s()·:]{2,8}$/.test(trimmed) && !/\d/.test(trimmed)) return true;
5916
- if (/^[가-힣A-Za-z\s]+[::]$/.test(trimmed)) return true;
5917
- return false;
5918
- }
5919
- function extractFormFields(blocks) {
5920
- const fields = [];
5921
- let totalTables = 0;
5922
- let formTables = 0;
5923
- for (const block of blocks) {
5924
- if (block.type !== "table" || !block.table) continue;
5925
- totalTables++;
5926
- const tableFields = extractFromTable(block.table);
5927
- if (tableFields.length > 0) {
5928
- formTables++;
5929
- fields.push(...tableFields);
5930
- }
5931
- }
5932
- for (const block of blocks) {
5933
- if (block.type === "paragraph" && block.text) {
5934
- const inlineFields = extractInlineFields(block.text);
5935
- fields.push(...inlineFields);
5936
- }
5937
- }
5938
- const confidence = totalTables > 0 ? formTables / totalTables : fields.length > 0 ? 0.3 : 0;
5939
- return { fields, confidence: Math.min(confidence, 1) };
5940
- }
5941
- function extractFromTable(table) {
5942
- const fields = [];
5943
- if (table.cols >= 2) {
5944
- for (let r = 0; r < table.rows; r++) {
5945
- for (let c = 0; c < table.cols - 1; c++) {
5946
- const labelCell = table.cells[r][c];
5947
- const valueCell = table.cells[r][c + 1];
5948
- if (isLabelCell(labelCell.text) && valueCell.text.trim()) {
5949
- fields.push({
5950
- label: labelCell.text.trim().replace(/[::]\s*$/, ""),
5951
- value: valueCell.text.trim(),
5952
- row: r,
5953
- col: c
5954
- });
5955
- }
5956
- }
5957
- }
5958
- }
5959
- if (fields.length === 0 && table.rows >= 2 && table.cols >= 2) {
5960
- const headerRow = table.cells[0];
5961
- const allLabels = headerRow.every((cell) => {
5962
- const t = cell.text.trim();
5963
- return t.length > 0 && t.length <= 20;
5964
- });
5965
- if (allLabels) {
5966
- for (let r = 1; r < table.rows; r++) {
5967
- for (let c = 0; c < table.cols; c++) {
5968
- const label = headerRow[c].text.trim();
5969
- const value = table.cells[r][c].text.trim();
5970
- if (label && value) {
5971
- fields.push({ label, value, row: r, col: c });
5972
- }
5973
- }
5974
- }
5975
- }
5976
- }
5977
- return fields;
5978
- }
5979
- function extractInlineFields(text) {
5980
- const fields = [];
5981
- const pattern = /([가-힣A-Za-z]{2,10})\s*[::]\s*([^\n,;]{1,100})/g;
5982
- let match;
5983
- while ((match = pattern.exec(text)) !== null) {
5984
- const label = match[1].trim();
5985
- const value = match[2].trim();
5986
- if (value) {
5987
- fields.push({ label, value, row: -1, col: -1 });
5988
- }
5989
- }
5990
- return fields;
5991
- }
5992
-
5993
- // src/hwpx/generator.ts
5994
- import JSZip4 from "jszip";
5995
-
5996
6933
  // src/index.ts
5997
6934
  async function parse(input, options) {
5998
6935
  let buffer;
@@ -6256,8 +7193,11 @@ export {
6256
7193
  extractHwpxMetadataOnly,
6257
7194
  extractHwp5MetadataOnly,
6258
7195
  extractPdfMetadataOnly,
6259
- compare,
6260
7196
  extractFormFields,
7197
+ fillFormFields,
7198
+ fillHwpx,
7199
+ markdownToHwpx,
7200
+ compare,
6261
7201
  parse
6262
7202
  };
6263
- //# sourceMappingURL=chunk-R34CFFNV.js.map
7203
+ //# sourceMappingURL=chunk-SY2RFVLW.js.map