hwpkit-dev 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.npmignore +1 -0
- package/dist/index.d.mts +34 -3
- package/dist/index.d.ts +30 -3
- package/dist/index.js +2138 -245
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +2134 -245
- package/dist/index.mjs.map +1 -1
- package/hwp-analyze.ts +90 -0
- package/inspect-doc.ts +57 -0
- package/output_test.hwp +0 -0
- package/package.json +3 -1
- package/src/decoders/docx/DocxDecoder.ts +155 -30
- package/src/decoders/hwp/HwpScanner.ts +258 -37
- package/src/decoders/hwpx/HwpxDecoder.ts +9 -1
- package/src/encoders/docx/DocxEncoder.ts +199 -158
- package/src/encoders/html/HtmlEncoder.ts +205 -0
- package/src/encoders/hwp/HwpEncoder.ts +864 -222
- package/src/encoders/hwpx/HwpxEncoder.ts +119 -59
- package/src/encoders/md/MdEncoder.ts +98 -16
- package/src/index.ts +1 -0
- package/src/model/builders.ts +4 -2
- package/src/model/doc-tree.ts +1 -1
- package/src/pipeline/Pipeline.ts +14 -1
- package/src/safety/StyleBridge.ts +1 -1
- package/test-docx-to-hwp.ts +45 -0
|
@@ -88,6 +88,7 @@ interface HwpxCtx {
|
|
|
88
88
|
fonts: string[];
|
|
89
89
|
fontMap: Map<string, number>;
|
|
90
90
|
imgMap: WeakMap<ImgNode, string>; // ImgNode → binId (no mutation)
|
|
91
|
+
nextZOrder: number; // monotonically increasing z-order for images/objects
|
|
91
92
|
}
|
|
92
93
|
|
|
93
94
|
function charPrKey(p: TextProps): string {
|
|
@@ -229,7 +230,37 @@ function addBorderFill(
|
|
|
229
230
|
fill = `<hc:fillBrush><hc:winBrush faceColor="${bc}" hatchColor="none" alpha="0"/></hc:fillBrush>`;
|
|
230
231
|
}
|
|
231
232
|
|
|
232
|
-
const xml = `<hh:borderFill id="${id}" threeD="0" shadow="0" centerLine="NONE" breakCellSeparateLine="0"><hh:slash type="NONE" Crooked="0" isCounter="0"/><hh:backSlash type="NONE" Crooked="0" isCounter="0"/><hh:leftBorder type="${type}" width="${w}" color="${c}"/><hh:rightBorder type="${type}" width="${w}" color="${c}"/><hh:topBorder type="${type}" width="${w}" color="${c}"/><hh:bottomBorder type="${type}" width="${w}" color="${c}"/><hh:diagonal type="
|
|
233
|
+
const xml = `<hh:borderFill id="${id}" threeD="0" shadow="0" centerLine="NONE" breakCellSeparateLine="0"><hh:slash type="NONE" Crooked="0" isCounter="0"/><hh:backSlash type="NONE" Crooked="0" isCounter="0"/><hh:leftBorder type="${type}" width="${w}" color="${c}"/><hh:rightBorder type="${type}" width="${w}" color="${c}"/><hh:topBorder type="${type}" width="${w}" color="${c}"/><hh:bottomBorder type="${type}" width="${w}" color="${c}"/><hh:diagonal type="NONE" width="0.12 mm" color="#000000"/>${fill}</hh:borderFill>`;
|
|
234
|
+
ctx.borderFills.push({ id, xml });
|
|
235
|
+
return id;
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
/**
 * Appends a `<hh:borderFill>` definition whose four edges are styled
 * independently and returns the id assigned to it.
 *
 * @param ctx     Encoder context; the new definition is pushed onto `ctx.borderFills`.
 * @param top     Stroke for the top edge; omitted → type "NONE".
 * @param right   Stroke for the right edge; omitted → type "NONE".
 * @param bottom  Stroke for the bottom edge; omitted → type "NONE".
 * @param left    Stroke for the left edge; omitted → type "NONE".
 * @param bgColor Optional face colour (with or without a leading `#`).
 * @returns The 1-based id of the new entry (`ctx.borderFills.length + 1` before the push).
 */
function addBorderFillPerSide(
  ctx: HwpxCtx,
  top?: Stroke,
  right?: Stroke,
  bottom?: Stroke,
  left?: Stroke,
  bgColor?: string,
): number {
  // Stroke kind → HWPX border type keyword; unknown kinds fall back to "SOLID".
  const strokeTypes: Record<string, string> = {
    solid: "SOLID", dash: "DASH", dot: "DOT", double: "DOUBLE", none: "NONE",
  };

  // Colours are emitted with exactly one leading '#'.
  const withHash = (hex: string): string => (hex.startsWith("#") ? hex : `#${hex}`);

  // Renders a single edge element; a missing stroke becomes an invisible border.
  const edge = (tag: string, stroke?: Stroke): string => {
    if (!stroke) {
      return `<hh:${tag} type="NONE" width="0.12 mm" color="#000000"/>`;
    }
    const borderType = strokeTypes[stroke.kind] ?? "SOLID";
    // The point size is scaled by 0.3528 and written with an " mm" suffix.
    const width = `${(stroke.pt * 0.3528).toFixed(2)} mm`;
    return `<hh:${tag} type="${borderType}" width="${width}" color="${withHash(stroke.color)}"/>`;
  };

  const newId = ctx.borderFills.length + 1;

  const brush = bgColor
    ? `<hc:fillBrush><hc:winBrush faceColor="${withHash(bgColor)}" hatchColor="none" alpha="0"/></hc:fillBrush>`
    : "";

  // Edge order in the output is left, right, top, bottom.
  const edges =
    edge("leftBorder", left) +
    edge("rightBorder", right) +
    edge("topBorder", top) +
    edge("bottomBorder", bottom);

  const xml = `<hh:borderFill id="${newId}" threeD="0" shadow="0" centerLine="NONE" breakCellSeparateLine="0"><hh:slash type="NONE" Crooked="0" isCounter="0"/><hh:backSlash type="NONE" Crooked="0" isCounter="0"/>${edges}<hh:diagonal type="NONE" width="0.12 mm" color="#000000"/>${brush}</hh:borderFill>`;
  ctx.borderFills.push({ id: newId, xml });
  return newId;
}
|
|
@@ -263,6 +294,7 @@ export class HwpxEncoder implements Encoder {
|
|
|
263
294
|
fonts: [],
|
|
264
295
|
fontMap: new Map(),
|
|
265
296
|
imgMap: new WeakMap(),
|
|
297
|
+
nextZOrder: 0,
|
|
266
298
|
};
|
|
267
299
|
|
|
268
300
|
// Default borderFill (id=1, no border)
|
|
@@ -615,112 +647,131 @@ function encodeImage(img: ImgNode, ctx: HwpxCtx): string {
|
|
|
615
647
|
const vertOffset = layout?.yPt != null ? Metric.ptToHwp(layout.yPt) : 0;
|
|
616
648
|
|
|
617
649
|
// hp:pic children must follow the exact HWPX spec order.
|
|
618
|
-
|
|
650
|
+
const zOrder = ctx.nextZOrder++;
|
|
651
|
+
return `<hp:run charPrIDRef="${charPrId}"><hp:pic id="${ctx.nextElementId++}" zOrder="${zOrder}" numberingType="PICTURE" textWrap="${textWrap}" textFlow="${textFlow}" lock="0" dropcapstyle="None" href="" groupLevel="0" instid="0" reverse="0"><hp:offset x="0" y="0"/><hp:orgSz width="${w}" height="${h}"/><hp:curSz width="${w}" height="${h}"/><hp:flip horizontal="0" vertical="0"/><hp:rotationInfo angle="0" centerX="${cx}" centerY="${cy}" rotateimage="1"/><hp:renderingInfo><hc:transMatrix e1="1" e2="0" e3="0" e4="0" e5="1" e6="0"/><hc:scaMatrix e1="1" e2="0" e3="0" e4="0" e5="1" e6="0"/><hc:rotMatrix e1="1" e2="0" e3="0" e4="0" e5="1" e6="0"/></hp:renderingInfo><hp:imgRect><hc:pt0 x="0" y="0"/><hc:pt1 x="${w}" y="0"/><hc:pt2 x="${w}" y="${h}"/><hc:pt3 x="0" y="${h}"/></hp:imgRect><hp:imgClip left="0" right="0" top="0" bottom="0"/><hp:inMargin left="0" right="0" top="0" bottom="0"/><hp:imgDim dimwidth="${w}" dimheight="${h}"/><hc:img binaryItemIDRef="${binId}" bright="0" contrast="0" effect="REAL_PIC" alpha="0"/><hp:effects/><hp:sz width="${w}" widthRelTo="ABSOLUTE" height="${h}" heightRelTo="ABSOLUTE" protect="0"/><hp:pos treatAsChar="${treatAsChar}" affectLSpacing="0" flowWithText="${flowWithText}" allowOverlap="${allowOverlap}" holdAnchorAndSO="0" vertRelTo="${vertRelTo}" horzRelTo="${horzRelTo}" vertAlign="${vertAlign}" horzAlign="${horzAlign}" vertOffset="${vertOffset}" horzOffset="${horzOffset}"/><hp:outMargin left="0" right="0" top="0" bottom="0"/></hp:pic><hp:t></hp:t></hp:run>`;
|
|
619
652
|
}
|
|
620
653
|
|
|
621
654
|
/**
 * Encodes a grid (table) node as an HWPX `<hp:tbl>` element.
 *
 * Works in four steps:
 *  1. build a virtual 2-D occupancy map so row/column spans land on the right
 *     column addresses;
 *  2. derive column widths (explicit widths when their count matches, otherwise
 *     an even split of `ctx.availableWidth`), scaling down if they overflow;
 *  3. derive row heights (explicit `heightPt`, else the tallest estimated cell);
 *  4. assemble the row/cell XML.
 *
 * @param grid Table node to encode.
 * @param ctx  Encoder context; border fills may be appended and
 *             `ctx.nextElementId` is incremented for the table id.
 * @returns The serialized `<hp:tbl>` XML.
 */
function encodeGrid(grid: GridNode, ctx: HwpxCtx): string {
  const rowCount = grid.kids.length;

  // Step 1: build the virtual table map.
  // A slot is either the anchor of a real cell or a position swallowed by a
  // neighbouring cell's row/column span. Modelled as a discriminated union so
  // `cell` is available without non-null assertions.
  type CellSlot = { type: 'real'; cell: CellNode } | { type: 'absorbed' };
  const tableMap: (CellSlot | undefined)[][] = Array.from({ length: rowCount }, () => []);

  for (let ri = 0; ri < rowCount; ri++) {
    let ci = 0;
    for (const cell of grid.kids[ri].kids) {
      while (tableMap[ri][ci]) ci++; // skip slots that are already occupied

      tableMap[ri][ci] = { type: 'real', cell };

      // Reserve the merged area covered by this cell's spans.
      for (let rr = 0; rr < cell.rs; rr++) {
        const targetRi = ri + rr;
        if (targetRi >= rowCount) break; // span runs past the last row
        for (let cc = 0; cc < cell.cs; cc++) {
          if (rr === 0 && cc === 0) continue; // the anchor slot itself
          tableMap[targetRi][ci + cc] = { type: 'absorbed' };
        }
      }
      ci += cell.cs;
    }
  }

  // Total column count is the widest mapped row (at least 1).
  let colCount = 0;
  for (let ri = 0; ri < rowCount; ri++) {
    colCount = Math.max(colCount, tableMap[ri].length);
  }
  if (colCount === 0) colCount = 1;

  // Step 2: column widths.
  const totalWidth = ctx.availableWidth;
  const defaultColW = Math.round(totalWidth / colCount);
  const colWidths: number[] = [];
  if (grid.props.colWidths && grid.props.colWidths.length === colCount) {
    // Non-positive widths share whatever space the known widths leave over.
    const srcPt = [...grid.props.colWidths];
    const knownTotal = srcPt.filter((w) => w > 0).reduce((s, w) => s + w, 0);
    const zeroCount = srcPt.filter((w) => w <= 0).length;
    const availPt = Metric.hwpToPt(totalWidth);
    const remaining = Math.max(0, availPt - knownTotal);
    const zeroFill = zeroCount > 0 ? remaining / zeroCount : 0;
    for (let i = 0; i < srcPt.length; i++) {
      if (srcPt[i] <= 0) srcPt[i] = zeroFill > 0 ? zeroFill : Metric.hwpToPt(defaultColW);
    }
    for (const wPt of srcPt) colWidths.push(Metric.ptToHwp(wPt));
  } else {
    for (let c = 0; c < colCount; c++) colWidths.push(defaultColW);
  }

  // Shrink proportionally when the columns exceed the available width by >5%.
  const rawTotal = colWidths.reduce((s, w) => s + w, 0);
  if (rawTotal > totalWidth * 1.05) {
    const scale = totalWidth / rawTotal;
    for (let i = 0; i < colWidths.length; i++) colWidths[i] = Math.round(colWidths[i] * scale);
  }
  const actualTotal = colWidths.reduce((s, w) => s + w, 0);

  // Step 3: row heights.
  const rowHeights: number[] = [];
  for (let ri = 0; ri < rowCount; ri++) {
    const row = grid.kids[ri];
    if (row.heightPt != null && row.heightPt > 0) {
      rowHeights.push(Metric.ptToHwp(row.heightPt));
    } else {
      let maxH = 0;
      for (let ci = 0; ci < colCount; ci++) {
        const entry = tableMap[ri][ci];
        if (entry?.type === 'real') {
          const h = estimateCellHeight(entry.cell, ctx);
          if (h > maxH) maxH = h;
        }
      }
      // Fall back to 1600 units when no cell produced a height estimate.
      rowHeights.push(maxH || Math.round(1000 * 1.6));
    }
  }
  const totalTableHeight = rowHeights.reduce((s, h) => s + h, 0);

  // Step 4: XML assembly.
  const tblBfId = grid.props.defaultStroke ? addBorderFill(ctx, grid.props.defaultStroke) : 2;
  let rowsXml = "";

  for (let ri = 0; ri < rowCount; ri++) {
    let cellsXml = "";
    for (let ci = 0; ci < colCount; ci++) {
      const entry = tableMap[ri][ci];
      if (!entry || entry.type === 'absorbed') continue;

      const cell = entry.cell;
      const cp = cell.props;
      let cellBfId = tblBfId;

      // A cell gets its own borderFill when it overrides any edge or has a background.
      const hasPerSideBorder = cp.top || cp.bot || cp.left || cp.right;
      if (hasPerSideBorder || cp.bg) {
        const defStroke = grid.props.defaultStroke ?? DEFAULT_STROKE;
        cellBfId = hasPerSideBorder
          ? addBorderFillPerSide(ctx, cp.top ?? defStroke, cp.right ?? defStroke, cp.bot ?? defStroke, cp.left ?? defStroke, cp.bg)
          : addBorderFill(ctx, defStroke, cp.bg);
      }

      // Cell width is the sum of the columns it spans.
      let cellW = 0;
      for (let sc = ci; sc < ci + cell.cs && sc < colWidths.length; sc++) cellW += colWidths[sc];
      if (cellW === 0) cellW = defaultColW * cell.cs;

      // Inner width excludes the left + right cell margins (141 each), floor of 100.
      const cellInnerW = Math.max(cellW - 282, 100);
      const parasXml = cell.kids.map((p) => encodePara(p, ctx, "", cellInnerW)).join("");

      // NOTE(review): height uses only the anchor row's height even when
      // rowSpan > 1, whereas width sums the spanned columns — confirm whether
      // merged cells should report the summed row heights.
      cellsXml += `<hp:tc name="" header="0" hasMargin="1" protect="0" editable="0" dirty="0" borderFillIDRef="${cellBfId}">` +
        `<hp:subList id="" textDirection="HORIZONTAL" lineWrap="BREAK" vertAlign="${cp.va === "mid" ? "CENTER" : cp.va === "bot" ? "BOTTOM" : "TOP"}" linkListIDRef="0" linkListNextIDRef="0" textWidth="0" textHeight="0" hasTextRef="0" hasNumRef="0">${parasXml}</hp:subList>` +
        `<hp:cellAddr colAddr="${ci}" rowAddr="${ri}"/><hp:cellSpan colSpan="${cell.cs}" rowSpan="${cell.rs}"/><hp:cellSz width="${cellW}" height="${rowHeights[ri]}"/><hp:cellMargin left="141" right="141" top="141" bottom="141"/></hp:tc>`;
    }
    rowsXml += `<hp:tr>${cellsXml}</hp:tr>`;
  }

  const headerRow = grid.props.headerRow ? ' repeatHeader="1"' : "";
  return `<hp:tbl id="${ctx.nextElementId++}" zOrder="0" numberingType="TABLE" textWrap="TOP_AND_BOTTOM" textFlow="BOTH_SIDES" lock="0" dropcapstyle="None" pageBreak="NONE"${headerRow} rowCnt="${rowCount}" colCnt="${colCount}" cellSpacing="0" borderFillIDRef="${tblBfId}" noAdjust="0"><hp:sz width="${actualTotal}" widthRelTo="ABSOLUTE" height="${totalTableHeight}" heightRelTo="ABSOLUTE" protect="0"/><hp:pos treatAsChar="1" affectLSpacing="0" flowWithText="1" allowOverlap="0" holdAnchorAndSO="0" vertRelTo="PARA" horzRelTo="PARA" vertAlign="TOP" horzAlign="LEFT" vertOffset="0" horzOffset="0"/><hp:outMargin left="138" right="138" top="138" bottom="138"/><hp:inMargin left="138" right="138" top="138" bottom="138"/>${rowsXml}</hp:tbl>`;
}
|
|
726
777
|
|
|
@@ -764,6 +815,15 @@ function extractPreviewText(sheet?: SheetNode): string {
|
|
|
764
815
|
}
|
|
765
816
|
|
|
766
817
|
/**
 * Sanitises a text run and escapes it for embedding in HWPX XML.
 *
 * Removes, in order: internal `__EXT_<n>__` placeholders, a known garbage
 * sequence, byte-order marks, and every code point that XML 1.0 does not
 * allow. The result is then passed through `TextKit.escapeXml`.
 *
 * @param s Raw text; empty or falsy input yields "".
 * @returns XML-safe text.
 */
function esc(s: string): string {
  if (!s) return "";

  const cleaned = s
    // 1. Strip internal placeholders such as __EXT_0__.
    .replace(/__EXT_\d+__/g, "")
    // 2. Strip the known garbage sequence and BOM characters that corrupt output.
    .replace(/湰灧/g, "")
    .replace(/\uFEFF/g, "")
    // 3. Strip everything outside the XML 1.0 Char production. The `u` flag makes
    //    the class operate on code points, so valid astral characters
    //    (U+10000–U+10FFFF, e.g. emoji) are kept while lone surrogates are dropped.
    .replace(/[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD\u{10000}-\u{10FFFF}]/gu, "");

  return TextKit.escapeXml(cleaned);
}
|
|
769
829
|
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type { Encoder } from '../../contract/encoder';
|
|
2
2
|
import type { DocRoot, ParaNode, SpanNode, GridNode, ContentNode, ImgNode } from '../../model/doc-tree';
|
|
3
3
|
import type { Outcome } from '../../contract/result';
|
|
4
|
+
import type { Stroke } from '../../model/doc-props';
|
|
4
5
|
import { succeed, fail } from '../../contract/result';
|
|
5
6
|
import { TextKit } from '../../toolkit/TextKit';
|
|
6
7
|
import { registry } from '../../pipeline/registry';
|
|
@@ -53,12 +54,6 @@ function encodePara(para: ParaNode, warns: string[]): string {
|
|
|
53
54
|
}
|
|
54
55
|
|
|
55
56
|
function encodeSpan(span: SpanNode, warns: string[]): string {
|
|
56
|
-
// Warn about properties that can't be represented in MD
|
|
57
|
-
if (span.props.font) warns.push(`[SHIELD] MD: 글꼴(${span.props.font}) 표현 불가 — 손실됨`);
|
|
58
|
-
if (span.props.pt) warns.push(`[SHIELD] MD: 글자 크기(${span.props.pt}pt) 표현 불가 — 손실됨`);
|
|
59
|
-
if (span.props.color) warns.push(`[SHIELD] MD: 글자 색상(#${span.props.color}) 표현 불가 — 손실됨`);
|
|
60
|
-
if (span.props.bg) warns.push(`[SHIELD] MD: 배경 색상(#${span.props.bg}) 표현 불가 — 손실됨`);
|
|
61
|
-
|
|
62
57
|
let hasPageNum = false;
|
|
63
58
|
const textParts: string[] = [];
|
|
64
59
|
for (const kid of span.kids) {
|
|
@@ -72,6 +67,40 @@ function encodeSpan(span: SpanNode, warns: string[]): string {
|
|
|
72
67
|
let r = textParts.join('');
|
|
73
68
|
if (hasPageNum && r === '') r = '[페이지 번호]';
|
|
74
69
|
|
|
70
|
+
// Collect CSS styles for font/color/size/bg — use HTML span so fonts can be
|
|
71
|
+
// loaded externally via the page's stylesheet or @font-face rules.
|
|
72
|
+
const cssStyles: string[] = [];
|
|
73
|
+
if (span.props.font) cssStyles.push(`font-family: ${span.props.font}`);
|
|
74
|
+
if (span.props.pt) cssStyles.push(`font-size: ${span.props.pt}pt`);
|
|
75
|
+
if (span.props.color) cssStyles.push(`color: #${span.props.color}`);
|
|
76
|
+
if (span.props.bg) cssStyles.push(`background-color: #${span.props.bg}`);
|
|
77
|
+
|
|
78
|
+
const hasHtmlStyle = cssStyles.length > 0;
|
|
79
|
+
|
|
80
|
+
if (hasHtmlStyle) {
|
|
81
|
+
// When style properties are present, use HTML for all formatting so that
|
|
82
|
+
// markdown markers inside an HTML element don't break parsers.
|
|
83
|
+
if (span.props.b) cssStyles.push('font-weight: bold');
|
|
84
|
+
if (span.props.i) cssStyles.push('font-style: italic');
|
|
85
|
+
if (span.props.s) cssStyles.push('text-decoration: line-through');
|
|
86
|
+
if (span.props.u) {
|
|
87
|
+
// combine underline with possible line-through
|
|
88
|
+
const existing = cssStyles.find(s => s.startsWith('text-decoration:'));
|
|
89
|
+
if (existing) {
|
|
90
|
+
const idx = cssStyles.indexOf(existing);
|
|
91
|
+
cssStyles[idx] = existing.replace('line-through', 'underline line-through');
|
|
92
|
+
if (!existing.includes('line-through')) cssStyles[idx] = existing + ' underline';
|
|
93
|
+
} else {
|
|
94
|
+
cssStyles.push('text-decoration: underline');
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
const styleAttr = cssStyles.join('; ');
|
|
98
|
+
if (span.props.sup) return `<sup style="${styleAttr}">${r}</sup>`;
|
|
99
|
+
if (span.props.sub) return `<sub style="${styleAttr}">${r}</sub>`;
|
|
100
|
+
return `<span style="${styleAttr}">${r}</span>`;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
// No CSS styles needed — use plain Markdown formatting
|
|
75
104
|
if (span.props.b && span.props.i) r = `***${r}***`;
|
|
76
105
|
else if (span.props.b) r = `**${r}**`;
|
|
77
106
|
else if (span.props.i) r = `*${r}*`;
|
|
@@ -87,22 +116,75 @@ function encodeImage(img: ImgNode): string {
|
|
|
87
116
|
return ``;
|
|
88
117
|
}
|
|
89
118
|
|
|
119
|
+
/**
 * Converts a stroke into a CSS `border` shorthand value ("<px>px <style> <color>").
 *
 * @param s Stroke to convert.
 * @returns The shorthand, or `undefined` when the stroke is missing, has kind
 *          'none', or has a non-positive point size.
 */
function strokeToCss(s?: Stroke): string | undefined {
  if (!s) return undefined;
  if (s.kind === 'none' || s.pt <= 0) return undefined;

  // Stroke kind → CSS border-style keyword; unknown kinds render as 'solid'.
  const cssStyles: Record<string, string> = {
    solid: 'solid',
    dash: 'dashed',
    dot: 'dotted',
    double: 'double',
    none: 'none',
  };
  const borderStyle = cssStyles[s.kind] ?? 'solid';

  // Points → pixels at 96/72, never thinner than 1px.
  const widthPx = Math.max(1, Math.round(s.pt * 96 / 72));

  const hex = s.color.startsWith('#') ? s.color : `#${s.color}`;
  return [`${widthPx}px`, borderStyle, hex].join(' ');
}
|
|
128
|
+
|
|
90
129
|
/**
 * Encodes a grid (table) node as an inline-styled HTML `<table>` so borders,
 * background colours and vertical alignment survive in the Markdown output.
 *
 * Cells are emitted in source order with `colspan`/`rowspan` attributes, so
 * no column bookkeeping is needed here.
 *
 * @param grid  Table node to encode.
 * @param warns Warning sink forwarded to `encodePara` for cell content.
 * @returns The HTML table, or '' when the grid has no rows.
 */
function encodeGrid(grid: GridNode, warns: string[]): string {
  if (grid.kids.length === 0) return '';

  let rows = '';
  for (let ri = 0; ri < grid.kids.length; ri++) {
    let cells = '';

    for (const cell of grid.kids[ri].kids) {
      const cs = cell.cs > 1 ? ` colspan="${cell.cs}"` : '';
      const rs = cell.rs > 1 ? ` rowspan="${cell.rs}"` : '';

      // Style order: padding, vertical-align, borders (top/bottom/left/right), background.
      const va = cell.props.va === 'mid' ? 'middle' : cell.props.va === 'bot' ? 'bottom' : 'top';
      const styles: string[] = ['padding:4px 6px', `vertical-align:${va}`];

      const top = strokeToCss(cell.props.top);
      const bot = strokeToCss(cell.props.bot);
      const left = strokeToCss(cell.props.left);
      const right = strokeToCss(cell.props.right);
      if (top) styles.push(`border-top:${top}`);
      if (bot) styles.push(`border-bottom:${bot}`);
      if (left) styles.push(`border-left:${left}`);
      if (right) styles.push(`border-right:${right}`);

      if (cell.props.bg) {
        // Accept colours with or without a leading '#', as strokeToCss does.
        const bg = cell.props.bg.startsWith('#') ? cell.props.bg : `#${cell.props.bg}`;
        styles.push(`background-color:${bg}`);
      }

      // Header cells: every cell of row 0 when the grid has a header row, or any cell flagged isHeader.
      const tag = (grid.props.headerRow && ri === 0) || cell.props.isHeader ? 'th' : 'td';
      const content = cell.kids.map((p) => encodePara(p, warns)).join('\n');
      cells += `<${tag}${cs}${rs} style="${styles.join(';')}">${content}</${tag}>`;
    }
    rows += `<tr>${cells}</tr>\n`;
  }

  return `<table style="border-collapse:collapse;width:100%">\n<tbody>\n${rows}</tbody>\n</table>\n`;
}
|
|
107
189
|
|
|
108
190
|
registry.registerEncoder(new MdEncoder());
|
package/src/index.ts
CHANGED
|
@@ -10,6 +10,7 @@ import './decoders/hwpx/HwpxDecoder';
|
|
|
10
10
|
import './decoders/docx/DocxDecoder';
|
|
11
11
|
import './decoders/hwp/HwpScanner';
|
|
12
12
|
import './encoders/md/MdEncoder';
|
|
13
|
+
import './encoders/html/HtmlEncoder';
|
|
13
14
|
import './encoders/hwpx/HwpxEncoder';
|
|
14
15
|
import './encoders/docx/DocxEncoder';
|
|
15
16
|
import './encoders/hwp/HwpEncoder';
|
package/src/model/builders.ts
CHANGED
|
@@ -54,8 +54,10 @@ export function buildGrid(kids: RowNode[], props: GridProps = {}): GridNode {
|
|
|
54
54
|
return { tag: 'grid', props, kids };
|
|
55
55
|
}
|
|
56
56
|
|
|
57
|
-
export function buildRow(kids: CellNode[]): RowNode {
|
|
58
|
-
|
|
57
|
+
/**
 * Creates a table row node.
 *
 * @param kids     Cells of the row.
 * @param heightPt Optional row height; the `heightPt` property is only set
 *                 when a non-null value is supplied.
 * @returns The new row node.
 */
export function buildRow(kids: CellNode[], heightPt?: number): RowNode {
  return heightPt != null ? { tag: 'row', kids, heightPt } : { tag: 'row', kids };
}
|
|
60
62
|
|
|
61
63
|
export function buildCell(
|
package/src/model/doc-tree.ts
CHANGED
package/src/pipeline/Pipeline.ts
CHANGED
|
@@ -60,8 +60,21 @@ export class Pipeline {
|
|
|
60
60
|
}
|
|
61
61
|
|
|
62
62
|
/**
 * Guesses the document format from the leading bytes of a file.
 *
 * @param data Raw file bytes.
 * @returns 'hwp' for the D0 CF 11 E0 signature, 'docx' or 'hwpx' for data
 *          starting with "PK" (decided by marker strings found in the first
 *          4096 bytes, defaulting to 'hwpx'), otherwise 'md'.
 */
function detectFormat(data: Uint8Array): string {
  const beginsWith = (...signature: number[]): boolean =>
    signature.every((byte, offset) => data[offset] === byte);

  // HWP files start with the D0 CF 11 E0 signature.
  if (beginsWith(0xD0, 0xCF, 0x11, 0xE0)) return 'hwp';

  // Anything that is not "PK"-prefixed is treated as Markdown.
  if (!beginsWith(0x50, 0x4B)) return 'md';

  // "PK"-prefixed containers (DOCX, HWPX): look for format-specific marker
  // strings in the head of the file. Order matters — the first hit wins.
  const head = new TextDecoder('utf-8', { fatal: false }).decode(data.slice(0, 4096));
  const markers: ReadonlyArray<readonly [string, string]> = [
    ['wordprocessingml', 'docx'],
    ['ha-xml', 'hwpx'],
    ['hwpml/', 'hwpx'],
    ['word/', 'docx'],
  ];
  for (const [needle, format] of markers) {
    if (head.includes(needle)) return format;
  }
  return 'hwpx'; // default for unrecognised "PK" data
}
|
|
67
80
|
|
|
@@ -50,7 +50,7 @@ const ALIGN_MAP: Record<string, Align> = {
|
|
|
50
50
|
start: 'left', end: 'right',
|
|
51
51
|
};
|
|
52
52
|
/**
 * Normalises a raw alignment keyword.
 *
 * @param raw Alignment string from the source document; may be undefined.
 * @returns The matching `ALIGN_MAP` entry, or 'left' when there is none.
 */
export function safeAlign(raw?: string): Align {
  const key = raw ?? '';
  const mapped = ALIGN_MAP[key];
  return mapped ?? 'left';
}
|
|
55
55
|
|
|
56
56
|
// ─── 테두리 정규화 ─────────────────────────────────────────
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import { Pipeline } from './src/index';
|
|
2
|
+
import * as fs from 'fs';
|
|
3
|
+
|
|
4
|
+
/**
 * Manual smoke test: converts a sample DOCX to HWP, writes the result to
 * `./output_test.hwp`, then converts it back to Markdown and prints a preview.
 *
 * @returns `true` when the DOCX → HWP conversion succeeded, `false` when it
 *          failed or threw.
 */
async function testDocxToHwp(): Promise<boolean> {
  const inputPath = './data/sample/sample4_input.docx';
  console.log(`\n📄 Testing DOCX → HWP conversion from: ${inputPath}`);
  const data = fs.readFileSync(inputPath);

  try {
    const pipeline = Pipeline.open(data, 'docx');

    console.log('Attempting to convert to HWP...');
    const result = await pipeline.to('hwp');

    if (result.ok) {
      console.log(`✅ Success! HWP output: ${result.data.length} bytes`);

      // Save to file for verification
      fs.writeFileSync('./output_test.hwp', result.data);
      console.log('Saved to: ./output_test.hwp');

      // Verify by converting back to MD
      console.log('\n--- Verifying by converting back to MD ---');
      const verifyPipeline = Pipeline.open(result.data, 'hwp');
      const mdResult = await verifyPipeline.to('md');
      if (mdResult.ok) {
        const mdText = new TextDecoder().decode(mdResult.data);
        console.log(`Verification MD output: ${mdText.length} bytes`);
        console.log('First 500 chars:', mdText.substring(0, 500));
      } else {
        console.log(`Verification failed: ${mdResult.error}`);
      }
    } else {
      console.log(`❌ Failed: ${result.error}`);
    }

    return result.ok;
  } catch (e: unknown) {
    // Thrown values are not guaranteed to be Error instances; narrow before
    // reading `.message` / `.stack`.
    if (e instanceof Error) {
      console.error(`❌ EXCEPTION: ${e.message}`);
      console.error(e.stack);
    } else {
      console.error(`❌ EXCEPTION: ${String(e)}`);
    }
    return false;
  }
}
|
|
44
|
+
|
|
45
|
+
testDocxToHwp().catch(console.error);
|