hwpkit-dev 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/hwp-analyze.ts ADDED
@@ -0,0 +1,90 @@
1
+ import * as fs from 'fs';
2
+ import pako from 'pako';
3
+
4
+ const hwp = fs.readFileSync('/mnt/92b2cb7e-8f06-4e4f-bfde-a14c87f2f96c/Github/hwpkit/data/sample/sample1_input.hwp');
5
+ const SS = 512;
6
+ const buf = hwp.buffer.slice(hwp.byteOffset, hwp.byteOffset + hwp.length);
7
+
8
+ function u32LE(b: Buffer, off: number) {
9
+ return b[off] | (b[off+1]<<8) | (b[off+2]<<16) | ((b[off+3]&0xFF)*0x1000000);
10
+ }
11
+ function i32LE(b: Buffer, off: number) {
12
+ const v = u32LE(b,off);
13
+ return v >= 0x80000000 ? v - 0x100000000 : v;
14
+ }
15
+ function u16LE(b: Buffer, off: number) { return b[off] | (b[off+1]<<8); }
16
+
17
+ const hdr = hwp.slice(0, 512);
18
+ const fatSec = u32LE(hdr as Buffer, 76);
19
+ const dirSec = u32LE(hdr as Buffer, 48);
20
+
21
+ function sectorSlice(sec: number) {
22
+ const off = (sec+1)*SS;
23
+ return hwp.slice(off, off+SS) as Buffer;
24
+ }
25
+
26
+ const fatSector = sectorSlice(fatSec);
27
+ function nextSec(sec: number) { return u32LE(fatSector as Buffer, (sec%128)*4); }
28
+
29
+ function readChain(startSec: number, maxB = 200000): Buffer {
30
+ const chunks: Buffer[] = [];
31
+ let sec = startSec, tot = 0;
32
+ while (sec < 0xFFFFFFFE && tot < maxB) {
33
+ chunks.push(sectorSlice(sec) as Buffer);
34
+ tot += SS;
35
+ sec = nextSec(sec);
36
+ }
37
+ return Buffer.concat(chunks);
38
+ }
39
+
40
+ const dirBuf = readChain(dirSec, 2048);
41
+ function readEntry(idx: number) {
42
+ const base = idx*128;
43
+ const nl = u16LE(dirBuf, base+64);
44
+ let name = '';
45
+ for (let i=0;i<(nl/2)-1&&i<32;i++) name += String.fromCharCode(u16LE(dirBuf, base+i*2));
46
+ return {
47
+ name, type: dirBuf[base+66],
48
+ child: i32LE(dirBuf, base+76), right: i32LE(dirBuf, base+72),
49
+ startSec: u32LE(dirBuf, base+116), size: u32LE(dirBuf, base+120)
50
+ };
51
+ }
52
+
53
+ let docInfoE: any = null, sec0E: any = null;
54
+ function walk(idx: number, depth = 0) {
55
+ if (idx < 0 || idx > 100) return;
56
+ const e = readEntry(idx);
57
+ if (depth < 5) console.log(' '.repeat(depth*2) + idx + ': "' + e.name + '" type=' + e.type + ' sz=' + e.size);
58
+ if (e.name === 'DocInfo') docInfoE = e;
59
+ if (e.name === 'Section0') sec0E = e;
60
+ walk(e.child, depth+1);
61
+ walk(e.right, depth);
62
+ }
63
+ walk(0);
64
+
65
+ if (!docInfoE || !sec0E) { console.log('NOT FOUND'); process.exit(1); }
66
+
67
+ const diC = readChain(docInfoE.startSec, docInfoE.size+512);
68
+ const s0C = readChain(sec0E.startSec, sec0E.size+512);
69
+ const diRaw = Buffer.from(pako.inflate(diC.slice(0,docInfoE.size)));
70
+ const s0Raw = Buffer.from(pako.inflate(s0C.slice(0,sec0E.size)));
71
+
72
+ function parseRecs(buf: Buffer) {
73
+ const recs: any[] = []; let off = 0;
74
+ while (off+4 <= buf.length) {
75
+ const h = u32LE(buf,off), tag=h&0x3FF, lv=(h>>10)&0x3FF;
76
+ let sz=(h>>>20)&0xFFF, hSz=4;
77
+ if(sz===0xFFF){sz=u32LE(buf,off+4);hSz=8;}
78
+ if(off+hSz+sz>buf.length) break;
79
+ recs.push({tag,lv,sz,data:buf.slice(off+hSz,off+hSz+sz)});
80
+ off+=hSz+sz;
81
+ }
82
+ return recs;
83
+ }
84
+
85
+ const s0 = parseRecs(s0Raw);
86
+ console.log('\nSection0 records:', s0.length);
87
+ for (const r of s0.slice(0, 50)) {
88
+ const h = r.data.slice(0,Math.min(r.sz,50)).toString('hex');
89
+ console.log(`tag=${r.tag} lv=${r.lv} sz=${r.sz} [${h}]`);
90
+ }
package/inspect-doc.ts ADDED
@@ -0,0 +1,57 @@
1
+ import { Pipeline, TreeWalker, countNodes } from './src/index';
2
+ import * as fs from 'fs';
3
+
4
+ async function inspectDoc(filePath: string, description: string) {
5
+ console.log(`\nπŸ“„ ${description}`);
6
+ console.log(` File: ${filePath} (${fs.statSync(filePath).size} bytes)`);
7
+
8
+ const data = fs.readFileSync(filePath);
9
+ const pipeline = Pipeline.open(data);
10
+ const result = await pipeline.inspect();
11
+
12
+ if (!result.ok) {
13
+ console.error(` ❌ FAILED: ${result.error}`);
14
+ return;
15
+ }
16
+
17
+ const doc = result.data;
18
+ console.log(` Meta: title="${doc.meta.title}", author="${doc.meta.author}"`);
19
+ console.log(` Sheets: ${doc.kids.length}`);
20
+
21
+ const walker = new TreeWalker();
22
+ let paraCount = 0, spanCount = 0, txtCount = 0, imgCount = 0, gridCount = 0;
23
+ let totalChars = 0;
24
+
25
+ walker.walk(doc, (node) => {
26
+ switch (node.tag) {
27
+ case 'para': paraCount++; break;
28
+ case 'span': spanCount++; break;
29
+ case 'txt': txtCount++; totalChars += node.content.length; break;
30
+ case 'img': imgCount++; break;
31
+ case 'grid': gridCount++; break;
32
+ }
33
+ });
34
+
35
+ console.log(` Paragraphs: ${paraCount}`);
36
+ console.log(` Spans: ${spanCount}`);
37
+ console.log(` Text nodes: ${txtCount}, Total chars: ${totalChars}`);
38
+ console.log(` Images: ${imgCount}`);
39
+ console.log(` Tables (grids): ${gridCount}`);
40
+
41
+ if (result.warns.length > 0) {
42
+ console.log(` ⚠️ Warnings: ${result.warns.slice(0, 5).join(', ')}${result.warns.length > 5 ? '...' : ''}`);
43
+ }
44
+ }
45
+
46
+ async function main() {
47
+ // Input files
48
+ await inspectDoc('./data/sample/sample1_input.hwp', 'Sample1 Input (HWP)');
49
+ await inspectDoc('./data/sample/sample3_input.hwpx', 'Sample3 Input (HWPX)');
50
+ await inspectDoc('./data/sample/sample4_input.docx', 'Sample4 Input (DOCX)');
51
+
52
+ // Expected outputs
53
+ await inspectDoc('./data/sample/sample1_output.docx', 'Sample1 Expected Output (DOCX)');
54
+ await inspectDoc('./data/sample/sample1_output.hwpx', 'Sample1 Expected Output (HWPX)');
55
+ }
56
+
57
+ main().catch(console.error);
Binary file
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "hwpkit-dev",
3
3
  "description": "HWP/HWPX/DOCX/MD 양방향 문서 변환 라이브러리",
4
- "version": "0.0.1",
4
+ "version": "0.0.2",
5
5
  "author": {
6
6
  "name": "INMD1",
7
7
  "email": "lyw514549@gmail.com",
@@ -31,7 +31,9 @@
31
31
  "playground": "vite --config playground/vite.config.ts"
32
32
  },
33
33
  "dependencies": {
34
+ "fs": "^0.0.1-security",
34
35
  "pako": "^2.1.0",
36
+ "path": "^0.12.7",
35
37
  "saxes": "^6.0.0"
36
38
  },
37
39
  "devDependencies": {
@@ -18,6 +18,7 @@ import type {
18
18
  CellProps,
19
19
  GridProps,
20
20
  TableLook,
21
+ Stroke,
21
22
  ImgLayout,
22
23
  ImgHorzAlign,
23
24
  ImgVertAlign,
@@ -90,6 +91,17 @@ export class DocxDecoder implements Decoder {
90
91
  }
91
92
  }
92
93
 
94
+ // Parse styles.xml for tblStyle defaults
95
+ let stylesMap: StylesMap = new Map();
96
+ const stylesXml = files.get("word/styles.xml");
97
+ if (stylesXml) {
98
+ try {
99
+ stylesMap = await parseStylesMap(TextKit.decode(stylesXml));
100
+ } catch {
101
+ /* non-fatal */
102
+ }
103
+ }
104
+
93
105
  const docStr = TextKit.decode(docXml);
94
106
  const docObj: any = await XmlKit.parseStrict(docStr);
95
107
 
@@ -97,7 +109,7 @@ export class DocxDecoder implements Decoder {
97
109
  const dims = extractDims(body) ?? { ...A4 };
98
110
  const elements = getBodyElements(body);
99
111
 
100
- const decCtx: DecCtx = { relsMap, files, shield, numMap, warns };
112
+ const decCtx: DecCtx = { relsMap, files, shield, numMap, warns, stylesMap };
101
113
 
102
114
  const kids: ContentNode[] = [];
103
115
  for (const el of elements) {
@@ -153,12 +165,30 @@ export class DocxDecoder implements Decoder {
153
165
 
154
166
  // ─── types ─────────────────────────────────────────────────
155
167
 
168
+ interface TblBorderDef {
169
+ top?: Stroke;
170
+ bottom?: Stroke;
171
+ left?: Stroke;
172
+ right?: Stroke;
173
+ insideH?: Stroke;
174
+ insideV?: Stroke;
175
+ }
176
+
177
+ /** Parsed tblStyle defaults from styles.xml */
178
+ interface TblStyleDef {
179
+ tblBorders?: TblBorderDef;
180
+ cellBg?: string; // default cell background
181
+ }
182
+
183
+ type StylesMap = Map<string, TblStyleDef>; // styleId → defaults
184
+
156
185
  interface DecCtx {
157
186
  relsMap: Map<string, string>;
158
187
  files: Map<string, Uint8Array>;
159
188
  shield: ShieldedParser;
160
189
  numMap: NumMap;
161
190
  warns: string[];
191
+ stylesMap: StylesMap;
162
192
  }
163
193
 
164
194
  // numId → { abstractNumId, levels: Map<ilvl, { fmt, isOrdered }> }
@@ -679,6 +709,85 @@ function decodeRun(run: any, ctx: DecCtx): SpanNode {
679
709
  return buildSpan(content, props);
680
710
  }
681
711
 
712
+ /** Parse all 6 border sides from a w:tblBorders or w:tcBorders node */
713
+ function parseBorderDef(bdrNode: any): TblBorderDef {
714
+ const sides: [string, keyof TblBorderDef][] = [
715
+ ["top", "top"], ["bottom", "bottom"], ["left", "left"], ["right", "right"],
716
+ ["insideH", "insideH"], ["insideV", "insideV"],
717
+ ];
718
+ const result: TblBorderDef = {};
719
+ for (const [xml, prop] of sides) {
720
+ const bdr = bdrNode?.["w:" + xml]?.[0]?._attr ?? bdrNode?.[xml]?.[0]?._attr;
721
+ if (!bdr) continue;
722
+ const val = bdr?.["w:val"] ?? bdr?.val;
723
+ if (val === "none" || val === "nil") continue; // explicit none → skip (no border)
724
+ result[prop] = safeStrokeDocx(
725
+ val,
726
+ Number(bdr?.["w:sz"] ?? bdr?.sz ?? 4),
727
+ bdr?.["w:color"] ?? bdr?.color,
728
+ );
729
+ }
730
+ return result;
731
+ }
732
+
733
+ /** Parse styles.xml and build a map of tblStyle defaults */
734
+ async function parseStylesMap(xml: string): Promise<StylesMap> {
735
+ const map: StylesMap = new Map();
736
+ try {
737
+ const obj: any = await XmlKit.parseStrict(xml);
738
+ const stylesRoot = obj?.["w:styles"]?.[0] ?? obj?.styles?.[0] ?? obj;
739
+ const styleArr = toArr(stylesRoot?.["w:style"] ?? stylesRoot?.style);
740
+ for (const style of styleArr) {
741
+ const attr = style?._attr ?? {};
742
+ const type = attr?.["w:type"] ?? attr?.type;
743
+ if (type !== "table") continue;
744
+ const id = attr?.["w:styleId"] ?? attr?.styleId;
745
+ if (!id) continue;
746
+ const tblPr = style?.["w:tblPr"]?.[0] ?? style?.tblPr?.[0];
747
+ const tblBdrNode = tblPr?.["w:tblBorders"]?.[0] ?? tblPr?.tblBorders?.[0];
748
+ const tblBorders = tblBdrNode ? parseBorderDef(tblBdrNode) : undefined;
749
+ // tcStyle > tcBdr for default cell borders
750
+ const tcStyle = style?.["w:tcStyle"]?.[0] ?? style?.tcStyle?.[0];
751
+ const tcBdrNode = tcStyle?.["w:tcBdr"]?.[0] ?? tcStyle?.tcBdr?.[0];
752
+ if (tcBdrNode) {
753
+ const cellDef = parseBorderDef(tcBdrNode);
754
+ // merge into tblBorders as inner/outer defaults
755
+ if (!tblBorders) {
756
+ map.set(id, { tblBorders: cellDef });
757
+ } else {
758
+ map.set(id, { tblBorders: { ...cellDef, ...tblBorders } });
759
+ }
760
+ } else if (tblBorders) {
761
+ map.set(id, { tblBorders });
762
+ }
763
+ }
764
+ } catch {
765
+ /* non-fatal */
766
+ }
767
+ return map;
768
+ }
769
+
770
+ /** Resolve final CellProps borders using 3-level priority chain */
771
+ function resolveCellBorders(
772
+ cp: CellProps,
773
+ ri: number, ci: number, rs: number, cs: number,
774
+ rowCount: number, colCount: number,
775
+ tblBdr: TblBorderDef,
776
+ ): CellProps {
777
+ const isTopEdge = ri === 0;
778
+ const isBottomEdge = ri + rs >= rowCount;
779
+ const isLeftEdge = ci === 0;
780
+ const isRightEdge = ci + cs >= colCount;
781
+
782
+ // Apply tblBorders only where no explicit tcBorder was set
783
+ const resolved: CellProps = { ...cp };
784
+ if (!resolved.top) resolved.top = isTopEdge ? tblBdr.top : tblBdr.insideH;
785
+ if (!resolved.bot) resolved.bot = isBottomEdge ? tblBdr.bottom : tblBdr.insideH;
786
+ if (!resolved.left) resolved.left = isLeftEdge ? tblBdr.left : tblBdr.insideV;
787
+ if (!resolved.right) resolved.right = isRightEdge ? tblBdr.right : tblBdr.insideV;
788
+ return resolved;
789
+ }
790
+
682
791
  function decodeGrid(tbl: any, ctx: DecCtx): GridNode {
683
792
  // Parse tblPr for table styles
684
793
  const tblPr = tbl?.["w:tblPr"]?.[0] ?? tbl?.tblPr?.[0] ?? {};
@@ -700,21 +809,20 @@ function decodeGrid(tbl: any, ctx: DecCtx): GridNode {
700
809
  bandedCols: tblLookAttr?.["w:noVBand"] === "0" || undefined,
701
810
  };
702
811
 
703
- // Parse table borders for defaultStroke
704
- const tblBorders = tblPr?.["w:tblBorders"]?.[0] ?? tblPr?.tblBorders?.[0];
705
- let defaultStroke = undefined;
706
- if (tblBorders) {
707
- const top =
708
- tblBorders?.["w:top"]?.[0]?._attr ?? tblBorders?.top?.[0]?._attr;
709
- if (top) {
710
- defaultStroke = safeStrokeDocx(
711
- top?.["w:val"] ?? top?.val,
712
- Number(top?.["w:sz"] ?? top?.sz ?? 4),
713
- top?.["w:color"] ?? top?.color,
714
- );
715
- }
812
+ // ① tblStyle 기본값 로드
813
+ const tblStyleId = (tblPr?.["w:tblStyle"]?.[0]?._attr ?? tblPr?.tblStyle?.[0]?._attr)?.["w:val"];
814
+ const styleDef = tblStyleId ? ctx.stylesMap.get(tblStyleId) : undefined;
815
+ let tblBdr: TblBorderDef = styleDef?.tblBorders ?? {};
816
+
817
+ // ② tblBorders 재정의 (tblStyle보다 우선)
818
+ const tblBordersNode = tblPr?.["w:tblBorders"]?.[0] ?? tblPr?.tblBorders?.[0];
819
+ if (tblBordersNode) {
820
+ const parsed = parseBorderDef(tblBordersNode);
821
+ tblBdr = { ...tblBdr, ...parsed };
716
822
  }
717
823
 
824
+ // defaultStroke for HWPX/HWP encoders: use insideH (inner horizontal border)
825
+ const defaultStroke = tblBdr.insideH ?? tblBdr.top;
718
826
  const gridProps: GridProps = { look, defaultStroke };
719
827
 
720
828
  // Read column widths from w:tblGrid
@@ -790,6 +898,14 @@ function decodeGrid(tbl: any, ctx: DecCtx): GridNode {
790
898
  trPr?.["w:tblHeader"]?.[0] != null || trPr?.tblHeader?.[0] != null;
791
899
  if (ri === 0 && isHeaderRow) gridProps.headerRow = true;
792
900
 
901
+ // Row height from w:trHeight
902
+ let rowHeightPt: number | undefined;
903
+ const trHAttr = trPr?.["w:trHeight"]?.[0]?._attr ?? trPr?.trHeight?.[0]?._attr;
904
+ if (trHAttr) {
905
+ const hDxa = Number(trHAttr?.["w:val"] ?? trHAttr?.val ?? 0);
906
+ if (hDxa > 0) rowHeightPt = Metric.dxaToPt(hDxa);
907
+ }
908
+
793
909
  const cellNodes: CellNode[] = [];
794
910
  for (let ci = 0; ci < rawRow.length; ci++) {
795
911
  const rc = rawRow[ci];
@@ -803,24 +919,25 @@ function decodeGrid(tbl: any, ctx: DecCtx): GridNode {
803
919
  const bgAttr = tcPr?.["w:shd"]?.[0]?._attr ?? {};
804
920
  const bg = safeHex(bgAttr?.["w:fill"] ?? bgAttr?.fill);
805
921
 
806
- // Cell borders
807
- const tcBorders = tcPr?.["w:tcBorders"]?.[0] ?? tcPr?.tcBorders?.[0];
922
+ // ③ tcBorders 셀 수준 재정의 (우선순위 가장 높음)
923
+ const tcBordersNode = tcPr?.["w:tcBorders"]?.[0] ?? tcPr?.tcBorders?.[0];
808
924
  const cp: CellProps = { bg, isHeader: isHeaderRow || undefined };
809
925
 
810
- if (tcBorders) {
926
+ if (tcBordersNode) {
811
927
  const dirs: Array<[string, "top" | "bot" | "left" | "right"]> = [
812
- ["top", "top"],
813
- ["bottom", "bot"],
814
- ["left", "left"],
815
- ["right", "right"],
928
+ ["top", "top"], ["bottom", "bot"], ["left", "left"], ["right", "right"],
816
929
  ];
817
930
  for (const [xmlTag, propKey] of dirs) {
818
931
  const bdr =
819
- tcBorders?.["w:" + xmlTag]?.[0]?._attr ??
820
- tcBorders?.[xmlTag]?.[0]?._attr;
821
- if (bdr) {
932
+ tcBordersNode?.["w:" + xmlTag]?.[0]?._attr ??
933
+ tcBordersNode?.[xmlTag]?.[0]?._attr;
934
+ if (!bdr) continue;
935
+ const val = bdr?.["w:val"] ?? bdr?.val;
936
+ if (val === "none" || val === "nil") {
937
+ // explicit none: keep as undefined (no border)
938
+ } else {
822
939
  cp[propKey] = safeStrokeDocx(
823
- bdr?.["w:val"] ?? bdr?.val,
940
+ val,
824
941
  Number(bdr?.["w:sz"] ?? bdr?.sz ?? 4),
825
942
  bdr?.["w:color"] ?? bdr?.color,
826
943
  );
@@ -834,15 +951,23 @@ function decodeGrid(tbl: any, ctx: DecCtx): GridNode {
834
951
  const vaVal = vaAttr?.["w:val"] ?? vaAttr?.val;
835
952
  if (vaVal) {
836
953
  const vaMap: Record<string, "top" | "mid" | "bot"> = {
837
- top: "top",
838
- center: "mid",
839
- bottom: "bot",
954
+ top: "top", center: "mid", bottom: "bot",
840
955
  };
841
956
  cp.va = vaMap[vaVal];
842
957
  }
843
958
 
844
959
  const rs = rsMap.get(`${ri},${ci}`) ?? 1;
845
960
 
961
+ // Compute logical column index for this cell
962
+ let gridColIdx = 0;
963
+ for (let prevCi = 0; prevCi < ci; prevCi++) {
964
+ if (!rawRow[prevCi].vMergeContinue) gridColIdx += rawRow[prevCi].gridSpan;
965
+ }
966
+
967
+ // Apply 3-level border resolution (tblStyle → tblBorders → tcBorders already in cp)
968
+ const colCount = gridProps.colWidths?.length ?? rawGrid[0]?.reduce((s, c) => s + c.gridSpan, 0) ?? 1;
969
+ const resolvedCp = resolveCellBorders(cp, ri, gridColIdx, rs, rc.gridSpan, rawGrid.length, colCount, tblBdr);
970
+
846
971
  const paras = toArr(cell?.["w:p"] ?? cell?.p).map((p: any) =>
847
972
  decodePara(p, ctx),
848
973
  );
@@ -850,11 +975,11 @@ function decodeGrid(tbl: any, ctx: DecCtx): GridNode {
850
975
  buildCell(paras.length > 0 ? paras : [buildPara([buildSpan("")])], {
851
976
  cs: rc.gridSpan,
852
977
  rs,
853
- props: cp,
978
+ props: resolvedCp,
854
979
  }),
855
980
  );
856
981
  }
857
- return buildRow(cellNodes);
982
+ return buildRow(cellNodes, rowHeightPt);
858
983
  });
859
984
  return buildGrid(rowNodes, gridProps);
860
985
  }