hwpkit-dev 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ .npmignore +11 -0
- package/README.md +223 -0
- package/dist/index.d.mts +313 -0
- package/dist/index.d.ts +317 -0
- package/dist/index.js +3546 -0
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +3479 -0
- package/dist/index.mjs.map +1 -0
- package/license.md +136 -0
- package/package.json +45 -0
- package/src/contract/decoder.ts +7 -0
- package/src/contract/encoder.ts +7 -0
- package/src/contract/result.ts +21 -0
- package/src/decoders/docx/DocxDecoder.ts +986 -0
- package/src/decoders/hwp/HwpScanner.ts +809 -0
- package/src/decoders/hwpx/HwpxDecoder.ts +759 -0
- package/src/decoders/md/MdDecoder.ts +180 -0
- package/src/encoders/docx/DocxEncoder.ts +710 -0
- package/src/encoders/hwp/HwpEncoder.ts +711 -0
- package/src/encoders/hwpx/HwpxEncoder.ts +770 -0
- package/src/encoders/md/MdEncoder.ts +108 -0
- package/src/index.ts +47 -0
- package/src/model/builders.ts +66 -0
- package/src/model/doc-props.ts +138 -0
- package/src/model/doc-tree.ts +90 -0
- package/src/pipeline/Pipeline.ts +71 -0
- package/src/pipeline/registry.ts +18 -0
- package/src/safety/ShieldedParser.ts +91 -0
- package/src/safety/StyleBridge.ts +106 -0
- package/src/toolkit/ArchiveKit.ts +150 -0
- package/src/toolkit/BinaryKit.ts +187 -0
- package/src/toolkit/TextKit.ts +57 -0
- package/src/toolkit/XmlKit.ts +91 -0
- package/src/walk/TreeWalker.ts +42 -0
- package/src/walk/tree-ops.ts +26 -0
- package/tsconfig.json +23 -0
- package/tsup.config.ts +12 -0
|
@@ -0,0 +1,711 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HWP 5.0 encoder — DocRoot → HWP binary (OLE2/CFB container)
|
|
3
|
+
*
|
|
4
|
+
* OLE2 layout:
|
|
5
|
+
* FileHeader (stream) — 256-byte HWP signature + flags
|
|
6
|
+
* DocInfo (stream) — compressed FACE_NAME / CHAR_SHAPE / PARA_SHAPE records
|
|
7
|
+
* BodyText (storage)
|
|
8
|
+
* Section0 (stream) — compressed PAGE_DEF + paragraph/table records
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import type { Encoder } from '../../contract/encoder';
|
|
12
|
+
import type { DocRoot, ContentNode, ParaNode, SpanNode, GridNode } from '../../model/doc-tree';
|
|
13
|
+
import type { Outcome } from '../../contract/result';
|
|
14
|
+
import type { TextProps, ParaProps, Stroke, PageDims } from '../../model/doc-props';
|
|
15
|
+
import { succeed, fail } from '../../contract/result';
|
|
16
|
+
import { Metric } from '../../safety/StyleBridge';
|
|
17
|
+
import { registry } from '../../pipeline/registry';
|
|
18
|
+
import { A4 } from '../../model/doc-props';
|
|
19
|
+
import pako from 'pako';
|
|
20
|
+
|
|
21
|
+
/* ═══════════════════════════════════════════════════════════════
|
|
22
|
+
HWP 5.0 tag IDs
|
|
23
|
+
═══════════════════════════════════════════════════════════════ */
|
|
24
|
+
|
|
25
|
+
// HWP record tag IDs. Each tag is an offset from HWPTAG_BEGIN, which is kept
// here under the short name `T`.
// NOTE(review): the published HWP 5.0 record list appears to assign
// HWPTAG_BEGIN+1 to ID_MAPPINGS and HWPTAG_BEGIN+61 to TABLE. The offsets used
// below (+8 and +64) are preserved unchanged — confirm against HwpScanner
// before touching them.
const T = 0x10; // HWPTAG_BEGIN (16)

// Records emitted into the DocInfo stream
const TAG_FACE_NAME = T + 3; // 19
const TAG_BORDER_FILL = T + 4; // 20
const TAG_CHAR_SHAPE = T + 5; // 21
const TAG_ID_MAPPINGS = T + 8; // 24
const TAG_PARA_SHAPE = T + 9; // 25

// Records emitted into the BodyText section stream
const TAG_PARA_HEADER = T + 50; // 66
const TAG_PARA_TEXT = T + 51; // 67
const TAG_PARA_CHAR_SHAPE = T + 52; // 68
const TAG_CTRL_HEADER = T + 55; // 71
const TAG_LIST_HEADER = T + 56; // 72
const TAG_PAGE_DEF = T + 57; // 73
const TAG_TABLE_B = T + 64; // 80

// Table control ID: 't','b','l',' ' packed most-significant byte first
// (0x74626C20). Written as a little-endian uint32 the bytes read ' lbt'.
const CTRL_TABLE = (0x74 << 24) | (0x62 << 16) | (0x6C << 8) | 0x20;
|
|
40
|
+
|
|
41
|
+
/**
 * Border widths in points; the array position is the width index stored in a
 * BORDER_FILL record. Mirrors the BORDER_W_PT table in HwpScanner.
 */
const BORDER_W_PT = [
  0.28, 0.34, 0.43, 0.57, // 0-3
  0.71, 0.85, 1.13, 1.42, // 4-7
  1.70, 1.98, 2.84, 4.25, // 8-11
  5.67, 8.50, 11.34, 14.17, // 12-15
];

/** Stroke kind name → border line-type code written into BORDER_FILL. */
const BORDER_KIND_IDX: Record<string, number> = {
  none: 0,
  solid: 1,
  dash: 2,
  dot: 3,
  double: 8,
};

/** Paragraph alignment name → code stored in the PARA_SHAPE attribute word. */
const ALIGN_CODE: Record<string, number> = {
  justify: 0,
  left: 1,
  right: 2,
  center: 3,
};
|
|
54
|
+
|
|
55
|
+
/* ═══════════════════════════════════════════════════════════════
|
|
56
|
+
Binary buffer writer
|
|
57
|
+
═══════════════════════════════════════════════════════════════ */
|
|
58
|
+
|
|
59
|
+
/**
 * Append-only little-endian byte sink.
 *
 * Every write method appends to an internal list of byte chunks and returns
 * `this` so calls can be chained; `build()` flattens the chunks into a single
 * Uint8Array.
 */
class BufWriter {
  private parts: Uint8Array[] = [];
  private total = 0;

  /** Number of bytes appended so far. */
  get size() { return this.total; }

  /** Record one chunk and account for its length. */
  private append(part: Uint8Array): this {
    this.parts.push(part);
    this.total += part.length;
    return this;
  }

  /** Append the low 8 bits of `v`. */
  u8(v: number): this {
    return this.append(Uint8Array.of(v & 0xFF));
  }

  /** Append the low 16 bits of `v`, least-significant byte first. */
  u16(v: number): this {
    return this.append(Uint8Array.of(v & 0xFF, (v >> 8) & 0xFF));
  }

  /** Append the low 32 bits of `v`, least-significant byte first. */
  u32(v: number): this {
    const le = new Uint8Array(4);
    for (let k = 0; k < 4; k++) le[k] = (v >>> (8 * k)) & 0xFF;
    return this.append(le);
  }

  /** Append a signed 32-bit value; negatives are shifted into the unsigned range first. */
  i32(v: number): this {
    return this.u32(v < 0 ? v + 0x100000000 : v);
  }

  /** Append `d` as-is (the array is referenced, not copied). */
  bytes(d: Uint8Array): this {
    return this.append(d);
  }

  /** Append `n` zero bytes. */
  zeros(n: number): this {
    return this.append(new Uint8Array(n));
  }

  /** Append each UTF-16 code unit of `s` (as returned by charCodeAt) as a little-endian UINT16. */
  utf16(s: string): this {
    const units = new Uint8Array(s.length * 2);
    for (let i = 0; i < s.length; i++) {
      const code = s.charCodeAt(i);
      units[i * 2] = code & 0xFF;
      units[i * 2 + 1] = (code >> 8) & 0xFF;
    }
    return this.append(units);
  }

  /**
   * Append a 4-byte COLORREF (R, G, B, 0) taken from a hex string.
   * An empty/missing value means '000000'; the first '#' is dropped and the
   * string is left-padded with '0' to six characters before parsing.
   */
  colorRef(hex: string): this {
    const digits = (hex || '000000').replace('#', '').padStart(6, '0');
    for (const at of [0, 2, 4]) {
      this.u8(parseInt(digits.slice(at, at + 2), 16));
    }
    return this.u8(0);
  }

  /** Concatenate everything written so far into one new Uint8Array. */
  build(): Uint8Array {
    const flat = new Uint8Array(this.total);
    let cursor = 0;
    for (const part of this.parts) {
      flat.set(part, cursor);
      cursor += part.length;
    }
    return flat;
  }
}
|
|
116
|
+
|
|
117
|
+
/* ═══════════════════════════════════════════════════════════════
|
|
118
|
+
HWP record builder
|
|
119
|
+
Format: 32-bit header = size(12)|level(10)|tag(10)
|
|
120
|
+
If size >= 0xFFF, append UINT32 with actual size.
|
|
121
|
+
═══════════════════════════════════════════════════════════════ */
|
|
122
|
+
|
|
123
|
+
/**
 * Wrap a payload in an HWP record.
 *
 * The 32-bit header packs the tag in bits 0-9, the level in bits 10-19 and the
 * payload size in bits 20-31. A payload of 0xFFF bytes or more stores 0xFFF in
 * the size field and is followed by a UINT32 holding the real length.
 *
 * @param tag   record tag ID (masked to 10 bits)
 * @param level nesting level (masked to 10 bits)
 * @param data  record payload
 * @returns header (+ optional extended size) followed by the payload
 */
function mkRec(tag: number, level: number, data: Uint8Array): Uint8Array {
  const EXTENDED = 0xFFF;
  const actual = data.length;
  const sizeField = actual < EXTENDED ? actual : EXTENDED;
  const header = (sizeField << 20) | ((level & 0x3FF) << 10) | (tag & 0x3FF);

  const rec = new BufWriter();
  rec.u32(header);
  if (sizeField === EXTENDED) rec.u32(actual);
  return rec.bytes(data).build();
}
|
|
132
|
+
|
|
133
|
+
/* ═══════════════════════════════════════════════════════════════
|
|
134
|
+
Style collector (first pass — deduplicates fonts/shapes)
|
|
135
|
+
═══════════════════════════════════════════════════════════════ */
|
|
136
|
+
|
|
137
|
+
/**
 * Dedupe key for a character shape: font, size (default 10), the six on/off
 * flags as 0/1, and colour (default '000000'), joined with '|'.
 */
function csKey(p: TextProps): string {
  const bit = (on: unknown): number => (on ? 1 : 0);
  const fields: (string | number)[] = [
    p.font ?? '',
    p.pt ?? 10,
    bit(p.b),
    bit(p.i),
    bit(p.u),
    bit(p.s),
    bit(p.sup),
    bit(p.sub),
    p.color ?? '000000',
  ];
  return fields.join('|');
}
|
|
141
|
+
/**
 * Dedupe key for a paragraph shape.
 *
 * The line-height component is the value mkParaShape actually writes
 * (`round(lineHeight × 100)`, or 160 when lineHeight is missing/zero). Keying
 * on `lineHeight ?? 1` instead made `{}` and `{ lineHeight: 1 }` share a key
 * even though they serialize to 160 and 100 respectively, so single-spaced
 * paragraphs were silently mapped onto the default shape.
 *
 * @param p paragraph properties
 * @returns '|'-joined key of align, indent, space before/after and line spacing
 */
function psKey(p: ParaProps): string {
  const lineSpacing = p.lineHeight ? Math.round(p.lineHeight * 100) : 160;
  return [
    p.align ?? 'left',
    p.indentPt ?? 0,
    p.spaceBefore ?? 0,
    p.spaceAfter ?? 0,
    lineSpacing,
  ].join('|');
}
|
|
145
|
+
/**
 * Dedupe key for a border fill: stroke kind, width and colour plus the
 * optional background colour (empty when absent), separated by '|'.
 */
function bfKey(s: Stroke, bg?: string): string {
  const fill = bg ?? '';
  return String(s.kind) + '|' + String(s.pt) + '|' + String(s.color) + '|' + fill;
}
|
|
148
|
+
|
|
149
|
+
/**
 * First-pass registry that deduplicates fonts, character shapes, paragraph
 * shapes and border fills and hands out the IDs the body records refer to.
 *
 * Fonts, character shapes and paragraph shapes use 0-based IDs, each with a
 * default entry pre-registered at 0. Border fills use 1-based IDs; the default
 * stroke is registered by the constructor and therefore gets ID 1.
 */
class StyleCollector {
  /** Stroke used when neither a cell nor its table specifies one. */
  readonly DEF_STROKE: Stroke = { kind: 'solid', pt: 0.5, color: '000000' };

  /** Font names in ID order; ID 0 is the fallback font. */
  fonts: string[] = ['Malgun Gothic'];
  private fontIdx = new Map<string, number>([['Malgun Gothic', 0]]);

  /** Character shapes in ID order; ID 0 is the empty (default) shape. */
  csProps: TextProps[] = [{}];
  private csIdx = new Map<string, number>([[csKey({}), 0]]);

  /** Paragraph shapes in ID order; ID 0 is the empty (default) shape. */
  psProps: ParaProps[] = [{}];
  private psIdx = new Map<string, number>([[psKey({}), 0]]);

  /** Border fills in ID order; the fill with ID n lives at index n - 1. */
  bfData: { s: Stroke; bg?: string }[] = [];
  private bfIdx = new Map<string, number>();

  constructor() {
    // Guarantees that border fill ID 1 is the default stroke.
    this.addBorderFill(this.DEF_STROKE);
  }

  /** ID of `name` (an empty name means the fallback font), registering it when new. */
  font(name: string): number {
    const face = name || 'Malgun Gothic';
    const known = this.fontIdx.get(face);
    if (known !== undefined) return known;

    const id = this.fonts.length;
    this.fonts.push(face);
    this.fontIdx.set(face, id);
    return id;
  }

  /** ID of the character shape for `p`; a new shape also registers its font. */
  addCharShape(p: TextProps): number {
    const key = csKey(p);
    const known = this.csIdx.get(key);
    if (known !== undefined) return known;

    const id = this.csProps.length;
    this.csProps.push(p);
    this.csIdx.set(key, id);
    if (p.font) this.font(p.font);
    return id;
  }

  /** ID of the paragraph shape for `p`, registering it when new. */
  addParaShape(p: ParaProps): number {
    const key = psKey(p);
    const known = this.psIdx.get(key);
    if (known !== undefined) return known;

    const id = this.psProps.length;
    this.psProps.push(p);
    this.psIdx.set(key, id);
    return id;
  }

  /** 1-based ID of the border fill for stroke `s` and optional background `bg`. */
  addBorderFill(s: Stroke, bg?: string): number {
    const key = bfKey(s, bg);
    const known = this.bfIdx.get(key);
    if (known !== undefined) return known;

    const id = this.bfData.length + 1;
    this.bfData.push({ s, bg });
    this.bfIdx.set(key, id);
    return id;
  }
}
|
|
206
|
+
|
|
207
|
+
/**
 * Register every style a content node uses with the collector.
 *
 * Paragraphs contribute their paragraph shape and the character shape of each
 * span child. Grids contribute the table default stroke (when set), one border
 * fill per cell, and — recursively — the styles of each cell's children.
 * Nodes with any other tag are ignored.
 */
function collectNode(node: ContentNode, col: StyleCollector): void {
  switch (node.tag) {
    case 'para': {
      col.addParaShape(node.props);
      for (const child of node.kids) {
        if (child.tag !== 'span') continue;
        col.addCharShape((child as SpanNode).props);
      }
      return;
    }
    case 'grid': {
      const tableStroke = node.props.defaultStroke;
      if (tableStroke) col.addBorderFill(tableStroke);
      for (const row of node.kids) {
        for (const cell of row.kids) {
          // Same stroke fallback order the grid encoder applies per cell.
          const stroke = cell.props.top ?? node.props.defaultStroke ?? col.DEF_STROKE;
          col.addBorderFill(stroke, cell.props.bg);
          for (const inner of cell.kids) collectNode(inner, col);
        }
      }
      return;
    }
    default:
      return;
  }
}
|
|
223
|
+
|
|
224
|
+
/* ═══════════════════════════════════════════════════════════════
|
|
225
|
+
DocInfo record builders
|
|
226
|
+
═══════════════════════════════════════════════════════════════ */
|
|
227
|
+
|
|
228
|
+
/**
 * Payload of the ID_MAPPINGS record: eight UINT32 counters, in the order
 * fonts, border fills, char shapes, tab defs, numberings, bullets,
 * para shapes, styles. The counters this encoder never emits are written as 0.
 */
function mkIdMappings(col: StyleCollector): Uint8Array {
  const counts = [
    col.fonts.length,
    col.bfData.length,
    col.csProps.length,
    0, // tabDef count
    0, // numbering count
    0, // bullet count
    col.psProps.length,
    0, // style count
  ];
  const w = new BufWriter();
  for (const n of counts) w.u32(n);
  return w.build();
}
|
|
240
|
+
|
|
241
|
+
/**
 * Payload of a FACE_NAME record: one zero byte (substType), the name length in
 * UTF-16 code units as UINT16, then the name itself as UTF-16LE.
 */
function mkFaceName(name: string): Uint8Array {
  const w = new BufWriter();
  w.u8(0); // substType
  w.u16(name.length);
  w.utf16(name);
  return w.build();
}
|
|
248
|
+
|
|
249
|
+
/**
 * Index of the BORDER_W_PT entry closest to `pt`. On a tie the lower index
 * wins; if no entry compares closer than the first one, 0 is returned.
 */
function borderWidthIdx(pt: number): number {
  let nearest = 0;
  let gap = Math.abs(BORDER_W_PT[0] - pt);
  BORDER_W_PT.forEach((width, idx) => {
    const d = Math.abs(width - pt);
    if (d < gap) {
      gap = d;
      nearest = idx;
    }
  });
  return nearest;
}
|
|
256
|
+
|
|
257
|
+
/**
 * Payload of a BORDER_FILL record (44 bytes):
 *   UINT16 attr (0),
 *   5 × { UINT8 line type, UINT8 width index, COLORREF } — left, right, top,
 *   bottom, diagonal, all taken from the same stroke,
 *   12-byte fill block: type, face colour, reserved.
 *
 * @param s  stroke applied to all five borders (unknown kinds fall back to 1)
 * @param bg optional background colour; when set the fill type is 1
 */
function mkBorderFill(s: Stroke, bg?: string): Uint8Array {
  const lineType = BORDER_KIND_IDX[s.kind] ?? 1;
  const widthIdx = borderWidthIdx(s.pt);
  const lineColor = s.color || '000000';

  const w = new BufWriter();
  w.u16(0); // attr
  let side = 0;
  while (side < 5) {
    w.u8(lineType);
    w.u8(widthIdx);
    w.colorRef(lineColor);
    side++;
  }

  // fill: type(4) + faceColor(4) + reserved(4)
  if (bg) {
    w.u32(1);
    w.colorRef(bg);
    w.u32(0);
  } else {
    w.zeros(12);
  }
  return w.build();
}
|
|
269
|
+
|
|
270
|
+
/**
 * Payload of a CHAR_SHAPE record (56 bytes).
 *
 * Layout: faceId[7] (UINT16), ratio[7], spacing[7], relSize[7], offset[7]
 * (UINT8 each), height at offset 42 (pt × 100), attribute word at 46, two
 * shadow-offset bytes at 50-51 and the text colour at 52. All seven slots of
 * each array receive the same value.
 *
 * @param p   text properties (size defaults to 10pt, colour to '000000')
 * @param col collector used to resolve the font name to its ID
 */
function mkCharShape(p: TextProps, col: StyleCollector): Uint8Array {
  const faceId = p.font ? col.font(p.font) : 0;
  const w = new BufWriter();
  const sevenTimes = (emit: () => void): void => {
    for (let slot = 0; slot < 7; slot++) emit();
  };

  sevenTimes(() => w.u16(faceId)); // faceId[7]
  sevenTimes(() => w.u8(100)); // ratio[7]
  sevenTimes(() => w.u8(0)); // spacing[7]
  sevenTimes(() => w.u8(100)); // relSize[7]
  sevenTimes(() => w.u8(0)); // offset[7]

  // height @ offset 42 (pt × 100)
  w.u32(Math.round((p.pt ?? 10) * 100));

  // attr @ offset 46
  const attr =
    (p.i ? 1 : 0) | // italic = bit 0
    (p.b ? 2 : 0) | // bold = bit 1
    (p.u ? 1 << 2 : 0) | // underline type field (bits 2-4) = 1
    (p.s ? 1 << 18 : 0) | // strike type field (bits 18-20) = 1
    (p.sup ? 1 << 16 : 0) | // super/sub field (bits 16-17) = 1
    (p.sub ? 2 << 16 : 0); // super/sub field (bits 16-17) = 2
  w.u32(attr);

  w.u8(0).u8(0); // shadowX, shadowY @ 50-51
  w.colorRef(p.color ?? '000000'); // textColor @ 52
  return w.build();
}
|
|
293
|
+
|
|
294
|
+
/**
 * Payload of a PARA_SHAPE record (28 bytes): the attribute word carrying the
 * alignment code, followed by six INT32 values — left margin, right margin
 * (0), first-line indent (0), space before, space after and line spacing.
 *
 * Unknown or missing alignment is written as 1 (left). Line spacing is
 * `lineHeight × 100`, or 160 when lineHeight is missing or zero.
 */
function mkParaShape(p: ParaProps): Uint8Array {
  const alignCode = ALIGN_CODE[p.align ?? 'left'] ?? 1;
  const lineSpacing = p.lineHeight ? Math.round(p.lineHeight * 100) : 160;

  const w = new BufWriter();
  w.u32(alignCode); // attr (bits 0-2 = align)
  w.i32(Metric.ptToHwp(p.indentPt ?? 0)); // leftMargin
  w.i32(0); // rightMargin
  w.i32(0); // indent (first-line)
  w.i32(Metric.ptToHwp(p.spaceBefore ?? 0));
  w.i32(Metric.ptToHwp(p.spaceAfter ?? 0));
  w.i32(lineSpacing);
  return w.build();
}
|
|
305
|
+
|
|
306
|
+
/**
 * Uncompressed DocInfo stream: one ID_MAPPINGS record followed by a record per
 * font, border fill, character shape and paragraph shape, each group in
 * collector ID order. All records are written at level 0.
 */
function buildDocInfoStream(col: StyleCollector): Uint8Array {
  const records: Uint8Array[] = [mkRec(TAG_ID_MAPPINGS, 0, mkIdMappings(col))];

  for (const face of col.fonts) {
    records.push(mkRec(TAG_FACE_NAME, 0, mkFaceName(face)));
  }
  for (const fill of col.bfData) {
    records.push(mkRec(TAG_BORDER_FILL, 0, mkBorderFill(fill.s, fill.bg)));
  }
  for (const shape of col.csProps) {
    records.push(mkRec(TAG_CHAR_SHAPE, 0, mkCharShape(shape, col)));
  }
  for (const shape of col.psProps) {
    records.push(mkRec(TAG_PARA_SHAPE, 0, mkParaShape(shape)));
  }
  return concatU8(records);
}
|
|
316
|
+
|
|
317
|
+
/* ═══════════════════════════════════════════════════════════════
|
|
318
|
+
BodyText record builders
|
|
319
|
+
═══════════════════════════════════════════════════════════════ */
|
|
320
|
+
|
|
321
|
+
/**
 * Payload of a PAGE_DEF record (40 bytes): page width, page height and the
 * left / right / top / bottom margins (each converted with Metric.ptToHwp),
 * 12 zero bytes for the header/footer/gutter margins, then the attribute word
 * at offset 36 (1 = landscape, 0 otherwise).
 */
function mkPageDef(dims: PageDims): Uint8Array {
  const w = new BufWriter();
  const lengthsPt = [dims.wPt, dims.hPt, dims.ml, dims.mr, dims.mt, dims.mb];
  for (const pt of lengthsPt) w.u32(Metric.ptToHwp(pt));

  w.zeros(12); // header/footer/gutter margins (3 × INT32)
  w.u32(dims.orient === 'landscape' ? 1 : 0); // attr @ offset 36
  return w.build();
}
|
|
333
|
+
|
|
334
|
+
/**
 * Payload of a PARA_HEADER record (20 bytes). Only the paragraph-shape ID
 * (offset 8) and the char-shape run count (offset 10) carry data; every other
 * field is written as zero.
 *
 * @param psId    paragraph shape ID
 * @param csCount number of (position, shape) pairs in the PARA_CHAR_SHAPE record
 */
function mkParaHeader(psId: number, csCount: number): Uint8Array {
  const w = new BufWriter();
  w.u32(0); // paragraphControlMask
  w.u16(0); // styleId
  w.u8(0); // divideAttr
  w.u8(0);
  w.u16(psId); // paraShapeId @ offset 8
  w.u16(csCount); // charShapeCount @ offset 10
  w.u16(0); // rangeTagCount
  w.u16(0); // memoCount
  w.i32(0); // paraChangeId
  return w.build();
}
|
|
347
|
+
|
|
348
|
+
/**
 * Payload of a PARA_TEXT record: every UTF-16 code unit of `text` as a
 * little-endian UINT16, with code units below 32 replaced by 0, followed by
 * the paragraph terminator 0x000D.
 */
function mkParaText(text: string): Uint8Array {
  const PARA_END = 13;
  const w = new BufWriter();
  for (const unit of text.split('')) {
    const code = unit.charCodeAt(0);
    w.u16(code >= 32 ? code : 0); // control chars become 0
  }
  return w.u16(PARA_END).build();
}
|
|
357
|
+
|
|
358
|
+
/**
 * Payload of a PARA_CHAR_SHAPE record: for each pair, the start position and
 * the character-shape ID, both as UINT32, in the order given.
 */
function mkParaCharShape(pairs: [pos: number, id: number][]): Uint8Array {
  const w = new BufWriter();
  pairs.forEach(([start, shapeId]) => {
    w.u32(start);
    w.u32(shapeId);
  });
  return w.build();
}
|
|
363
|
+
|
|
364
|
+
/**
 * Encode one paragraph as three records: PARA_HEADER at level `lv`, then
 * PARA_TEXT and PARA_CHAR_SHAPE at `lv + 1`.
 *
 * The text of all 'txt' children of 'span' children is concatenated; other
 * children are skipped. A (position, shapeId) pair is recorded each time the
 * character shape differs from the previous span's. A paragraph without spans
 * gets the single pair (0, 0).
 */
function encodePara(para: ParaNode, col: StyleCollector, lv: number): Uint8Array[] {
  const runs: [number, number][] = [];
  let body = '';
  let cursor = 0;

  for (const child of para.kids) {
    if (child.tag !== 'span') continue;
    const span = child as SpanNode;
    const shapeId = col.addCharShape(span.props);

    // Start a new run only when the shape actually changes.
    const prev = runs.length > 0 ? runs[runs.length - 1] : undefined;
    if (prev === undefined || prev[1] !== shapeId) runs.push([cursor, shapeId]);

    for (const leaf of span.kids) {
      if (leaf.tag !== 'txt') continue;
      body += leaf.content;
      cursor += leaf.content.length;
    }
  }

  if (runs.length === 0) runs.push([0, 0]);

  const paraShapeId = col.addParaShape(para.props);
  const header = mkRec(TAG_PARA_HEADER, lv, mkParaHeader(paraShapeId, runs.length));
  const textRec = mkRec(TAG_PARA_TEXT, lv + 1, mkParaText(body));
  const shapeRec = mkRec(TAG_PARA_CHAR_SHAPE, lv + 1, mkParaCharShape(runs));
  return [header, textRec, shapeRec];
}
|
|
391
|
+
|
|
392
|
+
/* ── Table encoding ─────────────────────────────────────────── */
|
|
393
|
+
|
|
394
|
+
/** Payload of the table CTRL_HEADER record: the table control ID followed by 12 zero bytes. */
function mkTableCtrl(): Uint8Array {
  const w = new BufWriter();
  w.u32(CTRL_TABLE);
  w.zeros(12);
  return w.build();
}
|
|
397
|
+
|
|
398
|
+
/**
 * Payload of the table body record: attribute word (0), row count and column
 * count as UINT16, 10 zero bytes (bytes 8-17: cell spacing / zone info), one
 * UINT16 per entry of `rowHwp`, and finally the table's border-fill ID.
 */
function mkTableB(rowCnt: number, colCnt: number, rowHwp: number[], bfId: number): Uint8Array {
  const w = new BufWriter()
    .u32(0) // attr
    .u16(rowCnt)
    .u16(colCnt)
    .zeros(10); // bytes 8-17
  rowHwp.forEach(height => w.u16(height));
  return w.u16(bfId).build();
}
|
|
408
|
+
|
|
409
|
+
/**
 * Payload of a table cell's LIST_HEADER record (34 bytes).
 *
 * The column address is written before the row address (offsets 8 and 10),
 * which is the order HwpScanner reads them in.
 *
 * @param paraCount number of paragraphs in the cell
 * @param row       row address
 * @param col       column address
 * @param rs        row span
 * @param cs        column span
 * @param wHwp      cell width
 * @param hHwp      cell height
 * @param bfId      border-fill ID
 */
function mkCellListHeader(
  paraCount: number,
  row: number, col: number,
  rs: number, cs: number,
  wHwp: number, hHwp: number,
  bfId: number,
): Uint8Array {
  const w = new BufWriter();
  w.u16(paraCount); //  0-1: paraCount
  w.u32(0); //          2-5: attr
  w.u16(0); //          6-7: unknown
  w.u16(col); //        8-9: colAddr (column first)
  w.u16(row); //      10-11: rowAddr
  w.u16(rs); //       12-13: rowSpan
  w.u16(cs); //       14-15: colSpan
  w.u32(wHwp); //     16-19: width
  w.u32(hHwp); //     20-23: height
  w.zeros(8); //      24-31: padding[4]
  w.u16(bfId); //     32-33: borderFillId
  return w.build();
}
|
|
432
|
+
|
|
433
|
+
/** Height, in points, given to every table row. */
const DEFAULT_ROW_HEIGHT_PT = 14;

/**
 * Encode a table as a flat record list.
 *
 * Emits CTRL_HEADER at level `lv` and the table body record at `lv + 1`, then
 * for each cell (row by row) a LIST_HEADER at `lv + 1` followed by the cell's
 * paragraphs encoded at `lv + 1` (so their text/shape records land on
 * `lv + 2`). A cell without children is given one empty paragraph.
 *
 * The column count is taken from the first row (minimum 1). A column with no
 * entry in `colWidths` gets an equal share of the total declared width, or of
 * 453pt when no widths are declared. A cell's border fill uses its `top`
 * stroke, falling back to the table's default stroke.
 */
function encodeGrid(grid: GridNode, col: StyleCollector, lv: number): Uint8Array[] {
  const rows = grid.kids;
  const rowCnt = rows.length;
  const colCnt = Math.max(1, rows[0]?.kids.length ?? 1);

  // Column widths
  const cwPt = grid.props.colWidths ?? [];
  const declaredPt = cwPt.reduce((acc, width) => acc + width, 0);
  const fallbackColPt = (declaredPt || 453) / colCnt; // 453pt ≈ A4 content width

  const tableStroke = grid.props.defaultStroke ?? col.DEF_STROKE;
  const tableBfId = col.addBorderFill(tableStroke);

  const rowHwp: number[] = [];
  for (let r = 0; r < rowCnt; r++) rowHwp.push(Metric.ptToHwp(DEFAULT_ROW_HEIGHT_PT));

  const records: Uint8Array[] = [
    mkRec(TAG_CTRL_HEADER, lv, mkTableCtrl()),
    mkRec(TAG_TABLE_B, lv + 1, mkTableB(rowCnt, colCnt, rowHwp, tableBfId)),
  ];

  rows.forEach((row, r) => {
    row.kids.forEach((cell, c) => {
      const wHwp = Metric.ptToHwp(cwPt[c] ?? fallbackColPt);
      const hHwp = rowHwp[r];
      const bfId = col.addBorderFill(cell.props.top ?? tableStroke, cell.props.bg);
      const paras = cell.kids.length > 0
        ? cell.kids
        : [{ tag: 'para' as const, props: {}, kids: [] }];

      records.push(mkRec(
        TAG_LIST_HEADER,
        lv + 1,
        mkCellListHeader(paras.length, r, c, cell.rs, cell.cs, wHwp, hHwp, bfId),
      ));

      // Cell paragraphs sit at the LIST_HEADER's level.
      for (const para of paras) {
        for (const rec of encodePara(para, col, lv + 1)) records.push(rec);
      }
    });
  });

  return records;
}
|
|
472
|
+
|
|
473
|
+
/**
 * Uncompressed Section0 stream: a PAGE_DEF record built from the first sheet's
 * dimensions (A4 when there is none), followed by the records of every
 * paragraph and grid of every sheet, in document order. Nodes with any other
 * tag are skipped.
 *
 * A grid is preceded by an empty container paragraph at level 0; the grid's
 * own records start at level 1.
 */
function buildBodyTextStream(doc: DocRoot, col: StyleCollector): Uint8Array {
  const pageDims = doc.kids[0]?.dims ?? A4;
  const out: Uint8Array[] = [mkRec(TAG_PAGE_DEF, 0, mkPageDef(pageDims))];

  for (const sheet of doc.kids) {
    for (const node of sheet.kids) {
      switch (node.tag) {
        case 'para':
          out.push(...encodePara(node as ParaNode, col, 0));
          break;
        case 'grid':
          // Container paragraph: default para shape, one char-shape run, no text.
          out.push(
            mkRec(TAG_PARA_HEADER, 0, mkParaHeader(0, 1)),
            mkRec(TAG_PARA_TEXT, 1, mkParaText('')),
            mkRec(TAG_PARA_CHAR_SHAPE, 1, mkParaCharShape([[0, 0]])),
          );
          // CTRL_HEADER at level 1; table body, list headers and cell
          // paragraph headers at level 2; cell text/shape records at level 3.
          out.push(...encodeGrid(node as GridNode, col, 1));
          break;
        default:
          break;
      }
    }
  }

  return concatU8(out);
}
|
|
497
|
+
|
|
498
|
+
/* ═══════════════════════════════════════════════════════════════
|
|
499
|
+
HWP FileHeader stream (256 bytes)
|
|
500
|
+
═══════════════════════════════════════════════════════════════ */
|
|
501
|
+
|
|
502
|
+
/**
 * Build the 256-byte FileHeader stream: the ASCII signature
 * 'HWP Document File' at offset 0, the version word 0x05000300 (5.0.3.0) at
 * offset 32 and the flags word at offset 36 with bit 0 (compressed) set.
 * Both words are little-endian; every other byte is zero.
 */
function buildHwpFileHeader(): Uint8Array {
  const header = new Uint8Array(256);
  const signature = Array.from('HWP Document File', ch => ch.charCodeAt(0));
  header.set(signature, 0);

  const view = new DataView(header.buffer);
  view.setUint32(32, 0x05000300, true); // version 5.0.3.0
  view.setUint32(36, 0x00000001, true); // flags: bit 0 = compressed
  return header;
}
|
|
511
|
+
|
|
512
|
+
/* ═══════════════════════════════════════════════════════════════
|
|
513
|
+
OLE2 / CFB container builder
|
|
514
|
+
Structure:
|
|
515
|
+
OLE2 header (512 bytes, not a sector)
|
|
516
|
+
Sector 0..fatN-1 : FAT sectors
|
|
517
|
+
Sector fatN : Directory sector 1 (entries 0-3)
|
|
518
|
+
Sector fatN+1 : Directory sector 2 (entries 4-7)
|
|
519
|
+
Sector fatN+2 .. : FileHeader data
|
|
520
|
+
then DocInfo data, then Section0 data
|
|
521
|
+
═══════════════════════════════════════════════════════════════ */
|
|
522
|
+
|
|
523
|
+
/**
 * Assemble the OLE2/CFB container holding the three HWP streams.
 *
 * File layout (512-byte sectors after the 512-byte header):
 *   FAT sectors, two directory sectors (8 entries), then the FileHeader,
 *   DocInfo and Section0 data, each zero-padded to whole sectors and chained
 *   contiguously in the FAT.
 *
 * Directory: Root Entry → FileHeader → DocInfo → BodyText linked through right
 * siblings only; BodyText's child is Section0. Entries 5-7 are unused.
 *
 * NOTE(review): MS-CFB places streams smaller than the mini-stream cutoff
 * (4096, as declared in the header) in the mini stream; this writer stores
 * every stream in regular sectors and declares no mini FAT — confirm that the
 * intended readers accept that.
 *
 * @param fileHeaderData raw FileHeader stream
 * @param docInfoData    DocInfo stream (as it should be stored)
 * @param section0Data   BodyText/Section0 stream (as it should be stored)
 * @returns the complete container
 * @throws Error when the FAT needs more than the 109 sectors addressable from
 *         the header DIFAT (DIFAT extension sectors are not written)
 */
function buildHwpOle2(
  fileHeaderData: Uint8Array,
  docInfoData: Uint8Array,
  section0Data: Uint8Array,
): Uint8Array {
  const SS = 512;
  const FAT_ENTRIES_PER_SECTOR = SS / 4;
  const HEADER_DIFAT_SLOTS = 109;
  const DIR_ENTRY_SIZE = 128;
  const ENDOFCHAIN = 0xFFFFFFFE;
  const FREESECT = 0xFFFFFFFF;
  const FATSECT = 0xFFFFFFFD;
  const NOSTREAM = -1; // 0xFFFFFFFF when written as a 32-bit value

  /** Zero-pad `d` to a whole number of sectors (at least one). */
  function padSector(d: Uint8Array): Uint8Array {
    const n = Math.ceil(Math.max(d.length, 1) / SS) * SS;
    if (d.length === n) return d;
    const out = new Uint8Array(n);
    out.set(d);
    return out;
  }

  const fhPad = padSector(fileHeaderData);
  const diPad = padSector(docInfoData);
  const s0Pad = padSector(section0Data);
  const fhN = fhPad.length / SS;
  const diN = diPad.length / SS;
  const s0N = s0Pad.length / SS;
  const dirN = 2; // always 2 directory sectors (8 entries)

  // The FAT must also describe its own sectors, so grow it to a fixed point.
  const payloadN = dirN + fhN + diN + s0N;
  let fatN = 1;
  while (Math.ceil((fatN + payloadN) / FAT_ENTRIES_PER_SECTOR) > fatN) {
    fatN = Math.ceil((fatN + payloadN) / FAT_ENTRIES_PER_SECTOR);
  }

  // Only the header DIFAT is written; beyond its capacity the extra FAT
  // sectors would be unreachable and the file corrupt, so fail loudly.
  if (fatN > HEADER_DIFAT_SLOTS) {
    throw new Error(
      `HWP container too large: ${fatN} FAT sectors needed but only ` +
      `${HEADER_DIFAT_SLOTS} fit in the header DIFAT`,
    );
  }

  // Sector indices
  const dir1Sec = fatN;
  const dir2Sec = fatN + 1;
  const fhSec = fatN + dirN;
  const diSec = fhSec + fhN;
  const s0Sec = diSec + diN;
  const totalSec = s0Sec + s0N;

  // FAT: every slot starts as FREESECT (all 0xFF bytes).
  const fatBuf = new Uint8Array(fatN * SS).fill(0xFF);
  const fatView = new DataView(fatBuf.buffer);
  const setFat = (sector: number, next: number): void => {
    fatView.setUint32(sector * 4, next >>> 0, true);
  };
  /** Link `count` consecutive sectors starting at `first` into one chain. */
  const chain = (first: number, count: number): void => {
    for (let i = 0; i < count; i++) {
      setFat(first + i, i + 1 < count ? first + i + 1 : ENDOFCHAIN);
    }
  };

  for (let i = 0; i < fatN; i++) setFat(i, FATSECT);
  chain(dir1Sec, dirN);
  chain(fhSec, fhN);
  chain(diSec, diN);
  chain(s0Sec, s0N);

  // Directory
  const dirBuf = new Uint8Array(dirN * SS);
  const dv = new DataView(dirBuf.buffer);

  function writeDirEntry(
    idx: number, name: string, type: number,
    left: number, right: number, child: number,
    startSec: number, size: number,
  ): void {
    const base = idx * DIR_ENTRY_SIZE;
    const nl = Math.min(name.length, 31);
    for (let i = 0; i < nl; i++) dv.setUint16(base + i * 2, name.charCodeAt(i), true);
    dv.setUint16(base + 64, (nl + 1) * 2, true); // name size in bytes, incl. terminator
    dirBuf[base + 66] = type;
    dirBuf[base + 67] = 1; // color = black
    dv.setInt32(base + 68, left, true); // left sibling
    dv.setInt32(base + 72, right, true); // right sibling
    dv.setInt32(base + 76, child, true); // child
    dv.setUint32(base + 116, startSec >>> 0, true);
    dv.setUint32(base + 120, size >>> 0, true);
  }

  // Right-skewed sibling chain (no left siblings):
  // Root.child → FileHeader → DocInfo → BodyText; BodyText.child → Section0.
  writeDirEntry(0, 'Root Entry', 5, -1, -1, 1, ENDOFCHAIN, 0);
  writeDirEntry(1, 'FileHeader', 2, -1, 2, -1, fhSec, fileHeaderData.length);
  writeDirEntry(2, 'DocInfo', 2, -1, 3, -1, diSec, docInfoData.length);
  writeDirEntry(3, 'BodyText', 1, -1, -1, 4, ENDOFCHAIN, 0);
  writeDirEntry(4, 'Section0', 2, -1, -1, -1, s0Sec, section0Data.length);

  // Unused entries stay type 0 and zeroed, except that their sibling/child IDs
  // must be NOSTREAM — a zero there would reference the root entry.
  const usedEntries = 5;
  const totalEntries = (dirN * SS) / DIR_ENTRY_SIZE;
  for (let idx = usedEntries; idx < totalEntries; idx++) {
    const base = idx * DIR_ENTRY_SIZE;
    dv.setInt32(base + 68, NOSTREAM, true);
    dv.setInt32(base + 72, NOSTREAM, true);
    dv.setInt32(base + 76, NOSTREAM, true);
  }

  // OLE2 header (512 bytes)
  const hdr = new Uint8Array(SS);
  const hdv = new DataView(hdr.buffer);
  hdr.set([0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1], 0); // magic
  hdv.setUint16(24, 0x003E, true); // minor version
  hdv.setUint16(26, 0x0003, true); // major version
  hdv.setUint16(28, 0xFFFE, true); // byte order (little-endian)
  hdv.setUint16(30, 9, true); // sector size = 2^9
  hdv.setUint16(32, 6, true); // mini sector size = 2^6
  hdv.setUint32(40, 0, true); // number of directory sectors (0 for v3)
  hdv.setUint32(44, fatN, true); // number of FAT sectors
  hdv.setUint32(48, dir1Sec, true); // first directory sector
  hdv.setUint32(52, 0, true); // transaction signature
  hdv.setUint32(56, 0x1000, true); // mini stream cutoff = 4096
  hdv.setUint32(60, ENDOFCHAIN, true); // first mini FAT sector (none)
  hdv.setUint32(64, 0, true); // number of mini FAT sectors
  hdv.setUint32(68, ENDOFCHAIN, true); // first DIFAT extension sector (none)
  hdv.setUint32(72, 0, true); // number of DIFAT extension sectors
  // Header DIFAT: FAT sectors are sectors 0..fatN-1; remaining slots are free.
  for (let i = 0; i < HEADER_DIFAT_SLOTS; i++) {
    hdv.setUint32(76 + i * 4, i < fatN ? i : FREESECT, true);
  }

  // Assemble: sector k lives at byte offset SS + k * SS.
  const out = new Uint8Array(SS + totalSec * SS);
  out.set(hdr, 0);
  out.set(fatBuf, SS); // FAT sectors 0..fatN-1 are contiguous
  out.set(dirBuf, SS + dir1Sec * SS); // both directory sectors are contiguous
  out.set(fhPad, SS + fhSec * SS);
  out.set(diPad, SS + diSec * SS);
  out.set(s0Pad, SS + s0Sec * SS);
  return out;
}
|
|
664
|
+
|
|
665
|
+
/* ═══════════════════════════════════════════════════════════════
|
|
666
|
+
Utility
|
|
667
|
+
═══════════════════════════════════════════════════════════════ */
|
|
668
|
+
|
|
669
|
+
/** Join the given byte arrays, in order, into one newly allocated Uint8Array. */
function concatU8(arrays: Uint8Array[]): Uint8Array {
  let length = 0;
  for (const part of arrays) length += part.length;

  const joined = new Uint8Array(length);
  let cursor = 0;
  arrays.forEach(part => {
    joined.set(part, cursor);
    cursor += part.length;
  });
  return joined;
}
|
|
676
|
+
|
|
677
|
+
/* ═══════════════════════════════════════════════════════════════
|
|
678
|
+
Encoder entry point
|
|
679
|
+
═══════════════════════════════════════════════════════════════ */
|
|
680
|
+
|
|
681
|
+
/**
 * Encoder producing an HWP 5.0 binary (OLE2 container) from a DocRoot.
 */
export class HwpEncoder implements Encoder {
  readonly format = 'hwp';

  /**
   * Encode `doc` to HWP bytes.
   *
   * Steps: collect unique styles, build the body and DocInfo record streams,
   * raw-deflate both, then wrap them with the FileHeader in an OLE2 container.
   *
   * @returns a success outcome with the file bytes, or a failure outcome whose
   *          message is prefixed with 'HwpEncoder: ' if anything throws
   */
  async encode(doc: DocRoot): Promise<Outcome<Uint8Array>> {
    try {
      // First pass: collect unique styles so IDs are stable before encoding.
      const col = new StyleCollector();
      for (const sheet of doc.kids) {
        for (const node of sheet.kids) collectNode(node, col);
      }

      // Build the body first: encoding registers styles through the collector,
      // so DocInfo is serialized afterwards to be sure it contains every ID
      // the body refers to.
      const bodyRaw = buildBodyTextStream(doc, col);
      const docInfoRaw = buildDocInfoStream(col);

      // Compress (FileHeader flags bit 0 = compressed)
      const docInfoCmp = pako.deflateRaw(docInfoRaw);
      const bodyCmp = pako.deflateRaw(bodyRaw);

      // Assemble OLE2 file
      const fileHdr = buildHwpFileHeader();
      const hwp = buildHwpOle2(fileHdr, docInfoCmp, bodyCmp);

      return succeed(hwp);
    } catch (e: unknown) {
      return fail(`HwpEncoder: ${e instanceof Error ? e.message : String(e)}`);
    }
  }
}

// Module side effect: make the encoder available through the shared registry.
registry.registerEncoder(new HwpEncoder());
|