hwpkit-dev 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,711 @@
1
+ /**
2
+ * HWP 5.0 encoder — DocRoot → HWP binary (OLE2/CFB container)
3
+ *
4
+ * OLE2 layout:
5
+ * FileHeader (stream) — 256-byte HWP signature + flags
6
+ * DocInfo (stream) — compressed FACE_NAME / CHAR_SHAPE / PARA_SHAPE records
7
+ * BodyText (storage)
8
+ * Section0 (stream) — compressed PAGE_DEF + paragraph/table records
9
+ */
10
+
11
+ import type { Encoder } from '../../contract/encoder';
12
+ import type { DocRoot, ContentNode, ParaNode, SpanNode, GridNode } from '../../model/doc-tree';
13
+ import type { Outcome } from '../../contract/result';
14
+ import type { TextProps, ParaProps, Stroke, PageDims } from '../../model/doc-props';
15
+ import { succeed, fail } from '../../contract/result';
16
+ import { Metric } from '../../safety/StyleBridge';
17
+ import { registry } from '../../pipeline/registry';
18
+ import { A4 } from '../../model/doc-props';
19
+ import pako from 'pako';
20
+
21
+ /* ═══════════════════════════════════════════════════════════════
22
+ HWP 5.0 tag IDs
23
+ ═══════════════════════════════════════════════════════════════ */
24
+
25
// Record tag IDs, expressed as offsets from HWPTAG_BEGIN (0x010).
// NOTE(review): two values below were corrected to the published HWP 5.0 tag
// table; any reader paired with this encoder must agree on them — confirm.
const T = 16;                         // HWPTAG_BEGIN

// DocInfo stream records
const TAG_ID_MAPPINGS     = T + 1;    // 17 (was T + 8, which is HWPTAG_BULLET)
const TAG_FACE_NAME       = T + 3;    // 19
const TAG_BORDER_FILL     = T + 4;    // 20
const TAG_CHAR_SHAPE      = T + 5;    // 21
const TAG_PARA_SHAPE      = T + 9;    // 25

// BodyText section records
const TAG_PARA_HEADER     = T + 50;   // 66
const TAG_PARA_TEXT       = T + 51;   // 67
const TAG_PARA_CHAR_SHAPE = T + 52;   // 68
const TAG_CTRL_HEADER     = T + 55;   // 71
const TAG_LIST_HEADER     = T + 56;   // 72
const TAG_PAGE_DEF        = T + 57;   // 73
const TAG_TABLE_B         = T + 61;   // 77 (was T + 64, which is HWPTAG_SHAPE_COMPONENT_ELLIPSE)

// Table control ID: ('t'<<24)|('b'<<16)|('l'<<8)|' '. Written as a
// little-endian UINT32 it appears in the file as the bytes ' lbt'.
const CTRL_TABLE = 0x74626C20;
40
+
41
/**
 * Border widths in points, indexed by the width code stored in a border-fill
 * record. Per the original note, this mirrors BORDER_W_PT in HwpScanner.
 */
const BORDER_W_PT = [
  0.28, 0.34, 0.43, 0.57,
  0.71, 0.85, 1.13, 1.42,
  1.70, 1.98, 2.84, 4.25,
  5.67, 8.50, 11.34, 14.17,
];

/** Stroke kind name → line-type code written into a border-fill record. */
const BORDER_KIND_IDX: Record<string, number> = {
  none: 0,
  solid: 1,
  dash: 2,
  dot: 3,
  double: 8,
};

/** Paragraph alignment name → alignment code written into a para-shape record. */
const ALIGN_CODE: Record<string, number> = {
  justify: 0,
  left: 1,
  right: 2,
  center: 3,
};
54
+
55
+ /* ═══════════════════════════════════════════════════════════════
56
+ Binary buffer writer
57
+ ═══════════════════════════════════════════════════════════════ */
58
+
59
/**
 * Append-only little-endian byte sink.
 *
 * Every write queues a chunk and returns `this` so calls can be chained;
 * `build()` flattens the queued chunks into one contiguous array.
 */
class BufWriter {
  private chunks: Uint8Array[] = [];
  private _sz = 0;

  /** Total number of bytes queued so far. */
  get size() { return this._sz; }

  /** Queues a chunk (kept by reference, not copied) and grows the byte count. */
  private push(part: Uint8Array, byteCount: number): this {
    this.chunks.push(part);
    this._sz += byteCount;
    return this;
  }

  /** Writes the low 8 bits of `v`. */
  u8(v: number): this {
    return this.push(Uint8Array.of(v & 0xFF), 1);
  }

  /** Writes the low 16 bits of `v`, least-significant byte first. */
  u16(v: number): this {
    const lo = v & 0xFF;
    const hi = (v >> 8) & 0xFF;
    return this.push(Uint8Array.of(lo, hi), 2);
  }

  /** Writes the low 32 bits of `v`, least-significant byte first. */
  u32(v: number): this {
    const le = new Uint8Array(4);
    for (let k = 0; k < 4; k++) le[k] = (v >>> (k * 8)) & 0xFF;
    return this.push(le, 4);
  }

  /** Writes a signed 32-bit value in two's-complement form. */
  i32(v: number): this {
    const asUnsigned = v < 0 ? v + 0x100000000 : v;
    return this.u32(asUnsigned);
  }

  /** Appends `d` as-is; the array is not copied until `build()`. */
  bytes(d: Uint8Array): this { return this.push(d, d.length); }

  /** Appends `n` zero bytes. */
  zeros(n: number): this { return this.push(new Uint8Array(n), n); }

  /** Writes each UTF-16 code unit of `s` as a little-endian UINT16. */
  utf16(s: string): this {
    let i = 0;
    while (i < s.length) {
      this.u16(s.charCodeAt(i));
      i += 1;
    }
    return this;
  }

  /**
   * Writes a 4-byte colour as R, G, B, 0 parsed from a hex string.
   * A leading '#' is stripped, short strings are left-padded with '0',
   * and an empty/missing value is treated as '000000'.
   */
  colorRef(hex: string): this {
    const h = (hex || '000000').replace('#', '').padStart(6, '0');
    for (const start of [0, 2, 4]) {
      this.u8(parseInt(h.slice(start, start + 2), 16));
    }
    return this.u8(0);
  }

  /** Copies all queued chunks, in order, into a single new array. */
  build(): Uint8Array {
    const out = new Uint8Array(this._sz);
    let cursor = 0;
    this.chunks.forEach(part => {
      out.set(part, cursor);
      cursor += part.length;
    });
    return out;
  }
}
116
+
117
+ /* ═══════════════════════════════════════════════════════════════
118
+ HWP record builder
119
+ Format: 32-bit header = size(12)|level(10)|tag(10)
120
+ If size >= 0xFFF, append UINT32 with actual size.
121
+ ═══════════════════════════════════════════════════════════════ */
122
+
123
/**
 * Wraps a payload in an HWP record.
 *
 * The 32-bit header packs tag (low 10 bits), level (next 10 bits) and size
 * (top 12 bits). Payloads of 0xFFF bytes or more store 0xFFF in the size
 * field and are followed by a UINT32 holding the real length.
 *
 * @param tag   record tag ID (masked to 10 bits)
 * @param level nesting level (masked to 10 bits)
 * @param data  record payload
 * @returns header, optional extended size, then the payload
 */
function mkRec(tag: number, level: number, data: Uint8Array): Uint8Array {
  const payloadLen = data.length;
  const extended = payloadLen >= 0xFFF;
  const sizeField = extended ? 0xFFF : payloadLen;
  const header = (sizeField << 20) | ((level & 0x3FF) << 10) | (tag & 0x3FF);

  const out = new BufWriter();
  out.u32(header);
  if (extended) out.u32(payloadLen);
  return out.bytes(data).build();
}
132
+
133
+ /* ═══════════════════════════════════════════════════════════════
134
+ Style collector (first pass — deduplicates fonts/shapes)
135
+ ═══════════════════════════════════════════════════════════════ */
136
+
137
/**
 * Builds the dedup key for a character shape: font, size and colour (with
 * their defaults applied) plus one 0/1 flag per text attribute, '|'-joined.
 */
function csKey(p: TextProps): string {
  const bit = (on: unknown): number => (on ? 1 : 0);
  const parts = [
    p.font ?? '',
    p.pt ?? 10,
    bit(p.b),
    bit(p.i),
    bit(p.u),
    bit(p.s),
    bit(p.sup),
    bit(p.sub),
    p.color ?? '000000',
  ];
  return parts.join('|');
}
141
/**
 * Builds the dedup key for a paragraph shape.
 *
 * Two property sets must share a key only if they serialise to the same
 * PARA_SHAPE record. The line-spacing component therefore uses the same rule
 * as the record writer in this file: a missing or zero `lineHeight` becomes
 * 160 (percent), otherwise `round(lineHeight * 100)`. The previous key used
 * `lineHeight ?? 1`, so `{}` and `{ lineHeight: 1 }` collided even though
 * they are written as 160% and 100% respectively.
 *
 * @param p paragraph properties (all fields optional)
 * @returns '|'-joined key: align, indent, space-before, space-after, line-spacing percent
 */
function psKey(p: ParaProps): string {
  const lineSpacingPct = p.lineHeight ? Math.round(p.lineHeight * 100) : 160;
  return [
    p.align ?? 'left',
    p.indentPt ?? 0,
    p.spaceBefore ?? 0,
    p.spaceAfter ?? 0,
    lineSpacingPct,
  ].join('|');
}
145
/**
 * Builds the dedup key for a border fill from the stroke's kind, width and
 * colour plus the optional background colour (empty when absent).
 */
function bfKey(s: Stroke, bg?: string): string {
  const background = bg ?? '';
  return String(s.kind) + '|' + String(s.pt) + '|' + String(s.color) + '|' + String(background);
}
148
+
149
/**
 * Deduplicating registry of fonts, character shapes, paragraph shapes and
 * border fills.
 *
 * Each `add*` method returns the ID of an equal, previously registered entry
 * or appends a new one. Fonts, character shapes and paragraph shapes use
 * 0-based IDs with a default entry at ID 0; border fills use 1-based IDs,
 * with the default stroke registered as ID 1 by the constructor.
 */
class StyleCollector {
  /** Stroke used when neither a cell nor its grid specifies one. */
  readonly DEF_STROKE: Stroke = { kind: 'solid', pt: 0.5, color: '000000' };

  fonts: string[] = ['Malgun Gothic'];
  private fontIdx = new Map<string, number>([['Malgun Gothic', 0]]);

  csProps: TextProps[] = [{}];
  private csIdx = new Map<string, number>([[csKey({}), 0]]);

  psProps: ParaProps[] = [{}];
  private psIdx = new Map<string, number>([[psKey({}), 0]]);

  bfData: { s: Stroke; bg?: string }[] = [];
  private bfIdx = new Map<string, number>();

  constructor() {
    // The default stroke always occupies border-fill ID 1.
    this.addBorderFill(this.DEF_STROKE);
  }

  /** Returns the ID of `name`, registering it if new; an empty name maps to 'Malgun Gothic'. */
  font(name: string): number {
    const face = name || 'Malgun Gothic';
    const known = this.fontIdx.get(face);
    if (known !== undefined) return known;

    const fresh = this.fonts.length;
    this.fonts.push(face);
    this.fontIdx.set(face, fresh);
    return fresh;
  }

  /** Returns the ID for `p`, registering the shape (and its font, if any) when new. */
  addCharShape(p: TextProps): number {
    const key = csKey(p);
    const known = this.csIdx.get(key);
    if (known !== undefined) return known;

    const fresh = this.csProps.length;
    this.csProps.push(p);
    this.csIdx.set(key, fresh);
    if (p.font) this.font(p.font);
    return fresh;
  }

  /** Returns the ID for `p`, registering the paragraph shape when new. */
  addParaShape(p: ParaProps): number {
    const key = psKey(p);
    const known = this.psIdx.get(key);
    if (known !== undefined) return known;

    const fresh = this.psProps.length;
    this.psProps.push(p);
    this.psIdx.set(key, fresh);
    return fresh;
  }

  /** Returns the 1-based border-fill ID for the stroke/background pair, registering it when new. */
  addBorderFill(s: Stroke, bg?: string): number {
    const key = bfKey(s, bg);
    const known = this.bfIdx.get(key);
    if (known !== undefined) return known;

    const fresh = this.bfData.length + 1;
    this.bfData.push({ s, bg });
    this.bfIdx.set(key, fresh);
    return fresh;
  }
}
206
+
207
/**
 * First-pass walk that registers every style a node will need.
 *
 * A paragraph registers its own shape and the shape of each span child.
 * A grid registers its default stroke (if set), then for each cell a border
 * fill built from the cell's top stroke (falling back to the grid default,
 * then the collector default) and the cell background, and recurses into the
 * cell's children. Other node kinds are ignored.
 */
function collectNode(node: ContentNode, col: StyleCollector): void {
  if (node.tag === 'para') {
    col.addParaShape(node.props);
    node.kids.forEach(kid => {
      if (kid.tag === 'span') col.addCharShape((kid as SpanNode).props);
    });
    return;
  }

  if (node.tag !== 'grid') return;

  const gridStroke = node.props.defaultStroke;
  if (gridStroke) col.addBorderFill(gridStroke);

  for (const row of node.kids) {
    for (const cell of row.kids) {
      const stroke = cell.props.top ?? node.props.defaultStroke ?? col.DEF_STROKE;
      col.addBorderFill(stroke, cell.props.bg);
      for (const para of cell.kids) collectNode(para, col);
    }
  }
}
223
+
224
+ /* ═══════════════════════════════════════════════════════════════
225
+ DocInfo record builders
226
+ ═══════════════════════════════════════════════════════════════ */
227
+
228
/**
 * Serialises the ID_MAPPINGS payload: eight UINT32 counters in the order
 * fonts, border fills, char shapes, tab defs, numberings, bullets,
 * para shapes, styles. Kinds this encoder never emits are written as 0.
 *
 * NOTE(review): the published HWP 5.0 layout appears to carry more counters
 * (e.g. binary data and one font count per language group) — confirm this
 * compact form is what the consuming reader expects.
 */
function mkIdMappings(col: StyleCollector): Uint8Array {
  const counters = [
    col.fonts.length,
    col.bfData.length,
    col.csProps.length,
    0, // tab definitions
    0, // numberings
    0, // bullets
    col.psProps.length,
    0, // styles
  ];
  const out = new BufWriter();
  for (const n of counters) out.u32(n);
  return out.build();
}
240
+
241
/**
 * Serialises a FACE_NAME payload: one attribute byte (0), the name length in
 * UTF-16 code units as a UINT16, then the name as UTF-16LE.
 */
function mkFaceName(name: string): Uint8Array {
  const out = new BufWriter();
  out.u8(0);              // substitute-font type
  out.u16(name.length);   // length in UTF-16 code units
  out.utf16(name);
  return out.build();
}
248
+
249
/**
 * Returns the index of the BORDER_W_PT entry closest to `pt`.
 * On a tie the lowest index wins; if no entry compares closer than the
 * first (e.g. `pt` is NaN) the result is 0.
 */
function borderWidthIdx(pt: number): number {
  let winner = 0;
  let winnerGap = Math.abs(BORDER_W_PT[0] - pt);
  BORDER_W_PT.forEach((width, idx) => {
    const gap = Math.abs(width - pt);
    if (gap < winnerGap) {
      winner = idx;
      winnerGap = gap;
    }
  });
  return winner;
}
256
+
257
/**
 * Serialises a BORDER_FILL payload in which all five borders share one stroke.
 *
 * Layout: UINT16 attr (0), then five border entries (line type, width index,
 * 4-byte colour), then a 12-byte fill block: fill type (1 = solid, 0 = none),
 * face colour, and a reserved UINT32. Total 2 + 5×6 + 12 = 44 bytes.
 *
 * @param s  stroke applied to every border; an unknown kind falls back to code 1
 * @param bg optional background colour; absent/empty means no fill
 */
function mkBorderFill(s: Stroke, bg?: string): Uint8Array {
  const out = new BufWriter();
  out.u16(0); // attr

  const lineType = BORDER_KIND_IDX[s.kind] ?? 1;
  const widthCode = borderWidthIdx(s.pt);
  const borders = ['left', 'right', 'top', 'bottom', 'diagonal'];
  for (const _side of borders) {
    out.u8(lineType);
    out.u8(widthCode);
    out.colorRef(s.color || '000000');
  }

  if (bg) {
    out.u32(1);       // fill type: solid colour
    out.colorRef(bg); // face colour
    out.u32(0);       // reserved
  } else {
    out.zeros(12);    // no fill: type, colour and reserved all zero
  }
  return out.build();
}
269
+
270
/**
 * Serialises a 56-byte CHAR_SHAPE payload.
 *
 * Layout: faceId[7] (UINT16 each, all set to the same font), then four
 * 7-byte arrays (ratio = 100, spacing = 0, relSize = 100, offset = 0),
 * height at offset 42 in 1/100 pt (default 10 pt), attribute flags at
 * offset 46, two shadow-offset bytes, and the text colour at offset 52.
 *
 * Attribute bits written here: italic = bit 0, bold = bit 1, underline sets
 * bit 2, strike-through sets bit 18, superscript sets bit 16 and subscript
 * sets bit 17.
 * NOTE(review): the published HWP 5.0 attribute table appears to put
 * superscript/subscript at bits 15/16 — confirm against the reader before
 * changing.
 */
function mkCharShape(p: TextProps, col: StyleCollector): Uint8Array {
  const LANG_SLOTS = 7;
  const faceId = p.font ? col.font(p.font) : 0;
  const out = new BufWriter();

  for (let k = 0; k < LANG_SLOTS; k++) out.u16(faceId);

  // ratio[7], spacing[7], relSize[7], offset[7] — one byte per language slot
  for (const fillValue of [100, 0, 100, 0]) {
    for (let k = 0; k < LANG_SLOTS; k++) out.u8(fillValue);
  }

  out.u32(Math.round((p.pt ?? 10) * 100)); // height, pt × 100

  const flagTable: [unknown, number][] = [
    [p.i, 1],
    [p.b, 2],
    [p.u, 1 << 2],
    [p.s, 1 << 18],
    [p.sup, 1 << 16],
    [p.sub, 2 << 16],
  ];
  let attr = 0;
  for (const [enabled, bits] of flagTable) {
    if (enabled) attr |= bits;
  }
  out.u32(attr);

  out.u8(0).u8(0);                    // shadow X / Y offsets
  out.colorRef(p.color ?? '000000');  // text colour
  return out.build();
}
293
+
294
/**
 * Serialises a 28-byte PARA_SHAPE payload: an attribute word holding the
 * alignment code (default/unknown → 1, left), then six signed 32-bit values:
 * left margin (from `indentPt`), right margin (0), first-line indent (0),
 * space before, space after, and line spacing in percent (default 160).
 *
 * NOTE(review): the alignment code is stored in the low bits of the attribute
 * word; the published HWP 5.0 table appears to place alignment at bits 2–4 —
 * confirm against the reader before changing.
 */
function mkParaShape(p: ParaProps): Uint8Array {
  const alignCode = ALIGN_CODE[p.align ?? 'left'] ?? 1;
  const lineSpacing = p.lineHeight ? Math.round(p.lineHeight * 100) : 160;

  const out = new BufWriter();
  out.u32(alignCode);
  out.i32(Metric.ptToHwp(p.indentPt ?? 0));    // left margin
  out.i32(0);                                  // right margin
  out.i32(0);                                  // first-line indent
  out.i32(Metric.ptToHwp(p.spaceBefore ?? 0));
  out.i32(Metric.ptToHwp(p.spaceAfter ?? 0));
  out.i32(lineSpacing);
  return out.build();
}
305
+
306
/**
 * Builds the uncompressed DocInfo stream: one ID_MAPPINGS record followed by
 * a record per font, border fill, character shape and paragraph shape, all
 * at level 0 and in collector order (so record position equals style ID).
 */
function buildDocInfoStream(col: StyleCollector): Uint8Array {
  const records: Uint8Array[] = [];
  records.push(mkRec(TAG_ID_MAPPINGS, 0, mkIdMappings(col)));

  for (const faceName of col.fonts) {
    records.push(mkRec(TAG_FACE_NAME, 0, mkFaceName(faceName)));
  }
  for (const fill of col.bfData) {
    records.push(mkRec(TAG_BORDER_FILL, 0, mkBorderFill(fill.s, fill.bg)));
  }
  for (const textProps of col.csProps) {
    records.push(mkRec(TAG_CHAR_SHAPE, 0, mkCharShape(textProps, col)));
  }
  for (const paraProps of col.psProps) {
    records.push(mkRec(TAG_PARA_SHAPE, 0, mkParaShape(paraProps)));
  }
  return concatU8(records);
}
316
+
317
+ /* ═══════════════════════════════════════════════════════════════
318
+ BodyText record builders
319
+ ═══════════════════════════════════════════════════════════════ */
320
+
321
/**
 * Serialises a 40-byte PAGE_DEF payload: page width and height, then the
 * left, right, top and bottom margins (each converted from points with
 * `Metric.ptToHwp`), 12 zero bytes for the header/footer/gutter margins, and
 * an attribute word that is 1 for landscape and 0 otherwise.
 */
function mkPageDef(dims: PageDims): Uint8Array {
  const out = new BufWriter();
  const measuresPt = [dims.wPt, dims.hPt, dims.ml, dims.mr, dims.mt, dims.mb];
  for (const pt of measuresPt) out.u32(Metric.ptToHwp(pt));

  out.zeros(12); // header, footer and gutter margins (3 × INT32)

  const attr = dims.orient === 'landscape' ? 1 : 0;
  return out.u32(attr).build();
}
333
+
334
/**
 * Serialises a 20-byte PARA_HEADER payload. Only the paragraph-shape ID
 * (offset 8) and the character-shape run count (offset 10) carry data;
 * every other field is written as zero.
 *
 * @param psId    paragraph-shape ID
 * @param csCount number of (position, shape) pairs in the paragraph's char-shape record
 */
function mkParaHeader(psId: number, csCount: number): Uint8Array {
  const out = new BufWriter();
  out.u32(0);       // 0-3:   control mask
  out.u16(0);       // 4-5:   style ID
  out.u8(0);        // 6:     divide attribute
  out.u8(0);        // 7
  out.u16(psId);    // 8-9:   paragraph-shape ID
  out.u16(csCount); // 10-11: char-shape run count
  out.u16(0);       // 12-13: range-tag count
  out.u16(0);       // 14-15: memo count
  out.i32(0);       // 16-19: change-tracking ID
  return out.build();
}
347
+
348
/**
 * Serialises PARA_TEXT: every UTF-16 code unit of `text` as a little-endian
 * UINT16, with code units below 32 replaced by 0, followed by the paragraph
 * terminator 0x000D.
 */
function mkParaText(text: string): Uint8Array {
  const PARA_END = 13;
  const out = new BufWriter();
  let i = 0;
  while (i < text.length) {
    const unit = text.charCodeAt(i);
    // Values below 32 are control codes in this format, so they are blanked.
    out.u16(unit >= 32 ? unit : 0);
    i += 1;
  }
  return out.u16(PARA_END).build();
}
357
+
358
/**
 * Serialises PARA_CHAR_SHAPE: for each pair, the start position and the
 * character-shape ID, both as UINT32.
 */
function mkParaCharShape(pairs: [pos: number, id: number][]): Uint8Array {
  const out = new BufWriter();
  pairs.forEach(pair => {
    out.u32(pair[0]);
    out.u32(pair[1]);
  });
  return out.build();
}
363
+
364
/**
 * Encodes one paragraph as three records: PARA_HEADER at level `lv`, then
 * PARA_TEXT and PARA_CHAR_SHAPE at `lv + 1`.
 *
 * Text is the concatenation of every 'txt' child of every span child.
 * A (position, shape ID) pair is recorded at the start of each span whose
 * shape differs from the previous pair's; a paragraph with no spans gets the
 * single pair (0, 0).
 */
function encodePara(para: ParaNode, col: StyleCollector, lv: number): Uint8Array[] {
  const runs: [number, number][] = [];
  let body = '';

  for (const kid of para.kids) {
    if (kid.tag !== 'span') continue;
    const span = kid as SpanNode;

    const shapeId = col.addCharShape(span.props);
    const prev = runs.length > 0 ? runs[runs.length - 1] : undefined;
    // Start a new run only when the shape actually changes.
    if (prev === undefined || prev[1] !== shapeId) {
      runs.push([body.length, shapeId]);
    }

    for (const piece of span.kids) {
      if (piece.tag === 'txt') body += piece.content;
    }
  }

  if (runs.length === 0) runs.push([0, 0]);

  const shapeOfPara = col.addParaShape(para.props);
  const header = mkRec(TAG_PARA_HEADER, lv, mkParaHeader(shapeOfPara, runs.length));
  const textRec = mkRec(TAG_PARA_TEXT, lv + 1, mkParaText(body));
  const runsRec = mkRec(TAG_PARA_CHAR_SHAPE, lv + 1, mkParaCharShape(runs));
  return [header, textRec, runsRec];
}
391
+
392
+ /* ── Table encoding ─────────────────────────────────────────── */
393
+
394
/** Serialises the table CTRL_HEADER payload: the table control ID followed by 12 zero bytes. */
function mkTableCtrl(): Uint8Array {
  const out = new BufWriter();
  out.u32(CTRL_TABLE);
  out.zeros(12);
  return out.build();
}
397
+
398
/**
 * Serialises the table-body payload: UINT32 attr (0), row count, column
 * count, 10 zero bytes (offsets 8–17), one UINT16 per entry of `rowHwp`,
 * and finally the table's border-fill ID.
 */
function mkTableB(rowCnt: number, colCnt: number, rowHwp: number[], bfId: number): Uint8Array {
  const out = new BufWriter()
    .u32(0)        // attr
    .u16(rowCnt)
    .u16(colCnt)
    .zeros(10);    // cell spacing / zone info
  rowHwp.forEach(height => { out.u16(height); });
  return out.u16(bfId).build();
}
408
+
409
/**
 * Serialises the 34-byte LIST_HEADER payload for one table cell.
 *
 * The column address is written before the row address (offsets 8 and 10);
 * per the original note this is the order the scanner reads them in.
 *
 * @param paraCount number of paragraphs in the cell
 * @param row       row address
 * @param col       column address
 * @param rs        row span
 * @param cs        column span
 * @param wHwp      cell width in HWP units
 * @param hHwp      cell height in HWP units
 * @param bfId      border-fill ID
 */
function mkCellListHeader(
  paraCount: number,
  row: number, col: number,
  rs: number, cs: number,
  wHwp: number, hHwp: number,
  bfId: number,
): Uint8Array {
  const out = new BufWriter();
  out.u16(paraCount); // 0-1:   paragraph count
  out.u32(0);         // 2-5:   attr
  out.u16(0);         // 6-7:   unknown
  out.u16(col);       // 8-9:   column address (written first)
  out.u16(row);       // 10-11: row address
  out.u16(rs);        // 12-13: row span
  out.u16(cs);        // 14-15: column span
  out.u32(wHwp);      // 16-19: width
  out.u32(hHwp);      // 20-23: height
  out.zeros(8);       // 24-31: padding[4]
  out.u16(bfId);      // 32-33: border-fill ID
  return out.build();
}
432
+
433
/** Height, in points, assigned to every table row. */
const DEFAULT_ROW_HEIGHT_PT = 14;

/**
 * Encodes a grid as a table control: CTRL_HEADER at level `lv`, the table
 * body at `lv + 1`, then for each cell a LIST_HEADER at `lv + 1` followed by
 * the cell's paragraphs (headers at `lv + 1`, their text records at `lv + 2`).
 *
 * The column count is taken from the first row. A column without an explicit
 * width gets an equal share of the total (sum of `colWidths`, or 453 pt when
 * that sum is 0 or absent). A cell with no children is emitted with a single
 * empty paragraph.
 *
 * NOTE(review): a cell's column address is its index within the row, so
 * spans in earlier cells are not accounted for — confirm the model never
 * omits covered cells.
 */
function encodeGrid(grid: GridNode, col: StyleCollector, lv: number): Uint8Array[] {
  const rows = grid.kids;
  const rowCnt = rows.length;
  const colCnt = Math.max(1, rows[0]?.kids.length ?? 1);

  // Column widths
  const cwPt = grid.props.colWidths ?? [];
  let widthSum = 0;
  for (const w of cwPt) widthSum += w;
  const totalPt = widthSum || 453; // ~A4 content width
  const defColPt = totalPt / colCnt;

  const defStroke = grid.props.defaultStroke ?? col.DEF_STROKE;
  const defBfId = col.addBorderFill(defStroke);

  const rowHwp: number[] = [];
  for (let r = 0; r < rowCnt; r++) rowHwp.push(Metric.ptToHwp(DEFAULT_ROW_HEIGHT_PT));

  const records: Uint8Array[] = [
    mkRec(TAG_CTRL_HEADER, lv, mkTableCtrl()),
    mkRec(TAG_TABLE_B, lv + 1, mkTableB(rowCnt, colCnt, rowHwp, defBfId)),
  ];

  rows.forEach((rowNode, r) => {
    rowNode.kids.forEach((cell, c) => {
      const wHwp = Metric.ptToHwp(cwPt[c] ?? defColPt);
      const hHwp = rowHwp[r];
      const stroke = cell.props.top ?? defStroke;
      const bfId = col.addBorderFill(stroke, cell.props.bg);
      const paras = cell.kids.length > 0 ? cell.kids : [{ tag: 'para' as const, props: {}, kids: [] }];

      const listHeader = mkCellListHeader(paras.length, r, c, cell.rs, cell.cs, wHwp, hHwp, bfId);
      records.push(mkRec(TAG_LIST_HEADER, lv + 1, listHeader));

      // Cell paragraphs sit at the same level as their LIST_HEADER.
      for (const para of paras) {
        for (const rec of encodePara(para, col, lv + 1)) records.push(rec);
      }
    });
  });

  return records;
}
472
+
473
/**
 * Builds the uncompressed Section0 stream: a PAGE_DEF record (dimensions of
 * the first sheet, or A4 when there is none) followed by the records of
 * every paragraph and grid of every sheet, in document order.
 *
 * A grid is wrapped in an empty container paragraph at level 0; its
 * CTRL_HEADER is at level 1, the table body, LIST_HEADERs and cell paragraph
 * headers at level 2, and cell text records at level 3. Other node kinds are
 * skipped.
 */
function buildBodyTextStream(doc: DocRoot, col: StyleCollector): Uint8Array {
  const pageDims = doc.kids[0]?.dims ?? A4;
  const records: Uint8Array[] = [mkRec(TAG_PAGE_DEF, 0, mkPageDef(pageDims))];

  for (const sheet of doc.kids) {
    for (const node of sheet.kids) {
      switch (node.tag) {
        case 'para': {
          for (const rec of encodePara(node as ParaNode, col, 0)) records.push(rec);
          break;
        }
        case 'grid': {
          // Container paragraph that hosts the table control.
          records.push(mkRec(TAG_PARA_HEADER, 0, mkParaHeader(0, 1)));
          records.push(mkRec(TAG_PARA_TEXT, 1, mkParaText('')));
          records.push(mkRec(TAG_PARA_CHAR_SHAPE, 1, mkParaCharShape([[0, 0]])));
          for (const rec of encodeGrid(node as GridNode, col, 1)) records.push(rec);
          break;
        }
        default:
          break;
      }
    }
  }

  return concatU8(records);
}
497
+
498
+ /* ═══════════════════════════════════════════════════════════════
499
+ HWP FileHeader stream (256 bytes)
500
+ ═══════════════════════════════════════════════════════════════ */
501
+
502
/**
 * Builds the 256-byte FileHeader stream: the ASCII signature
 * 'HWP Document File' at offset 0, the version 5.0.3.0 as a little-endian
 * UINT32 at offset 32, and the flags word at offset 36 with bit 0
 * (compressed) set. All other bytes are zero.
 */
function buildHwpFileHeader(): Uint8Array {
  const header = new Uint8Array(256);

  const signature = 'HWP Document File';
  header.set(Array.from(signature, ch => ch.charCodeAt(0)), 0);

  const view = new DataView(header.buffer);
  view.setUint32(32, 0x05000300, true); // version 5.0.3.0
  view.setUint32(36, 0x00000001, true); // flags: bit 0 = compressed
  return header;
}
511
+
512
+ /* ═══════════════════════════════════════════════════════════════
513
+ OLE2 / CFB container builder
514
+ Structure:
515
+ OLE2 header (512 bytes, not a sector)
516
+ Sector 0..fatN-1 : FAT sectors
517
+ Sector fatN : Directory sector 1 (entries 0-3)
518
+ Sector fatN+1 : Directory sector 2 (entries 4-7)
519
+ Sector fatN+2 .. : FileHeader data
520
+ then DocInfo data, then Section0 data
521
+ ═══════════════════════════════════════════════════════════════ */
522
+
523
/**
 * Packs the three HWP streams into an OLE2 / CFB (version 3, 512-byte
 * sector) container.
 *
 * Sector layout after the 512-byte header:
 *   FAT sectors, 2 directory sectors, FileHeader, DocInfo, Section0.
 *
 * Directory tree: Root → {DocInfo, BodyText, FileHeader}; BodyText → Section0.
 *
 * Changes from the previous version:
 *  - Siblings are chained in CFB name order (shorter name first, then
 *    upper-case comparison): DocInfo < BodyText < FileHeader. The chain is
 *    still right-only (no left links), but it is now a valid search tree, so
 *    readers that look entries up by name comparison can find all of them.
 *  - Unused directory entries get NOSTREAM (0xFFFFFFFF) sibling/child ids
 *    instead of 0, which would otherwise point at the root entry.
 *  - More FAT sectors than fit in the header's 109 DIFAT slots now raises an
 *    error instead of silently emitting a truncated DIFAT.
 *
 * NOTE(review): every stream is stored in regular FAT sectors while the
 * header declares a 4096-byte mini-stream cutoff and no mini FAT. Readers
 * that route streams smaller than the cutoff through the mini stream may not
 * find FileHeader (256 bytes) or small DocInfo/Section0 streams — confirm
 * against the target readers.
 *
 * @param fileHeaderData raw FileHeader stream
 * @param docInfoData    DocInfo stream as it should be stored
 * @param section0Data   BodyText/Section0 stream as it should be stored
 * @returns the complete container file
 * @throws Error when the content needs more than 109 FAT sectors
 */
function buildHwpOle2(
  fileHeaderData: Uint8Array,
  docInfoData: Uint8Array,
  section0Data: Uint8Array,
): Uint8Array {
  const SS = 512;
  const FAT_ENTRIES_PER_SECTOR = SS / 4; // 128
  const DIR_ENTRY_SIZE = 128;
  const HEADER_DIFAT_SLOTS = 109;
  const ENDOFCHAIN = 0xFFFFFFFE;
  const FREESECT   = 0xFFFFFFFF;
  const FATSECT    = 0xFFFFFFFD;
  const NOSTREAM   = -1;                 // 0xFFFFFFFF as a signed id

  /** Zero-pads `d` to a whole number of sectors (at least one). */
  function padSector(d: Uint8Array): Uint8Array {
    const n = Math.ceil(Math.max(d.length, 1) / SS) * SS;
    if (d.length === n) return d;
    const out = new Uint8Array(n);
    out.set(d);
    return out;
  }

  const fhPad = padSector(fileHeaderData);
  const diPad = padSector(docInfoData);
  const s0Pad = padSector(section0Data);
  const fhN = fhPad.length / SS;
  const diN = diPad.length / SS;
  const s0N = s0Pad.length / SS;
  const dirN = 2; // 2 directory sectors hold 8 entries; 5 are used

  // The FAT must also describe its own sectors, so grow it until it fits.
  let fatN = 1;
  for (let iter = 0; iter < 10; iter++) {
    const total = fatN + dirN + fhN + diN + s0N;
    const needed = Math.ceil(total / FAT_ENTRIES_PER_SECTOR);
    if (needed <= fatN) break;
    fatN = needed;
  }
  if (fatN > HEADER_DIFAT_SLOTS) {
    // DIFAT extension sectors are not implemented; fail loudly rather than
    // write a container whose FAT cannot be fully located.
    throw new Error(
      `OLE2 container too large: needs ${fatN} FAT sectors, at most ${HEADER_DIFAT_SLOTS} are supported`,
    );
  }

  // Sector indices
  const dir1Sec = fatN;
  const dir2Sec = fatN + 1;
  const fhSec = fatN + dirN;
  const diSec = fhSec + fhN;
  const s0Sec = diSec + diN;
  const totalSec = s0Sec + s0N;

  // FAT: start with every entry free, then mark FAT sectors and link chains.
  const fatBuf = new Uint8Array(fatN * SS).fill(0xFF);
  const setFat = (i: number, v: number) => {
    fatBuf[i * 4]     = v & 0xFF;
    fatBuf[i * 4 + 1] = (v >>> 8) & 0xFF;
    fatBuf[i * 4 + 2] = (v >>> 16) & 0xFF;
    fatBuf[i * 4 + 3] = (v >>> 24) & 0xFF;
  };
  /** Links `count` consecutive sectors starting at `first` into one chain. */
  const linkChain = (first: number, count: number) => {
    for (let i = 0; i < count; i++) {
      setFat(first + i, i + 1 < count ? first + i + 1 : ENDOFCHAIN);
    }
  };

  for (let i = 0; i < fatN; i++) setFat(i, FATSECT);
  setFat(dir1Sec, dir2Sec);
  setFat(dir2Sec, ENDOFCHAIN);
  linkChain(fhSec, fhN);
  linkChain(diSec, diN);
  linkChain(s0Sec, s0N);

  // Directory (8 entries × 128 bytes)
  const dirBuf = new Uint8Array(dirN * SS);
  const dv = new DataView(dirBuf.buffer);
  const dirEntryCount = (dirN * SS) / DIR_ENTRY_SIZE;

  function writeDirEntry(
    idx: number, name: string, type: number,
    left: number, right: number, child: number,
    startSec: number, size: number,
  ) {
    const base = idx * DIR_ENTRY_SIZE;
    const nl = Math.min(name.length, 31);
    for (let i = 0; i < nl; i++) dv.setUint16(base + i * 2, name.charCodeAt(i), true);
    dv.setUint16(base + 64, (nl + 1) * 2, true); // name size in bytes, incl. terminator
    dirBuf[base + 66] = type;
    dirBuf[base + 67] = 1;                       // colour = black
    dv.setInt32(base + 68, left, true);          // left sibling
    dv.setInt32(base + 72, right, true);         // right sibling
    dv.setInt32(base + 76, child, true);         // child
    dv.setUint32(base + 116, startSec >>> 0, true);
    dv.setUint32(base + 120, size >>> 0, true);
  }

  // Entry indices are unchanged; only the links differ. The right-only chain
  // runs in ascending name order: DocInfo(2) → BodyText(3) → FileHeader(1).
  writeDirEntry(0, 'Root Entry', 5, NOSTREAM, NOSTREAM, 2, ENDOFCHAIN, 0);
  writeDirEntry(1, 'FileHeader', 2, NOSTREAM, NOSTREAM, NOSTREAM, fhSec, fileHeaderData.length);
  writeDirEntry(2, 'DocInfo',    2, NOSTREAM, 3,        NOSTREAM, diSec, docInfoData.length);
  writeDirEntry(3, 'BodyText',   1, NOSTREAM, 1,        4,        ENDOFCHAIN, 0);
  writeDirEntry(4, 'Section0',   2, NOSTREAM, NOSTREAM, NOSTREAM, s0Sec, section0Data.length);

  // Unused entries: type 0 and all-zero except the three ids, which are NOSTREAM.
  for (let idx = 5; idx < dirEntryCount; idx++) {
    const base = idx * DIR_ENTRY_SIZE;
    dv.setInt32(base + 68, NOSTREAM, true);
    dv.setInt32(base + 72, NOSTREAM, true);
    dv.setInt32(base + 76, NOSTREAM, true);
  }

  // File header (512 bytes)
  const hdr = new Uint8Array(SS);
  const hdv = new DataView(hdr.buffer);
  const MAGIC = [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1];
  MAGIC.forEach((b, i) => { hdr[i] = b; });
  hdv.setUint16(24, 0x003E, true);     // minor version
  hdv.setUint16(26, 0x0003, true);     // major version
  hdv.setUint16(28, 0xFFFE, true);     // byte order (little-endian)
  hdv.setUint16(30, 9, true);          // sector size = 2^9 = 512
  hdv.setUint16(32, 6, true);          // mini sector size = 2^6 = 64
  hdv.setUint32(40, 0, true);          // directory sector count (0 for v3)
  hdv.setUint32(44, fatN, true);       // FAT sector count
  hdv.setUint32(48, dir1Sec, true);    // first directory sector
  hdv.setUint32(52, 0, true);          // transaction signature
  hdv.setUint32(56, 0x1000, true);     // mini stream cutoff = 4096
  hdv.setUint32(60, ENDOFCHAIN, true); // first mini FAT sector (none)
  hdv.setUint32(64, 0, true);          // mini FAT sector count
  hdv.setUint32(68, ENDOFCHAIN, true); // first DIFAT extension sector (none)
  hdv.setUint32(72, 0, true);          // DIFAT extension sector count
  // DIFAT[0..108]: FAT sectors occupy sector numbers 0..fatN-1.
  for (let i = 0; i < HEADER_DIFAT_SLOTS; i++) {
    hdv.setUint32(76 + i * 4, i < fatN ? i : FREESECT, true);
  }

  // Assemble: header, then each sector at (1 + sector index) × SS.
  const out = new Uint8Array(SS + totalSec * SS);
  out.set(hdr, 0);
  out.set(fatBuf, SS);
  out.set(dirBuf.subarray(0, SS), SS + dir1Sec * SS);
  out.set(dirBuf.subarray(SS, 2 * SS), SS + dir2Sec * SS);
  out.set(fhPad, SS + fhSec * SS);
  out.set(diPad, SS + diSec * SS);
  out.set(s0Pad, SS + s0Sec * SS);
  return out;
}
664
+
665
+ /* ═══════════════════════════════════════════════════════════════
666
+ Utility
667
+ ═══════════════════════════════════════════════════════════════ */
668
+
669
/** Concatenates the given byte arrays, in order, into one newly allocated array. */
function concatU8(arrays: Uint8Array[]): Uint8Array {
  let totalLen = 0;
  for (const part of arrays) totalLen += part.length;

  const joined = new Uint8Array(totalLen);
  let cursor = 0;
  arrays.forEach(part => {
    joined.set(part, cursor);
    cursor += part.length;
  });
  return joined;
}
676
+
677
+ /* ═══════════════════════════════════════════════════════════════
678
+ Encoder entry point
679
+ ═══════════════════════════════════════════════════════════════ */
680
+
681
/**
 * Encoder that serialises a document tree to an HWP 5.0 binary
 * (OLE2 container holding FileHeader, DocInfo and BodyText/Section0).
 */
export class HwpEncoder implements Encoder {
  readonly format = 'hwp';

  /**
   * Encodes `doc` to HWP bytes.
   *
   * Runs two passes over the document: the first registers every style so
   * DocInfo can list them, the second emits the body records that reference
   * those styles by ID. DocInfo and Section0 are raw-deflated, matching the
   * "compressed" flag set in the FileHeader stream.
   *
   * @returns a success outcome with the file bytes, or a failure outcome
   *          whose message is prefixed with 'HwpEncoder: ' if anything throws
   */
  async encode(doc: DocRoot): Promise<Outcome<Uint8Array>> {
    try {
      // First pass: collect unique styles
      const col = new StyleCollector();
      for (const sheet of doc.kids) {
        for (const node of sheet.kids) collectNode(node, col);
      }

      // Build streams
      const docInfoRaw = buildDocInfoStream(col);
      const bodyRaw = buildBodyTextStream(doc, col);

      // Compress (HWP flags bit 0 = compressed)
      const docInfoCmp = pako.deflateRaw(docInfoRaw);
      const bodyCmp = pako.deflateRaw(bodyRaw);

      // Assemble OLE2 file
      const fileHdr = buildHwpFileHeader();
      const hwp = buildHwpOle2(fileHdr, docInfoCmp, bodyCmp);

      return succeed(hwp);
    } catch (e: unknown) {
      // Thrown values are not guaranteed to be Error instances; narrow first.
      const reason = e instanceof Error ? e.message : String(e);
      return fail(`HwpEncoder: ${reason}`);
    }
  }
}

// Module side effect: make the encoder available through the shared registry.
registry.registerEncoder(new HwpEncoder());