hwpkit-dev 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.npmignore +1 -0
- package/dist/index.d.mts +34 -3
- package/dist/index.d.ts +30 -3
- package/dist/index.js +2138 -245
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +2134 -245
- package/dist/index.mjs.map +1 -1
- package/hwp-analyze.ts +90 -0
- package/inspect-doc.ts +57 -0
- package/output_test.hwp +0 -0
- package/package.json +3 -1
- package/src/decoders/docx/DocxDecoder.ts +155 -30
- package/src/decoders/hwp/HwpScanner.ts +258 -37
- package/src/decoders/hwpx/HwpxDecoder.ts +9 -1
- package/src/encoders/docx/DocxEncoder.ts +199 -158
- package/src/encoders/html/HtmlEncoder.ts +205 -0
- package/src/encoders/hwp/HwpEncoder.ts +864 -222
- package/src/encoders/hwpx/HwpxEncoder.ts +119 -59
- package/src/encoders/md/MdEncoder.ts +98 -16
- package/src/index.ts +1 -0
- package/src/model/builders.ts +4 -2
- package/src/model/doc-tree.ts +1 -1
- package/src/pipeline/Pipeline.ts +14 -1
- package/src/safety/StyleBridge.ts +1 -1
- package/test-docx-to-hwp.ts +45 -0
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import type { Decoder } from '../../contract/decoder';
|
|
2
|
-
import type { DocRoot, ContentNode, ParaNode, SpanNode } from '../../model/doc-tree';
|
|
2
|
+
import type { DocRoot, ContentNode, ParaNode, SpanNode, ImgNode } from '../../model/doc-tree';
|
|
3
3
|
import type { Outcome } from '../../contract/result';
|
|
4
4
|
import type { Align, Stroke, StrokeKind, PageDims, TextProps, ParaProps, CellProps, GridProps } from '../../model/doc-props';
|
|
5
5
|
import { succeed, fail } from '../../contract/result';
|
|
6
|
-
import { buildRoot, buildSheet, buildPara, buildSpan, buildGrid, buildRow, buildCell } from '../../model/builders';
|
|
6
|
+
import { buildRoot, buildSheet, buildPara, buildSpan, buildGrid, buildRow, buildCell, buildImg } from '../../model/builders';
|
|
7
7
|
import { ShieldedParser } from '../../safety/ShieldedParser';
|
|
8
8
|
import { BinaryKit } from '../../toolkit/BinaryKit';
|
|
9
9
|
import { Metric, safeHex, safeFont } from '../../safety/StyleBridge';
|
|
@@ -37,8 +37,11 @@ const TAG_CELL_B = HWPTAG_BEGIN + 65; // 81
|
|
|
37
37
|
/** Reports whether a record tag is one of the two table tag values. */
function isTableTag(t: number) {
  if (t === TAG_TABLE_A) return true;
  return t === TAG_TABLE_B;
}
|
|
38
38
|
/** Reports whether a record tag is a cell tag (either variant) or a list header. */
function isCellTag(t: number) {
  switch (t) {
    case TAG_CELL_A:
    case TAG_CELL_B:
    case TAG_LIST_HEADER:
      return true;
    default:
      return false;
  }
}
|
|
39
39
|
|
|
40
|
-
// CTRL_HEADER ctrlId
|
|
41
|
-
const CTRL_TABLE = 0x74626C20;
|
|
40
|
+
// CTRL_HEADER ctrlId values.
// Each constant spells a four-character code when its hex digits are read left to
// right (0x74 't', 0x62 'b', 0x6C 'l', 0x20 ' '). The parser obtains ctrlId with a
// little-endian 32-bit read, so the bytes appear in the stream in reverse order
// (e.g. ' lbt' for the table control).
// NOTE(review): only CTRL_TABLE is compared against in the parsing code visible in
// this diff; confirm the image/object/figure IDs against the HWP 5.0 specification
// before relying on them.
const CTRL_TABLE = 0x74626C20; // 'tbl '
const CTRL_IMAGE = 0x696D6720; // 'img '
const CTRL_OBJ = 0x6F626A20; // 'obj '
const CTRL_FIG = 0x66696720; // 'fig '
|
|
42
45
|
|
|
43
46
|
/* ═══════════════════════════════════════════════════════════════
|
|
44
47
|
Types
|
|
@@ -83,7 +86,14 @@ interface DocInfo {
|
|
|
83
86
|
}
|
|
84
87
|
|
|
85
88
|
interface ParsedChar { pos: number; ch: string }
|
|
86
|
-
interface
|
|
89
|
+
/**
 * One extended-control slot encountered by decodeParaText.
 * - pos:     character position of the control within the paragraph text
 * - ctrlId:  0 when first decoded; overwritten with the ctrlId of the CTRL_HEADER
 *            record that has the same ordinal within the paragraph
 * - objId:   16-bit value read from byte offset 8 of the 16-byte control, or 0 when
 *            the control is truncated
 * - matched: false when first decoded; set to true once a CTRL_HEADER has been paired
 */
interface ParsedCtrl { pos: number; ctrlId: number; objId: number; matched: boolean }
/** Output of decodeParaText: the plain characters plus the extended controls found between them. */
interface ParaTextResult { chars: ParsedChar[]; controls: ParsedCtrl[] }

/**
 * A binary object returned by extractImagesFromOleObjectLink.
 * mimeType is sniffed from the leading bytes of `data` and is
 * 'application/octet-stream' when no known image signature matches.
 */
interface OleObject {
  id: number;
  data: Uint8Array;
  mimeType: string;
}
|
|
87
97
|
|
|
88
98
|
/* ═══════════════════════════════════════════════════════════════
|
|
89
99
|
Low-level record parsing
|
|
@@ -111,7 +121,9 @@ function parseRecords(data: Uint8Array): HwpRecord[] {
|
|
|
111
121
|
}
|
|
112
122
|
|
|
113
123
|
/**
 * Best-effort decompression of a stream.
 *
 * Tries `pako.inflate` first and `pako.inflateRaw` second; the first call that
 * does not throw wins. If both throw, the input is handed back untouched so
 * callers can still attempt to parse it as uncompressed data.
 *
 * @param data possibly-compressed bytes
 * @returns the inflated bytes, or `data` itself when neither strategy succeeds
 */
function tryInflate(data: Uint8Array): Uint8Array {
  const strategies = [
    () => pako.inflate(data),
    () => pako.inflateRaw(data),
  ];
  for (const run of strategies) {
    try {
      return run();
    } catch {
      // This strategy rejected the input; try the next one.
    }
  }
  return data;
}
|
|
116
128
|
|
|
117
129
|
/* ═══════════════════════════════════════════════════════════════
|
|
@@ -212,7 +224,7 @@ function parseParaShape(d: Uint8Array): HwpParaShape {
|
|
|
212
224
|
if (d.length < 4) return { align: 'left', spaceBefore: 0, spaceAfter: 0, lineSpacing: 160, indent: 0 };
|
|
213
225
|
const attr = BinaryKit.readU32LE(d, 0);
|
|
214
226
|
return {
|
|
215
|
-
align: ALIGN_TBL[attr & 0x7] ?? 'left',
|
|
227
|
+
align: ALIGN_TBL[(attr >> 2) & 0x7] ?? 'left',
|
|
216
228
|
indent: d.length >= 16 ? i32(d, 12) : 0,
|
|
217
229
|
spaceBefore: d.length >= 20 ? i32(d, 16) : 0,
|
|
218
230
|
spaceAfter: d.length >= 24 ? i32(d, 20) : 0,
|
|
@@ -228,20 +240,30 @@ function parseParaShape(d: Uint8Array): HwpParaShape {
|
|
|
228
240
|
[36:4] faceColor (bgColor for solid fill) */
|
|
229
241
|
|
|
230
242
|
const BORDER_W_PT = [0.28, 0.34, 0.43, 0.57, 0.71, 0.85, 1.13, 1.42, 1.70, 1.98, 2.84, 4.25, 5.67, 8.50, 11.34, 14.17];
|
|
231
|
-
const BORDER_KIND: Record<number, StrokeKind> = { 0:'
|
|
243
|
+
const BORDER_KIND: Record<number, StrokeKind> = { 0:'solid',1:'dash',2:'dash',3:'dot',4:'dash',5:'dash',6:'dash',7:'double',8:'double',9:'double',10:'none' };
|
|
232
244
|
|
|
233
245
|
function parseBorderFill(d: Uint8Array): HwpBorderFill {
|
|
246
|
+
// Spec grouped format (표 23):
|
|
247
|
+
// [0:2] attr
|
|
248
|
+
// [2:4] 4 border types (left, right, top, bottom) — 1 byte each
|
|
249
|
+
// [6:4] 4 border widths (left, right, top, bottom) — 1 byte each (index into BORDER_W_PT)
|
|
250
|
+
// [10:16] 4 border colors (left, right, top, bottom) — 4 bytes each (COLORREF)
|
|
251
|
+
// [26:3] diagonal: type(1) + width(1) + color(4) = 6 bytes actually [26:6]
|
|
252
|
+
// [32:4] fillType
|
|
253
|
+
// [36:4] faceColor (bgColor for solid fill)
|
|
234
254
|
const borders: HwpBorderFill['borders'] = [];
|
|
255
|
+
const BASE_TYPE = 2; // 4 type bytes
|
|
256
|
+
const BASE_WIDTH = 6; // 4 width bytes
|
|
257
|
+
const BASE_COLOR = 10; // 4 × 4-byte colors
|
|
235
258
|
for (let i = 0; i < 4; i++) {
|
|
236
|
-
const
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
borders.push({ type: 0, widthPt: 0.5, color: '000000' });
|
|
241
|
-
}
|
|
259
|
+
const type = BASE_TYPE + i < d.length ? d[BASE_TYPE + i] : 0;
|
|
260
|
+
const widthPt = BASE_WIDTH + i < d.length ? (BORDER_W_PT[d[BASE_WIDTH + i]] ?? 0.5) : 0.5;
|
|
261
|
+
const color = BASE_COLOR + i * 4 + 4 <= d.length ? colorRef(d, BASE_COLOR + i * 4) : '000000';
|
|
262
|
+
borders.push({ type, widthPt, color });
|
|
242
263
|
}
|
|
243
264
|
let bgColor: string | undefined;
|
|
244
|
-
|
|
265
|
+
// after attr(2) + 4 types(4) + 4 widths(4) + 4 colors(16) + diagonal(6) = offset 32
|
|
266
|
+
const fOff = 32;
|
|
245
267
|
if (d.length >= fOff + 8) {
|
|
246
268
|
const ft = BinaryKit.readU32LE(d, fOff);
|
|
247
269
|
if (ft & 1) bgColor = colorRef(d, fOff + 4);
|
|
@@ -259,12 +281,19 @@ function parseBody(
|
|
|
259
281
|
const recs = parseRecords(compressed ? tryInflate(raw) : raw);
|
|
260
282
|
const content: ContentNode[] = [];
|
|
261
283
|
let pageDims: PageDims | undefined;
|
|
262
|
-
let i = 0;
|
|
263
284
|
|
|
285
|
+
// Pre-scan for PAGE_DEF at any nesting level (real HWP stores it at level 2 inside section ctrl)
|
|
286
|
+
for (const r of recs) {
|
|
287
|
+
if (r.tag === TAG_PAGE_DEF) {
|
|
288
|
+
pageDims = shield.guard(() => parsePageDef(r.data), A4, 'hwp:pageDef');
|
|
289
|
+
break;
|
|
290
|
+
}
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
let i = 0;
|
|
264
294
|
while (i < recs.length) {
|
|
265
295
|
if (recs[i].tag === TAG_PAGE_DEF) {
|
|
266
|
-
|
|
267
|
-
i++;
|
|
296
|
+
i++; // already handled above; skip at top level
|
|
268
297
|
} else if (recs[i].tag === TAG_PARA_HEADER) {
|
|
269
298
|
const r = shield.guard(
|
|
270
299
|
() => parseParagraphGroup(recs, i, di, shield),
|
|
@@ -295,6 +324,7 @@ function parseParagraphGroup(
|
|
|
295
324
|
let text: ParaTextResult | null = null;
|
|
296
325
|
let csPairs: [number, number][] = [];
|
|
297
326
|
const grids: ContentNode[] = [];
|
|
327
|
+
const ctrlHeaders: { ctrlId: number; objId: number }[] = [];
|
|
298
328
|
let i = start + 1;
|
|
299
329
|
|
|
300
330
|
while (i < recs.length && recs[i].level > lv) {
|
|
@@ -307,14 +337,23 @@ function parseParagraphGroup(
|
|
|
307
337
|
csPairs = parseCharShapePairs(r.data);
|
|
308
338
|
i++;
|
|
309
339
|
} else if (r.tag === TAG_CTRL_HEADER && r.level === lv + 1) {
|
|
310
|
-
if (r.data.length >= 4
|
|
311
|
-
const
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
if (
|
|
317
|
-
|
|
340
|
+
if (r.data.length >= 4) {
|
|
341
|
+
const ctrlId = BinaryKit.readU32LE(r.data, 0);
|
|
342
|
+
// objId at offset 4 (UINT16) - identifies the image/object in BinData
|
|
343
|
+
const objId = r.data.length >= 6 ? BinaryKit.readU16LE(r.data, 4) : 0;
|
|
344
|
+
ctrlHeaders.push({ ctrlId, objId });
|
|
345
|
+
|
|
346
|
+
if (ctrlId === CTRL_TABLE) {
|
|
347
|
+
const tr = shield.guard(
|
|
348
|
+
() => parseTableCtrl(recs, i, di, shield),
|
|
349
|
+
{ grid: null, next: skipKids(recs, i) },
|
|
350
|
+
`hwp:tbl@${i}`,
|
|
351
|
+
);
|
|
352
|
+
if (tr.grid) grids.push(tr.grid);
|
|
353
|
+
i = tr.next;
|
|
354
|
+
} else {
|
|
355
|
+
i = skipKids(recs, i);
|
|
356
|
+
}
|
|
318
357
|
} else {
|
|
319
358
|
i = skipKids(recs, i);
|
|
320
359
|
}
|
|
@@ -323,14 +362,39 @@ function parseParagraphGroup(
|
|
|
323
362
|
}
|
|
324
363
|
}
|
|
325
364
|
|
|
365
|
+
// Match extended controls with CTRL_HEADER entries
|
|
366
|
+
if (text && ctrlHeaders.length > 0) {
|
|
367
|
+
for (let ci = 0; ci < text.controls.length; ci++) {
|
|
368
|
+
if (ci < ctrlHeaders.length) {
|
|
369
|
+
text.controls[ci].ctrlId = ctrlHeaders[ci].ctrlId;
|
|
370
|
+
text.controls[ci].matched = true;
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
}
|
|
374
|
+
|
|
326
375
|
const nodes: ContentNode[] = [];
|
|
327
376
|
|
|
328
|
-
// Build paragraph from text
|
|
329
|
-
if (text && text.chars.length > 0) {
|
|
330
|
-
const
|
|
331
|
-
|
|
377
|
+
// Build paragraph from text and inline controls (images)
|
|
378
|
+
if (text && (text.chars.length > 0 || text.controls.length > 0)) {
|
|
379
|
+
const paraContent: (SpanNode | ContentNode)[] = [];
|
|
380
|
+
|
|
381
|
+
// Process text chars and controls together
|
|
382
|
+
if (text.chars.length > 0) {
|
|
332
383
|
const spans = resolveCharShapes(text.chars, csPairs, di);
|
|
333
|
-
|
|
384
|
+
paraContent.push(...spans);
|
|
385
|
+
}
|
|
386
|
+
|
|
387
|
+
// Add placeholder spans for extended controls (images)
|
|
388
|
+
if (text.controls.length > 0) {
|
|
389
|
+
for (let ci = 0; ci < text.controls.length; ci++) {
|
|
390
|
+
// Create placeholder for all extended controls
|
|
391
|
+
// Image replacement will happen later in injectImagesIntoContent
|
|
392
|
+
paraContent.push(buildSpan(`__EXT_${ci}__`));
|
|
393
|
+
}
|
|
394
|
+
}
|
|
395
|
+
|
|
396
|
+
if (paraContent.length > 0) {
|
|
397
|
+
nodes.push(buildPara(paraContent as any, buildParaProps(ps)));
|
|
334
398
|
}
|
|
335
399
|
}
|
|
336
400
|
|
|
@@ -354,7 +418,7 @@ const INL_CTRL = new Set([4, 5, 6, 7, 8]);
|
|
|
354
418
|
|
|
355
419
|
function decodeParaText(d: Uint8Array): ParaTextResult {
|
|
356
420
|
const chars: ParsedChar[] = [];
|
|
357
|
-
const
|
|
421
|
+
const controls: ParsedCtrl[] = [];
|
|
358
422
|
let i = 0, pos = 0;
|
|
359
423
|
|
|
360
424
|
while (i + 1 < d.length) {
|
|
@@ -364,8 +428,14 @@ function decodeParaText(d: Uint8Array): ParaTextResult {
|
|
|
364
428
|
if (c === 10) { chars.push({ pos, ch: '\n' }); i += 2; pos++; continue; }
|
|
365
429
|
|
|
366
430
|
if (EXT_CTRL.has(c)) {
|
|
367
|
-
|
|
368
|
-
|
|
431
|
+
// Extended control: 8 WORDs (16 bytes)
|
|
432
|
+
// WORD 4 contains objId (for images, charts, etc.)
|
|
433
|
+
let objId = 0;
|
|
434
|
+
if (i + 16 <= d.length) {
|
|
435
|
+
objId = BinaryKit.readU16LE(d, i + 8); // 4th WORD (offset 8) contains objId
|
|
436
|
+
}
|
|
437
|
+
controls.push({ pos, ctrlId: 0, objId, matched: false });
|
|
438
|
+
i += 16; pos += 8; continue;
|
|
369
439
|
}
|
|
370
440
|
if (INL_CTRL.has(c)) {
|
|
371
441
|
i += 16; pos += 8; continue;
|
|
@@ -379,7 +449,7 @@ function decodeParaText(d: Uint8Array): ParaTextResult {
|
|
|
379
449
|
chars.push({ pos, ch: String.fromCharCode(c) });
|
|
380
450
|
i += 2; pos++;
|
|
381
451
|
}
|
|
382
|
-
return { chars,
|
|
452
|
+
return { chars, controls };
|
|
383
453
|
}
|
|
384
454
|
|
|
385
455
|
/* ── PARA_CHAR_SHAPE ────────────────────────────────────────── */
|
|
@@ -605,8 +675,8 @@ function parseCellRec(
|
|
|
605
675
|
// offset 8: colAddr, offset 10: rowAddr (HWP 5.0 spec)
|
|
606
676
|
col = BinaryKit.readU16LE(d, 8);
|
|
607
677
|
row = BinaryKit.readU16LE(d, 10);
|
|
608
|
-
|
|
609
|
-
|
|
678
|
+
cs = Math.max(1, BinaryKit.readU16LE(d, 12));
|
|
679
|
+
rs = Math.max(1, BinaryKit.readU16LE(d, 14));
|
|
610
680
|
widthHwp = BinaryKit.readU32LE(d, 16);
|
|
611
681
|
|
|
612
682
|
const bfId = d.length >= 34 ? BinaryKit.readU16LE(d, 32) : 0;
|
|
@@ -764,6 +834,53 @@ export class HwpScanner implements Decoder {
|
|
|
764
834
|
di = shield.guard(() => parseDocInfo(diRaw, compressed), di, 'hwp:docInfo');
|
|
765
835
|
}
|
|
766
836
|
|
|
837
|
+
// Extract images from BinData streams.
// A stream qualifies when its path mentions BinData or a known image extension and
// is not one of the structural streams (FileHeader, DocInfo, BodyText, Section).
const imageStreams: { path: string; data: Uint8Array }[] = [];
for (const [path, data] of streams) {
  if ((path.includes('BinData') || path.includes('.jpg') || path.includes('.jpeg') || path.includes('.png') || path.includes('.gif') || path.includes('.bmp'))
    && !path.includes('FileHeader') && !path.includes('DocInfo') && !path.includes('BodyText') && !path.includes('Section')) {
    imageStreams.push({ path, data });
    console.log(`[HwpScanner] Image stream found: ${path} (${data.length} bytes)`);
  }
}

// Create one image node per distinct image payload.
const objectMap = new Map<number, ImgNode>();
const seenHashes = new Set<string>();
let imgIdx = 0;
for (const { path, data } of imageStreams) {
  // First guess from the extension; JPEG is the fallback when nothing matches.
  let mimeType = 'image/jpeg';
  const lowerPath = path.toLowerCase();
  if (lowerPath.includes('.png')) mimeType = 'image/png';
  else if (lowerPath.includes('.gif')) mimeType = 'image/gif';
  else if (lowerPath.includes('.bmp')) mimeType = 'image/bmp';

  // The file signature, when recognised, overrides the extension-based guess.
  if (data[0] === 0xFF && data[1] === 0xD8 && data[2] === 0xFF) mimeType = 'image/jpeg';
  else if (data[0] === 0x89 && data[1] === 0x50 && data[2] === 0x4E && data[3] === 0x47) mimeType = 'image/png';
  // 'GIF8' — the fourth byte is 0x38; a single byte can never equal 0x3538.
  else if (data[0] === 0x47 && data[1] === 0x49 && data[2] === 0x46 && data[3] === 0x38) mimeType = 'image/gif';
  else if (data[0] === 0x42 && data[1] === 0x4D) mimeType = 'image/bmp';

  const imgData = Buffer.from(data);
  const base64 = imgData.toString('base64');
  // Short prefix used only to keep the log lines readable.
  const hash = base64.slice(0, 20);
  // Deduplicate on the complete payload. A 20-character base64 prefix covers just
  // 15 bytes, which are identical for every PNG (8-byte signature, IHDR length,
  // "IHD"), so prefix-based dedupe would discard distinct images.
  if (!seenHashes.has(base64)) {
    seenHashes.add(base64);
    objectMap.set(imgIdx++, buildImg(
      base64,
      mimeType as any,
      0, // w
      0, // h
      `Image from ${path}`,
    ));
    console.log(`[HwpScanner] Added unique image: ${hash}... (${data.length} bytes)`);
  } else {
    console.log(`[HwpScanner] Duplicate image skipped: ${hash}...`);
  }
}

console.log(`[HwpScanner] Found ${imageStreams.length} image streams, ${objectMap.size} unique images`);
|
|
883
|
+
|
|
767
884
|
// Body sections
|
|
768
885
|
const allContent: ContentNode[] = [];
|
|
769
886
|
let pageDims: PageDims = A4;
|
|
@@ -790,6 +907,30 @@ export class HwpScanner implements Decoder {
|
|
|
790
907
|
if (r.pageDims) pageDims = r.pageDims;
|
|
791
908
|
}
|
|
792
909
|
|
|
910
|
+
// Inject images into paragraphs (only if images are available)
|
|
911
|
+
console.log(`[HwpScanner] Before injection: ${allContent.length} nodes, ${objectMap.size} images available`);
|
|
912
|
+
if (objectMap.size > 0) {
|
|
913
|
+
injectImagesIntoContent(allContent, objectMap);
|
|
914
|
+
console.log(`[HwpScanner] After injection: ${allContent.length} nodes`);
|
|
915
|
+
}
|
|
916
|
+
|
|
917
|
+
// Count images (recursively)
|
|
918
|
+
const countImages = (nodes: ContentNode[]): number => {
|
|
919
|
+
let count = 0;
|
|
920
|
+
for (const node of nodes) {
|
|
921
|
+
if ((node as any).tag === 'img') count++;
|
|
922
|
+
if ((node as any).tag === 'para' && (node as any).kids) count += countImages((node as any).kids);
|
|
923
|
+
if ((node as any).tag === 'grid' && (node as any).kids) {
|
|
924
|
+
for (const row of (node as any).kids) {
|
|
925
|
+
if (row.kids) count += countImages(row.kids);
|
|
926
|
+
}
|
|
927
|
+
}
|
|
928
|
+
}
|
|
929
|
+
return count;
|
|
930
|
+
};
|
|
931
|
+
const imgCount = countImages(allContent);
|
|
932
|
+
console.log(`[HwpScanner] Images in content: ${imgCount}`);
|
|
933
|
+
|
|
793
934
|
warns.push(...shield.flush());
|
|
794
935
|
const content = allContent.length > 0 ? allContent : [buildPara([buildSpan('')])];
|
|
795
936
|
return succeed(buildRoot({}, [buildSheet(content, pageDims)]), warns);
|
|
@@ -806,4 +947,84 @@ function findBodySection(streams: Map<string, Uint8Array>): Uint8Array | undefin
|
|
|
806
947
|
return undefined;
|
|
807
948
|
}
|
|
808
949
|
|
|
950
|
+
/* ═══════════════════════════════════════════════════════════════
|
|
951
|
+
OLE Object extraction (images)
|
|
952
|
+
════════════════════════════════════════════════════════════ */
|
|
953
|
+
|
|
954
|
+
/**
 * Splits a buffer of back-to-back object entries into individual objects and
 * sniffs an image MIME type for each one.
 *
 * Entry layout as this function reads it: a 16-byte header (u32 LE object id at
 * +0, u32 LE payload size at +4, remaining 8 bytes ignored) followed by the
 * payload. NOTE(review): this layout is taken from the code itself; confirm it
 * against the format specification.
 *
 * Parsing stops at the first entry whose id or size is zero, whose header is
 * incomplete, or whose payload would run past the end of the buffer.
 *
 * @param data raw bytes containing zero or more entries
 * @returns the entries parsed so far; each `data` is a view into the input
 *          (no copy), and `mimeType` is 'application/octet-stream' when no known
 *          image signature matches
 */
function extractImagesFromOleObjectLink(data: Uint8Array): OleObject[] {
  const HEADER_SIZE = 16;
  const objects: OleObject[] = [];
  let off = 0;

  // Require the whole header so no read can run past the end of the buffer.
  while (off + HEADER_SIZE <= data.length) {
    const objId = BinaryKit.readU32LE(data, off);
    const dataSize = BinaryKit.readU32LE(data, off + 4);

    if (objId === 0 || dataSize === 0) break;

    const objOff = off + HEADER_SIZE;
    if (objOff + dataSize > data.length) break;

    const objData = data.subarray(objOff, objOff + dataSize);

    // Detect MIME type from signature
    let mimeType = 'application/octet-stream';
    if (objData[0] === 0xFF && objData[1] === 0xD8 && objData[2] === 0xFF) {
      mimeType = 'image/jpeg';
    } else if (objData[0] === 0x89 && objData[1] === 0x50 && objData[2] === 0x4E && objData[3] === 0x47) {
      mimeType = 'image/png';
    } else if (objData[0] === 0x47 && objData[1] === 0x49 && objData[2] === 0x46 && objData[3] === 0x38) {
      // 'GIF8' — the fourth byte is 0x38; a single byte can never equal 0x3538.
      mimeType = 'image/gif';
    } else if (objData[0] === 0x42 && objData[1] === 0x4D) {
      mimeType = 'image/bmp';
    }

    objects.push({ id: objId, data: objData, mimeType });
    off = objOff + dataSize;
  }

  return objects;
}
|
|
988
|
+
|
|
989
|
+
/* ═══════════════════════════════════════════════════════════════
|
|
990
|
+
Helper to inject images into paragraph content
|
|
991
|
+
════════════════════════════════════════════════════════════ */
|
|
992
|
+
|
|
993
|
+
/**
 * Replaces placeholder spans in top-level paragraphs with image nodes, in place.
 *
 * A placeholder is a span whose first child is a text node whose content is
 * exactly `__IMG_<n>__` or `__EXT_<n>__`. The number inside the placeholder is
 * not used for lookup: images are handed out in map order and wrap around
 * (round-robin) when there are more placeholders than distinct images, so the
 * same ImgNode object may end up referenced from several paragraphs.
 *
 * Only nodes directly in `content` whose tag is 'para' are scanned; other node
 * kinds are left untouched.
 *
 * @param content   nodes to scan; matching paragraph children are overwritten
 * @param objectMap candidate images; entries sharing the same `b64` payload are
 *                  collapsed to the first one encountered
 */
function injectImagesIntoContent(
  content: ContentNode[],
  objectMap: Map<number, ImgNode>
): void {
  // Single pass: keep the first node seen for each distinct payload, in map order.
  const firstByPayload = new Map<string, ImgNode>();
  for (const img of objectMap.values()) {
    if (!firstByPayload.has(img.b64)) firstByPayload.set(img.b64, img);
  }
  const uniqueImages = Array.from(firstByPayload.values());
  if (uniqueImages.length === 0) return;

  // Support both __IMG_N__ and __EXT_N__ patterns
  const placeholder = /^__(?:IMG|EXT)_(\d+)__$/;

  let imgIdx = 0;
  for (const node of content) {
    if (node.tag === 'para' && node.kids) {
      for (let i = 0; i < node.kids.length; i++) {
        const kid = node.kids[i];
        // Span node structure: { tag: 'span', props, kids: [{ tag: 'txt', content }] }
        if (kid.tag === 'span' && kid.kids && kid.kids[0]?.tag === 'txt') {
          const text = kid.kids[0].content;
          if (text.match?.(placeholder)) {
            // Replace placeholder with next available image (round-robin)
            const imgNode = uniqueImages[imgIdx % uniqueImages.length];
            if (imgNode) {
              node.kids[i] = imgNode;
              imgIdx++;
            }
          }
        }
      }
    }
  }
}
|
|
1029
|
+
|
|
809
1030
|
registry.registerDecoder(new HwpScanner());
|
|
@@ -715,7 +715,15 @@ function decodeGrid(tbl: any, ctx: DecCtx): GridNode {
|
|
|
715
715
|
{ cs, rs, props: cellProps },
|
|
716
716
|
);
|
|
717
717
|
});
|
|
718
|
-
|
|
718
|
+
// Row height: read from the first cell's cellSz height
|
|
719
|
+
let rowHeightPt: number | undefined;
|
|
720
|
+
const firstCellForH = cellArr[0];
|
|
721
|
+
if (firstCellForH) {
|
|
722
|
+
const hSz = firstCellForH?.['hp:cellSz']?.[0]?._attr ?? {};
|
|
723
|
+
const hVal = Number(hSz.height ?? 0);
|
|
724
|
+
if (hVal > 0) rowHeightPt = Metric.hwpToPt(hVal);
|
|
725
|
+
}
|
|
726
|
+
return buildRow(cellNodes, rowHeightPt);
|
|
719
727
|
});
|
|
720
728
|
return buildGrid(rowNodes, gridProps);
|
|
721
729
|
}
|