hwp-convert 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/CHANGELOG.md +185 -0
  2. package/LICENSE +25 -0
  3. package/NOTICE +23 -0
  4. package/README.md +338 -0
  5. package/dist/browser/hwp-convert.browser.mjs +20677 -0
  6. package/dist/browser/hwp-convert.browser.mjs.map +7 -0
  7. package/dist/cli.d.ts +2 -0
  8. package/dist/cli.js +267 -0
  9. package/dist/index.d.ts +5 -0
  10. package/dist/index.js +5 -0
  11. package/dist/lib/errors.d.ts +9 -0
  12. package/dist/lib/errors.js +18 -0
  13. package/dist/lib/hwp/binData.d.ts +15 -0
  14. package/dist/lib/hwp/binData.js +64 -0
  15. package/dist/lib/hwp/bodyText.d.ts +31 -0
  16. package/dist/lib/hwp/bodyText.js +208 -0
  17. package/dist/lib/hwp/byteReader.d.ts +40 -0
  18. package/dist/lib/hwp/byteReader.js +116 -0
  19. package/dist/lib/hwp/cfbReader.d.ts +44 -0
  20. package/dist/lib/hwp/cfbReader.js +134 -0
  21. package/dist/lib/hwp/control.d.ts +17 -0
  22. package/dist/lib/hwp/control.js +290 -0
  23. package/dist/lib/hwp/converter.d.ts +22 -0
  24. package/dist/lib/hwp/converter.js +41 -0
  25. package/dist/lib/hwp/docInfo.d.ts +26 -0
  26. package/dist/lib/hwp/docInfo.js +396 -0
  27. package/dist/lib/hwp/fileHeader.d.ts +42 -0
  28. package/dist/lib/hwp/fileHeader.js +66 -0
  29. package/dist/lib/hwp/htmlReader.d.ts +17 -0
  30. package/dist/lib/hwp/htmlReader.js +602 -0
  31. package/dist/lib/hwp/hwpxBuilder.d.ts +19 -0
  32. package/dist/lib/hwp/hwpxBuilder.js +633 -0
  33. package/dist/lib/hwp/index.d.ts +68 -0
  34. package/dist/lib/hwp/index.js +149 -0
  35. package/dist/lib/hwp/mdReader.d.ts +16 -0
  36. package/dist/lib/hwp/mdReader.js +485 -0
  37. package/dist/lib/hwp/mdWriter.d.ts +23 -0
  38. package/dist/lib/hwp/mdWriter.js +182 -0
  39. package/dist/lib/hwp/owpml.d.ts +33 -0
  40. package/dist/lib/hwp/owpml.js +86 -0
  41. package/dist/lib/hwp/record.d.ts +24 -0
  42. package/dist/lib/hwp/record.js +59 -0
  43. package/dist/lib/hwp/tags.d.ts +115 -0
  44. package/dist/lib/hwp/tags.js +217 -0
  45. package/dist/lib/hwp/types.d.ts +214 -0
  46. package/dist/lib/hwp/types.js +5 -0
  47. package/dist/lib/hwpxReader.d.ts +60 -0
  48. package/dist/lib/hwpxReader.js +1104 -0
  49. package/dist/lib/types.d.ts +47 -0
  50. package/dist/lib/types.js +1 -0
  51. package/dist/lib/writer.d.ts +19 -0
  52. package/dist/lib/writer.js +149 -0
  53. package/package.json +94 -0
@@ -0,0 +1,1104 @@
1
+ import { XMLParser } from "fast-xml-parser";
2
+ import JSZip from "jszip";
3
+ import { HwpxEncryptedDocumentError, HwpxNotLoadedError } from "./errors.js";
4
+ const DECODER_UTF8 = new TextDecoder("utf-8"); // shared decoder reused by decodeBytesSmart for UTF-8 payloads
5
+ const DECODER_UTF16LE = new TextDecoder("utf-16le"); // shared decoder reused by decodeBytesSmart for little-endian UTF-16
6
+ const DECODER_UTF16BE = new TextDecoder("utf-16be"); // shared decoder reused by decodeBytesSmart for big-endian UTF-16
7
/**
 * Guesses the text encoding of a byte sequence.
 *
 * A byte-order mark wins when present. Otherwise the first 1024 bytes are
 * sampled and the positions of NUL bytes decide between UTF-16LE / UTF-16BE;
 * anything else is reported as UTF-8.
 *
 * @param {Uint8Array} bytes - raw file content
 * @returns {"utf-8" | "utf-16le" | "utf-16be"} detected encoding label
 */
function detectTextEncoding(bytes) {
    const beginsWith = (...signature) =>
        bytes.length >= signature.length &&
        signature.every((expected, offset) => bytes[offset] === expected);

    if (beginsWith(0xef, 0xbb, 0xbf)) {
        return "utf-8";
    }
    if (beginsWith(0xff, 0xfe)) {
        return "utf-16le";
    }
    if (beginsWith(0xfe, 0xff)) {
        return "utf-16be";
    }

    // No BOM: count NUL bytes by parity of their position.
    const limit = Math.min(bytes.length, 1024);
    const nulByParity = [0, 0]; // [even offsets, odd offsets]
    for (let pos = 0; pos < limit; pos += 1) {
        if (bytes[pos] === 0) {
            nulByParity[pos % 2] += 1;
        }
    }
    const [evenNuls, oddNuls] = nulByParity;

    if (oddNuls > evenNuls * 2) {
        return "utf-16le"; // LE layout: xx 00 xx 00
    }
    if (evenNuls > oddNuls * 2) {
        return "utf-16be"; // BE layout: 00 xx 00 xx
    }
    return "utf-8";
}
26
/**
 * Decodes bytes to a string using the encoding reported by detectTextEncoding,
 * dropping a leading byte-order mark that matches that encoding.
 *
 * @param {Uint8Array} bytes - raw file content
 * @returns {string} decoded text
 */
function decodeBytesSmart(bytes) {
    const encoding = detectTextEncoding(bytes);
    const beginsWith = (...signature) =>
        bytes.length >= signature.length &&
        signature.every((expected, offset) => bytes[offset] === expected);

    switch (encoding) {
        case "utf-8":
            return DECODER_UTF8.decode(beginsWith(0xef, 0xbb, 0xbf) ? bytes.subarray(3) : bytes);
        case "utf-16le":
            return DECODER_UTF16LE.decode(beginsWith(0xff, 0xfe) ? bytes.subarray(2) : bytes);
        default:
            // Only "utf-16be" remains among the labels detectTextEncoding returns.
            return DECODER_UTF16BE.decode(beginsWith(0xfe, 0xff) ? bytes.subarray(2) : bytes);
    }
}
44
/**
 * Normalizes a nullish value to `undefined`; every other value passes through untouched.
 *
 * @param {*} value - any value
 * @returns {*} `undefined` for null/undefined, otherwise `value`
 */
function getOrEmpty(value) {
    return value === null ? undefined : value;
}
47
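+ /**
48
+ * Note for pushTextNode (defined after escapeMd): text nodes produced by fast-xml-parser may appear as a
49
+ * string, a number/boolean, an object ({#text}), or an array. Every case is normalized to a string and appended to pieces.
50
+ */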
51
/**
 * Backslash-escapes Markdown-significant characters in plain text.
 *
 * Backslashes and the inline markers * _ ` ~ are escaped everywhere;
 * `#` and `>` are escaped only when they start a line.
 *
 * @param {string} text - plain text
 * @returns {string} text safe to embed in Markdown
 */
function escapeMd(text) {
    const inlineEscaped = text.replace(/[\\*_`~]/g, (ch) => `\\${ch}`);
    return inlineEscaped.replace(/^[#>]/gm, (marker) => `\\${marker}`);
}
57
/**
 * Appends the textual content of a parsed XML text node to `pieces`.
 *
 * Accepts the shapes a text node can take after parsing: a string, a
 * number/boolean (stringified), an array (each item handled recursively), or
 * an object whose "#text" member holds the content. Anything else is ignored.
 *
 * @param {*} t - parsed text node
 * @param {string[]} pieces - accumulator that receives the text fragments
 * @returns {void}
 */
function pushTextNode(t, pieces) {
    if (t == null) {
        return;
    }
    if (Array.isArray(t)) {
        t.forEach((entry) => pushTextNode(entry, pieces));
        return;
    }
    switch (typeof t) {
        case "string":
            pieces.push(t);
            break;
        case "number":
        case "boolean":
            pieces.push(String(t));
            break;
        case "object": {
            const nested = t["#text"];
            if (nested !== undefined) {
                pushTextNode(nested, pieces);
            }
            break;
        }
        default:
            break;
    }
}
79
+ export class HwpxReader {
80
+ zip = null;
81
+ files = {};
82
+ encryptedCache = null;
83
+ characterProperties = new Map();
84
+ fontFaces = new Map();
85
+ async loadFromArrayBuffer(buffer) {
86
+ const zip = await JSZip.loadAsync(buffer);
87
+ this.zip = zip;
88
+ this.files = {};
89
+ const entries = Object.keys(zip.files);
90
+ await Promise.all(entries.map(async (name) => {
91
+ const file = zip.file(name);
92
+ if (!file)
93
+ return;
94
+ // store raw bytes for flexible processing (images, xml, etc.)
95
+ this.files[name] = new Uint8Array(await file.async("uint8array"));
96
+ }));
97
+ // Validate mimetype (per spec: application/owpml). 다양한 변형을 수용하고, 불일치 시에도 진행.
98
+ const mime = this.getTextFile("mimetype")?.trim();
99
+ if (mime && !this.isLikelyHwpxMime(mime)) {
100
+ // 엄격 차단 대신 경고성 에러로 유지하려면 throw를 피한다.
101
+ // throw new InvalidHwpxFormatError();
102
+ }
103
+ // Try to locate content via META-INF/container.xml if present (not mandatory but helpful)
104
+ const containerXml = this.getTextFile("META-INF/container.xml");
105
+ if (containerXml) {
106
+ const cx = this.parseXml(containerXml);
107
+ // not strictly necessary now; reserved for future rootfile discovery
108
+ void cx;
109
+ }
110
+ // Parse styles from header.xml
111
+ this.parseStyleDefinitions();
112
+ }
113
+ isLikelyHwpxMime(m) {
114
+ const s = m.toLowerCase();
115
+ // 허용: application/hwp+zip(한컴 표준), application/owpml, hwpx/owpml/hwp+zip 포함 케이스
116
+ return (s === "application/hwp+zip" ||
117
+ s === "application/owpml" ||
118
+ s.includes("hwp+zip") ||
119
+ s.includes("owpml") ||
120
+ s.includes("hwpx"));
121
+ }
122
+ getTextFile(path) {
123
+ const bytes = this.files[path];
124
+ if (!bytes)
125
+ return null;
126
+ return decodeBytesSmart(bytes);
127
+ }
128
+ findFilePathIgnoreCase(targetPath) {
129
+ const lower = targetPath.toLowerCase();
130
+ for (const key of Object.keys(this.files)) {
131
+ if (key.toLowerCase() === lower)
132
+ return key;
133
+ }
134
+ return null;
135
+ }
136
+ parseXml(xml) {
137
+ try {
138
+ const parser = new XMLParser({
139
+ ignoreAttributes: false,
140
+ attributeNamePrefix: "@",
141
+ // 텍스트 내부 공백 보존 — `<hp:t>이것은 </hp:t>` 같은 run 사이 공백이 사라지지 않도록
142
+ trimValues: false,
143
+ removeNSPrefix: true,
144
+ // 텍스트 노드 자동 타입 변환 끄기 — "1", "true" 등이 number/boolean 으로 변환되는 것을 방지
145
+ parseTagValue: false,
146
+ parseAttributeValue: false,
147
+ });
148
+ const obj = parser.parse(xml);
149
+ return obj;
150
+ }
151
+ catch (_err) {
152
+ return null;
153
+ }
154
+ }
155
+ summarizePackage() {
156
+ const hasEncryptionInfo = this.detectEncryption();
157
+ const contentsFiles = Object.keys(this.files).filter((p) => p.startsWith("Contents/")).sort();
158
+ const contentHpf = this.getTextFile("Contents/content.hpf");
159
+ let manifest;
160
+ let spine;
161
+ if (contentHpf) {
162
+ const xml = this.parseXml(contentHpf);
163
+ const pkg = xml?.package ?? xml?.opf?.package;
164
+ const man = pkg?.manifest?.item;
165
+ if (man) {
166
+ const items = Array.isArray(man) ? man : [man];
167
+ manifest = items.map((it) => ({
168
+ id: it?.["@id"],
169
+ href: it?.["@href"],
170
+ mediaType: it?.["@media-type"] ?? it?.["@mediaType"],
171
+ }));
172
+ }
173
+ const sp = pkg?.spine?.itemref ?? pkg?.spine?.itemRef;
174
+ if (sp) {
175
+ const refs = Array.isArray(sp) ? sp : [sp];
176
+ spine = refs.map((r) => r?.["@idref"] ?? r?.["@idRef"]).filter(Boolean);
177
+ }
178
+ }
179
+ return { hasEncryptionInfo, contentsFiles, manifest, spine };
180
+ }
181
+ getSectionPathsBySpine() {
182
+ const contentHpf = this.getTextFile("Contents/content.hpf");
183
+ if (!contentHpf)
184
+ return null;
185
+ const xml = this.parseXml(contentHpf);
186
+ const pkg = xml?.package ?? xml?.opf?.package;
187
+ const man = pkg?.manifest?.item;
188
+ const map = new Map(); // id -> href
189
+ if (man) {
190
+ const items = Array.isArray(man) ? man : [man];
191
+ for (const it of items) {
192
+ const id = it?.["@id"];
193
+ const href = it?.["@href"];
194
+ if (id && href && /Contents\/section\d+\.xml$/i.test(href))
195
+ map.set(id, href);
196
+ }
197
+ }
198
+ const sp = pkg?.spine?.itemref ?? pkg?.spine?.itemRef;
199
+ const refs = sp ? (Array.isArray(sp) ? sp : [sp]) : [];
200
+ const paths = [];
201
+ for (const r of refs) {
202
+ const id = r?.["@idref"] ?? r?.["@idRef"];
203
+ const href = id ? map.get(id) : undefined;
204
+ if (href && this.files[href])
205
+ paths.push(href);
206
+ }
207
+ return paths.length ? paths : null;
208
+ }
209
+ detectEncryption() {
210
+ if (this.encryptedCache !== null)
211
+ return this.encryptedCache;
212
+ const manifestXml = this.getTextFile("META-INF/manifest.xml");
213
+ if (!manifestXml) {
214
+ this.encryptedCache = false;
215
+ return false;
216
+ }
217
+ const obj = this.parseXml(manifestXml);
218
+ const has = this.containsEncryptionMarker(obj);
219
+ this.encryptedCache = !!has;
220
+ return this.encryptedCache;
221
+ }
222
+ containsEncryptionMarker(node) {
223
+ if (!node)
224
+ return false;
225
+ if (typeof node === "string") {
226
+ return /encrypt|cipher/i.test(node);
227
+ }
228
+ if (Array.isArray(node)) {
229
+ for (const item of node) {
230
+ if (this.containsEncryptionMarker(item))
231
+ return true;
232
+ }
233
+ return false;
234
+ }
235
+ if (typeof node === "object") {
236
+ for (const [k, v] of Object.entries(node)) {
237
+ if (/encrypt|cipher/i.test(k))
238
+ return true;
239
+ if (typeof v === "string" && /encrypt|cipher/i.test(v))
240
+ return true;
241
+ if (this.containsEncryptionMarker(v))
242
+ return true;
243
+ }
244
+ }
245
+ return false;
246
+ }
247
+ readMetadata() {
248
+ const contentHpf = this.getTextFile("Contents/content.hpf");
249
+ const metadata = {};
250
+ if (contentHpf) {
251
+ const xml = this.parseXml(contentHpf);
252
+ // OPF-like: package > metadata
253
+ const md = xml?.package?.metadata;
254
+ if (md) {
255
+ metadata.title = getOrEmpty(md["dc:title"] ?? md.title);
256
+ metadata.creator = getOrEmpty(md["dc:creator"] ?? md.creator);
257
+ metadata.created = getOrEmpty(md["dcterms:created"] ?? md.created);
258
+ metadata.modified = getOrEmpty(md["dcterms:modified"] ?? md.modified);
259
+ }
260
+ }
261
+ const versionXml = this.getTextFile("version.xml");
262
+ if (versionXml) {
263
+ const v = this.parseXml(versionXml);
264
+ const ver = v?.Version?.OWPMLVersion ?? v?.version?.owpmlVersion;
265
+ if (typeof ver === "string") {
266
+ metadata.version = ver;
267
+ }
268
+ }
269
+ const settingsXml = this.getTextFile("settings.xml");
270
+ if (settingsXml) {
271
+ const s = this.parseXml(settingsXml);
272
+ // 표준 예시: ha:HWPApplicationSetting > ha:CaretPosition(listIDRef, paraIDRef, pos)
273
+ const app = s?.HWPApplicationSetting ?? s?.Settings ?? s?.settings;
274
+ const caret = app?.CaretPosition ?? app?.caretPosition;
275
+ if (caret && (caret["@listIDRef"] || caret["@paraIDRef"] || caret["@pos"])) {
276
+ const listId = caret["@listIDRef"] ?? "0";
277
+ const paraId = caret["@paraIDRef"] ?? "0";
278
+ const pos = caret["@pos"] ?? "0";
279
+ metadata.caretPosition = `${listId}:${paraId}:${pos}`;
280
+ }
281
+ }
282
+ return metadata;
283
+ }
284
+ async getDocumentInfo() {
285
+ if (!this.zip)
286
+ throw new HwpxNotLoadedError();
287
+ const summary = this.summarizePackage();
288
+ const metadata = this.readMetadata();
289
+ return { metadata, summary };
290
+ }
291
+ async extractText(options) {
292
+ if (!this.zip)
293
+ throw new HwpxNotLoadedError();
294
+ const summary = this.summarizePackage();
295
+ if (summary.hasEncryptionInfo) {
296
+ throw new HwpxEncryptedDocumentError();
297
+ }
298
+ // HWPX 본문: Contents/section*.xml 에서 hp:t 텍스트를 추출
299
+ const joiner = options?.joinParagraphs ?? "\n";
300
+ let sectionPaths = this.getSectionPathsBySpine() ?? Object.keys(this.files)
301
+ .filter((p) => /^contents\/section\d+\.xml$/.test(p.toLowerCase()))
302
+ .sort((a, b) => {
303
+ const na = Number(a.match(/section(\d+)\.xml/)?.[1] ?? 0);
304
+ const nb = Number(b.match(/section(\d+)\.xml/)?.[1] ?? 0);
305
+ return na - nb;
306
+ });
307
+ // Fallback: 탐색에 실패하면 Contents/*.xml 중 루트가 section 인 파일을 수색
308
+ if (sectionPaths.length === 0) {
309
+ const candidates = Object.keys(this.files).filter((p) => p.startsWith("Contents/") && p.toLowerCase().endsWith(".xml"));
310
+ for (const p of candidates) {
311
+ const xmlText = this.getTextFile(p);
312
+ if (!xmlText)
313
+ continue;
314
+ const xml = this.parseXml(xmlText);
315
+ if (xml && (xml.sec || xml.section || xml["hp:section"])) {
316
+ sectionPaths.push(p);
317
+ }
318
+ }
319
+ sectionPaths.sort((a, b) => {
320
+ const na = Number(a.match(/section(\d+)\.xml/)?.[1] ?? 0);
321
+ const nb = Number(b.match(/section(\d+)\.xml/)?.[1] ?? 0);
322
+ return na - nb;
323
+ });
324
+ }
325
+ const paragraphs = [];
326
+ for (const path of sectionPaths) {
327
+ const xmlText = this.getTextFile(path);
328
+ if (!xmlText)
329
+ continue;
330
+ const xml = this.parseXml(xmlText);
331
+ // 구조 참고: sec > p* > run* > t, 네임스페이스 제거됨
332
+ const section = xml?.sec ?? xml?.section ?? xml?.["hp:section"];
333
+ if (!section) {
334
+ const segs = [];
335
+ this.collectAllText(xml, segs);
336
+ if (segs.length)
337
+ paragraphs.push(segs.join(""));
338
+ continue;
339
+ }
340
+ const ps = section?.p ?? section?.["hp:p"];
341
+ if (!ps) {
342
+ const segs = [];
343
+ this.collectAllText(section, segs);
344
+ if (segs.length)
345
+ paragraphs.push(segs.join(""));
346
+ continue;
347
+ }
348
+ const paras = Array.isArray(ps) ? ps : [ps];
349
+ for (const p of paras) {
350
+ paragraphs.push(this.extractParagraphText(p));
351
+ }
352
+ }
353
+ const combined = paragraphs.join(joiner);
354
+ if (combined.trim().length > 0)
355
+ return combined;
356
+ // Fallback: Preview text
357
+ const prvPath = this.findFilePathIgnoreCase("Preview/PrvText.txt") ||
358
+ this.findFilePathIgnoreCase("preview/prvtext.txt");
359
+ if (prvPath) {
360
+ const prv = this.getTextFile(prvPath);
361
+ if (prv && prv.trim().length > 0)
362
+ return prv;
363
+ }
364
+ return combined;
365
+ }
366
+ /**
367
+ * 한 문단(<hp:p>)에서 텍스트를 추출. 표/이미지 등 인라인 컨트롤이 있으면 셀/내부 문단을 재귀 탐색.
368
+ *
369
+ * 표는 셀 단위로 텍스트를 모은 후 같은 행 내 셀은 공백으로, 행 사이는 줄바꿈으로 결합한다.
370
+ */
371
+ extractParagraphText(p) {
372
+ const runs = p?.run ?? p?.["hp:run"];
373
+ if (!runs)
374
+ return "";
375
+ const runArr = Array.isArray(runs) ? runs : [runs];
376
+ const pieces = [];
377
+ for (const run of runArr) {
378
+ // 섹션/컬럼 설정 같은 메타 컨트롤은 텍스트 없음 — secPr/ctrl 안에 든 자식까지 무시
379
+ if (run?.secPr || run?.ctrl)
380
+ continue;
381
+ // 직접 텍스트
382
+ const t = run?.t ?? run?.["hp:t"];
383
+ pushTextNode(t, pieces);
384
+ // 표
385
+ const tbl = run?.tbl ?? run?.["hp:tbl"];
386
+ if (tbl) {
387
+ const tbls = Array.isArray(tbl) ? tbl : [tbl];
388
+ for (const tb of tbls) {
389
+ pieces.push(this.extractTableText(tb));
390
+ }
391
+ }
392
+ }
393
+ return pieces.join("");
394
+ }
395
+ extractTableText(tbl) {
396
+ const trs = tbl?.tr ?? tbl?.["hp:tr"];
397
+ if (!trs)
398
+ return "";
399
+ const trArr = Array.isArray(trs) ? trs : [trs];
400
+ const rowTexts = [];
401
+ for (const tr of trArr) {
402
+ const tcs = tr?.tc ?? tr?.["hp:tc"];
403
+ if (!tcs)
404
+ continue;
405
+ const tcArr = Array.isArray(tcs) ? tcs : [tcs];
406
+ const cellTexts = [];
407
+ for (const tc of tcArr) {
408
+ cellTexts.push(this.extractCellText(tc));
409
+ }
410
+ rowTexts.push(cellTexts.join(" "));
411
+ }
412
+ return rowTexts.join("\n");
413
+ }
414
+ extractCellText(tc) {
415
+ const sub = tc?.subList ?? tc?.["hp:subList"];
416
+ if (!sub)
417
+ return "";
418
+ const ps = sub?.p ?? sub?.["hp:p"];
419
+ if (!ps)
420
+ return "";
421
+ const paras = Array.isArray(ps) ? ps : [ps];
422
+ return paras.map((q) => this.extractParagraphText(q)).join("\n");
423
+ }
424
+ /**
425
+ * 문서 전체를 Markdown 으로 변환.
426
+ * 표는 마크다운 표 (셀 병합은 평탄화), 이미지는 `![](BinData/...)`.
427
+ */
428
+ async extractMarkdown(options) {
429
+ if (!this.zip)
430
+ throw new HwpxNotLoadedError();
431
+ const summary = this.summarizePackage();
432
+ if (summary.hasEncryptionInfo) {
433
+ throw new HwpxEncryptedDocumentError();
434
+ }
435
+ let sectionPaths = this.getSectionPathsBySpine() ?? Object.keys(this.files)
436
+ .filter((p) => /^contents\/section\d+\.xml$/.test(p.toLowerCase()))
437
+ .sort();
438
+ if (sectionPaths.length === 0) {
439
+ const candidates = Object.keys(this.files).filter((p) => p.startsWith("Contents/") && p.toLowerCase().endsWith(".xml"));
440
+ for (const p of candidates) {
441
+ const xmlText = this.getTextFile(p);
442
+ if (!xmlText)
443
+ continue;
444
+ const xml = this.parseXml(xmlText);
445
+ if (xml && (xml.sec || xml.section || xml["hp:section"]))
446
+ sectionPaths.push(p);
447
+ }
448
+ }
449
+ const blocks = [];
450
+ for (const path of sectionPaths) {
451
+ const xmlText = this.getTextFile(path);
452
+ if (!xmlText)
453
+ continue;
454
+ const xml = this.parseXml(xmlText);
455
+ const section = xml?.sec ?? xml?.section ?? xml?.["hp:section"];
456
+ if (!section)
457
+ continue;
458
+ const ps = section?.p ?? section?.["hp:p"];
459
+ if (!ps)
460
+ continue;
461
+ const paras = Array.isArray(ps) ? ps : [ps];
462
+ for (const p of paras) {
463
+ const md = this.extractParagraphMarkdown(p, options);
464
+ if (md.trim().length > 0)
465
+ blocks.push(md);
466
+ }
467
+ }
468
+ return blocks.join("\n\n").trim() + "\n";
469
+ }
470
+ extractParagraphMarkdown(p, options) {
471
+ const runs = p?.run ?? p?.["hp:run"];
472
+ if (!runs)
473
+ return "";
474
+ const runArr = Array.isArray(runs) ? runs : [runs];
475
+ const parts = [];
476
+ let textBuf = "";
477
+ for (const run of runArr) {
478
+ if (run?.secPr || run?.ctrl)
479
+ continue;
480
+ // 텍스트 + charPrIDRef → 굵게/기울임 적용
481
+ const t = run?.t ?? run?.["hp:t"];
482
+ const pieces = [];
483
+ pushTextNode(t, pieces);
484
+ let raw = pieces.join("");
485
+ if (raw.length > 0) {
486
+ const charPrId = run?.["@charPrIDRef"];
487
+ if (charPrId !== undefined && this.characterProperties.has(String(charPrId))) {
488
+ const cs = this.characterProperties.get(String(charPrId));
489
+ let s = escapeMd(raw);
490
+ if (cs?.bold)
491
+ s = `**${s}**`;
492
+ if (cs?.italic)
493
+ s = `*${s}*`;
494
+ textBuf += s;
495
+ }
496
+ else {
497
+ textBuf += escapeMd(raw);
498
+ }
499
+ }
500
+ // 표
501
+ const tbl = run?.tbl ?? run?.["hp:tbl"];
502
+ if (tbl) {
503
+ if (textBuf) {
504
+ parts.push(textBuf);
505
+ textBuf = "";
506
+ }
507
+ const tbls = Array.isArray(tbl) ? tbl : [tbl];
508
+ for (const tb of tbls)
509
+ parts.push(this.extractTableMarkdown(tb, options));
510
+ }
511
+ // 그림
512
+ const pic = run?.pic ?? run?.["hp:pic"];
513
+ if (pic) {
514
+ if (textBuf) {
515
+ parts.push(textBuf);
516
+ textBuf = "";
517
+ }
518
+ const href = pic?.["@href"];
519
+ const img = pic?.img ?? pic?.["hc:img"];
520
+ const ref = img?.["@binaryItemIDRef"];
521
+ const path = typeof href === "string" ? href : ref ? `BinData/${ref}` : "";
522
+ if (path) {
523
+ if (options?.embedImages) {
524
+ const data = this.files[path];
525
+ if (data) {
526
+ const ext = path.split(".").pop()?.toLowerCase() ?? "";
527
+ const mime = ext === "png" ? "image/png" : ext === "jpg" || ext === "jpeg" ? "image/jpeg" : ext === "gif" ? "image/gif" : "application/octet-stream";
528
+ parts.push(`![](data:${mime};base64,${this.toBase64(data)})`);
529
+ }
530
+ else {
531
+ parts.push(`![](${path})`);
532
+ }
533
+ }
534
+ else if (options?.imageSrcResolver) {
535
+ parts.push(`![](${options.imageSrcResolver(path)})`);
536
+ }
537
+ else {
538
+ parts.push(`![](${path})`);
539
+ }
540
+ }
541
+ }
542
+ }
543
+ if (textBuf)
544
+ parts.push(textBuf);
545
+ return parts.join("\n\n");
546
+ }
547
+ extractTableMarkdown(tbl, options) {
548
+ const trs = tbl?.tr ?? tbl?.["hp:tr"];
549
+ if (!trs)
550
+ return "";
551
+ const trArr = Array.isArray(trs) ? trs : [trs];
552
+ // 행/셀 텍스트 모음 (병합은 무시 — 마크다운 표 한계)
553
+ const rows = [];
554
+ let maxCols = 0;
555
+ for (const tr of trArr) {
556
+ const tcs = tr?.tc ?? tr?.["hp:tc"];
557
+ if (!tcs)
558
+ continue;
559
+ const tcArr = Array.isArray(tcs) ? tcs : [tcs];
560
+ const cellTexts = [];
561
+ for (const tc of tcArr) {
562
+ const sub = tc?.subList ?? tc?.["hp:subList"];
563
+ if (!sub) {
564
+ cellTexts.push("");
565
+ continue;
566
+ }
567
+ const cps = sub?.p ?? sub?.["hp:p"];
568
+ if (!cps) {
569
+ cellTexts.push("");
570
+ continue;
571
+ }
572
+ const cellParas = Array.isArray(cps) ? cps : [cps];
573
+ const inner = cellParas
574
+ .map((q) => this.extractParagraphMarkdown(q, options))
575
+ .join(" ")
576
+ .replace(/\n+/g, " ")
577
+ .replace(/\|/g, "\\|");
578
+ cellTexts.push(inner);
579
+ }
580
+ if (cellTexts.length > maxCols)
581
+ maxCols = cellTexts.length;
582
+ rows.push(cellTexts);
583
+ }
584
+ if (rows.length === 0)
585
+ return "";
586
+ // 모든 행의 셀 수를 maxCols 로 패딩
587
+ for (const r of rows) {
588
+ while (r.length < maxCols)
589
+ r.push("");
590
+ }
591
+ const fmt = (cells) => `| ${cells.map((c) => c || " ").join(" | ")} |`;
592
+ const lines = [];
593
+ lines.push(fmt(rows[0]));
594
+ lines.push(fmt(new Array(maxCols).fill("---")));
595
+ for (let i = 1; i < rows.length; i++)
596
+ lines.push(fmt(rows[i]));
597
+ return lines.join("\n");
598
+ }
599
+ // 아주 단순한 텍스트 템플릿 치환: {{key}} → value (문단 텍스트에만 적용)
600
+ applyTemplateToText(raw, data) {
601
+ return raw.replace(/\{\{\s*([\w.]+)\s*\}\}/g, (_m, key) => {
602
+ const value = key.split('.').reduce((acc, k) => (acc && acc[k] !== undefined ? acc[k] : undefined), data);
603
+ return value === undefined || value === null ? '' : String(value);
604
+ });
605
+ }
606
+ async extractHtml(options) {
607
+ if (!this.zip)
608
+ throw new HwpxNotLoadedError();
609
+ const summary = this.summarizePackage();
610
+ if (summary.hasEncryptionInfo) {
611
+ throw new HwpxEncryptedDocumentError();
612
+ }
613
+ const paragraphTag = options?.paragraphTag ?? "p";
614
+ const enableImages = options?.renderImages ?? true;
615
+ const enableTables = options?.renderTables ?? true;
616
+ const enableStyles = options?.renderStyles ?? true;
617
+ let sectionPaths = this.getSectionPathsBySpine() ?? Object.keys(this.files)
618
+ .filter((p) => /^contents\/section\d+\.xml$/.test(p.toLowerCase()))
619
+ .sort((a, b) => {
620
+ const na = Number(a.match(/section(\d+)\.xml/)?.[1] ?? 0);
621
+ const nb = Number(b.match(/section(\d+)\.xml/)?.[1] ?? 0);
622
+ return na - nb;
623
+ });
624
+ if (sectionPaths.length === 0) {
625
+ const candidates = Object.keys(this.files).filter((p) => p.startsWith("Contents/") && p.toLowerCase().endsWith(".xml"));
626
+ for (const p of candidates) {
627
+ const xmlText = this.getTextFile(p);
628
+ if (!xmlText)
629
+ continue;
630
+ const xml = this.parseXml(xmlText);
631
+ if (xml && (xml.sec || xml.section || xml["hp:section"])) {
632
+ sectionPaths.push(p);
633
+ }
634
+ }
635
+ sectionPaths.sort((a, b) => {
636
+ const na = Number(a.match(/section(\d+)\.xml/)?.[1] ?? 0);
637
+ const nb = Number(b.match(/section(\d+)\.xml/)?.[1] ?? 0);
638
+ return na - nb;
639
+ });
640
+ }
641
+ const tableClass = options?.tableClassName ?? "hwpx-table";
642
+ const pieces = [];
643
+ for (const path of sectionPaths) {
644
+ const xmlText = this.getTextFile(path);
645
+ if (!xmlText)
646
+ continue;
647
+ const xml = this.parseXml(xmlText);
648
+ const section = xml?.sec ?? xml?.section ?? xml?.["hp:section"];
649
+ if (!section)
650
+ continue;
651
+ // paragraphs (표가 paragraph 안의 run 에 포함되어 있을 수 있으므로 분리 추출)
652
+ const ps = section?.p ?? section?.["hp:p"];
653
+ if (ps) {
654
+ const paras = Array.isArray(ps) ? ps : [ps];
655
+ for (const p of paras) {
656
+ // 텍스트와 이미지 등 인라인 컨텐츠
657
+ const inner = this.renderNodeToHtml(p, { enableImages, enableStyles }, options);
658
+ const alignStyle = this.getAlignStyle(p);
659
+ const styleAttr = alignStyle ? ` style="${alignStyle}"` : "";
660
+ pieces.push(`<${paragraphTag}${styleAttr}>${inner}</${paragraphTag}>`);
661
+ // paragraph 내 표는 <p> 형제 요소로 출력 (HTML 에서 <p> 안에 <table> 불가)
662
+ if (enableTables) {
663
+ for (const tbl of this.collectTablesInParagraph(p)) {
664
+ pieces.push(this.renderTableHtml(tbl, tableClass, options));
665
+ }
666
+ }
667
+ }
668
+ }
669
+ // section 직속 tables (구식 HWPX)
670
+ const tbls = section?.tbl ?? section?.["hp:tbl"];
671
+ if (tbls && enableTables) {
672
+ const tables = Array.isArray(tbls) ? tbls : [tbls];
673
+ for (const tbl of tables) {
674
+ pieces.push(this.renderTableHtml(tbl, tableClass, options));
675
+ }
676
+ }
677
+ }
678
+ let html = pieces.join("");
679
+ if (html.trim().length > 0)
680
+ return html;
681
+ // Fallback: Preview text
682
+ const prvPath = this.findFilePathIgnoreCase("Preview/PrvText.txt") ||
683
+ this.findFilePathIgnoreCase("preview/prvtext.txt");
684
+ if (prvPath) {
685
+ const prv = this.getTextFile(prvPath);
686
+ if (prv && prv.trim().length > 0) {
687
+ const escaped = this.escapeHtml(prv);
688
+ html = `<p>${escaped.replace(/\n+/g, '</p><p>')}</p>`;
689
+ }
690
+ }
691
+ return html;
692
+ }
693
+ collectTablesInParagraph(p) {
694
+ const out = [];
695
+ const runs = p?.run ?? p?.["hp:run"];
696
+ if (!runs)
697
+ return out;
698
+ const runArr = Array.isArray(runs) ? runs : [runs];
699
+ for (const run of runArr) {
700
+ if (run?.secPr || run?.ctrl)
701
+ continue;
702
+ const tbl = run?.tbl ?? run?.["hp:tbl"];
703
+ if (!tbl)
704
+ continue;
705
+ if (Array.isArray(tbl))
706
+ out.push(...tbl);
707
+ else
708
+ out.push(tbl);
709
+ }
710
+ return out;
711
+ }
712
+ renderTableHtml(tbl, tableClass, options) {
713
+ const trs = tbl?.tr ?? tbl?.["hp:tr"];
714
+ const rows = trs ? (Array.isArray(trs) ? trs : [trs]) : [];
715
+ const enableImages = options?.renderImages ?? true;
716
+ const enableStyles = options?.renderStyles ?? true;
717
+ const rowHtml = [];
718
+ rows.forEach((tr, rowIndex) => {
719
+ const tcs = tr?.tc ?? tr?.["hp:tc"];
720
+ const cells = tcs ? (Array.isArray(tcs) ? tcs : [tcs]) : [];
721
+ const cellHtml = [];
722
+ for (const tc of cells) {
723
+ // 셀 안 paragraph: <hp:tc><hp:subList><hp:p>...</hp:p></hp:subList></hp:tc>
724
+ // 또는 직접 <hp:tc><hp:p>... 둘 다 지원
725
+ const inner = this.renderCellContentHtml(tc, { enableImages, enableStyles }, options);
726
+ // 구형(속성 @colSpan) + 신형(자식 <hp:cellSpan colSpan rowSpan>) 둘 다 지원
727
+ const cellSpan = tc?.cellSpan ?? tc?.["hp:cellSpan"];
728
+ const colSpan = tc?.["@colSpan"] ?? tc?.["@colspan"] ?? tc?.["@gridSpan"] ?? cellSpan?.["@colSpan"];
729
+ const rowSpan = tc?.["@rowSpan"] ?? tc?.["@rowspan"] ?? cellSpan?.["@rowSpan"];
730
+ const alignStyle = this.getAlignStyle(tc);
731
+ const attrs = [];
732
+ if (colSpan && String(colSpan) !== "1")
733
+ attrs.push(` colspan="${String(colSpan)}"`);
734
+ if (rowSpan && String(rowSpan) !== "1")
735
+ attrs.push(` rowspan="${String(rowSpan)}"`);
736
+ if (alignStyle)
737
+ attrs.push(` style="${alignStyle}"`);
738
+ const isHeader = options?.tableHeaderFirstRow && rowIndex === 0;
739
+ const tag = isHeader ? "th" : "td";
740
+ cellHtml.push(`<${tag}${attrs.join("")}>${inner}</${tag}>`);
741
+ }
742
+ rowHtml.push(`<tr>${cellHtml.join("")}</tr>`);
743
+ });
744
+ return `<table class="${tableClass}">${rowHtml.join("")}</table>`;
745
+ }
746
+ renderCellContentHtml(tc, flags, options) {
747
+ // subList 우선 (현대 HWPX), 없으면 tc 자체를 노드로 처리
748
+ const sub = tc?.subList ?? tc?.["hp:subList"];
749
+ if (sub)
750
+ return this.renderNodeToHtml(sub, flags, options);
751
+ return this.renderNodeToHtml(tc, flags, options);
752
+ }
753
+ getAlignStyle(node) {
754
+ const a = node?.["@align"] ?? node?.["@textAlign"] ?? node?.paraPr?.["@align"] ?? node?.cellPr?.["@align"];
755
+ if (typeof a !== "string")
756
+ return "";
757
+ const v = a.toLowerCase();
758
+ if (v === "center" || v === "right" || v === "left" || v === "justify") {
759
+ return `text-align:${v}`;
760
+ }
761
+ return "";
762
+ }
763
+ renderNodeToHtml(node, flags, options) {
764
+ if (!node)
765
+ return "";
766
+ // paragraph aggregation
767
+ const ps = node?.["hp:p"] ?? node?.p;
768
+ if (ps) {
769
+ const paras = Array.isArray(ps) ? ps : [ps];
770
+ return paras.map((p) => this.renderNodeToHtml(p, flags, options)).join("\n");
771
+ }
772
+ // runs
773
+ const runs = node?.["hp:run"] ?? node?.run;
774
+ const runArr = runs ? (Array.isArray(runs) ? runs : [runs]) : [];
775
+ if (runArr.length > 0) {
776
+ return runArr.map((run) => this.renderRunToHtml(run, flags, options)).join("");
777
+ }
778
+ // direct text
779
+ if (typeof node === "string")
780
+ return this.escapeHtml(node);
781
+ if (typeof node?.["#text"] === "string")
782
+ return this.escapeHtml(node["#text"]);
783
+ return "";
784
+ }
785
+ collectAllText(node, out) {
786
+ if (node == null)
787
+ return;
788
+ // 설정 관련 노드들은 건너뛰기
789
+ if (typeof node === "object" && (node.secPr || node.ctrl || node.linesegarray)) {
790
+ return;
791
+ }
792
+ if (typeof node === "string") {
793
+ out.push(node);
794
+ return;
795
+ }
796
+ if (typeof node === "object") {
797
+ const text = node["#text"];
798
+ if (typeof text === "string")
799
+ out.push(text);
800
+ // 't' 속성이 있으면 직접 추출
801
+ const t = node.t;
802
+ if (typeof t === "string") {
803
+ out.push(t);
804
+ return; // t가 있으면 더 이상 탐색하지 않음
805
+ }
806
+ for (const [k, v] of Object.entries(node)) {
807
+ if (k === "#text" || k === "t")
808
+ continue;
809
+ // 설정 관련 키들은 건너뛰기
810
+ if (k === "secPr" || k === "ctrl" || k === "linesegarray")
811
+ continue;
812
+ this.collectAllText(v, out);
813
+ }
814
+ }
815
+ }
816
+ renderRunToHtml(run, flags, options) {
817
+ // 섹션 설정이나 컨트롤 정보가 있는 run은 건너뛰기
818
+ if (run?.secPr || run?.ctrl)
819
+ return "";
820
+ // Text
821
+ const t = run?.["hp:t"] ?? run?.t;
822
+ const text = typeof t === "string" ? t : typeof t?.["#text"] === "string" ? t["#text"] : "";
823
+ let html = this.escapeHtml(text);
824
+ // Image (simplified): hp:picture or hp:img-like reference to BinData
825
+ if (flags.enableImages) {
826
+ const binRef = this.findBinRefInRun(run);
827
+ if (typeof binRef === "string") {
828
+ // Resolve binaryItemIDRef through manifest if needed
829
+ const binPath = this.resolveBinaryPath(binRef);
830
+ if (binPath) {
831
+ let src;
832
+ if (options?.embedImages) {
833
+ const data = this.files[binPath];
834
+ if (data) {
835
+ const mime = this.detectMimeType(binPath);
836
+ const b64 = this.toBase64(data);
837
+ src = `data:${mime};base64,${b64}`;
838
+ }
839
+ else {
840
+ src = binPath;
841
+ }
842
+ }
843
+ else if (options?.imageSrcResolver) {
844
+ src = options.imageSrcResolver(binPath);
845
+ }
846
+ else {
847
+ src = binPath;
848
+ }
849
+ html += `<img src="${this.escapeHtml(src)}" alt="" />`;
850
+ }
851
+ }
852
+ }
853
+ // Styles: Resolve charPrIDRef to actual character properties
854
+ if (flags.enableStyles) {
855
+ const charPrId = run?.["@charPrIDRef"];
856
+ if (charPrId && this.characterProperties.has(charPrId)) {
857
+ const charProps = this.characterProperties.get(charPrId);
858
+ let open = "";
859
+ let close = "";
860
+ const styleParts = [];
861
+ // Apply formatting
862
+ if (charProps?.bold) {
863
+ open += "<strong>";
864
+ close = "</strong>" + close;
865
+ }
866
+ if (charProps?.italic) {
867
+ open += "<em>";
868
+ close = "</em>" + close;
869
+ }
870
+ // Handle underline
871
+ if (charProps?.underline && charProps.underline?.["@type"] !== "NONE") {
872
+ styleParts.push("text-decoration:underline");
873
+ }
874
+ // Handle color
875
+ if (charProps?.textColor && charProps.textColor !== "#000000") {
876
+ styleParts.push(`color:${this.normalizeColor(charProps.textColor)}`);
877
+ }
878
+ // Handle font size (convert HWPUNIT to points)
879
+ if (charProps?.height) {
880
+ const sizeInPt = this.convertHwpUnitToPoints(charProps.height);
881
+ styleParts.push(`font-size:${sizeInPt}pt`);
882
+ }
883
+ // Handle background color
884
+ if (charProps?.shadeColor && charProps.shadeColor !== "none" && charProps.shadeColor !== "#FFFFFF") {
885
+ styleParts.push(`background-color:${this.normalizeColor(charProps.shadeColor)}`);
886
+ }
887
+ const styleAttr = styleParts.length ? ` style="${styleParts.join(";")}"` : "";
888
+ if (open || styleAttr) {
889
+ html = `${open}<span${styleAttr}>${html}</span>${close}`;
890
+ }
891
+ }
892
+ }
893
+ return html;
894
+ }
895
+ findBinRefInRun(run) {
896
+ // common patterns - note: XML parser removes namespaces, so hp:pic becomes 'pic', hc:img becomes 'img'
897
+ const pic = run?.["hp:picture"] ?? run?.picture ?? run?.pic;
898
+ const draw = run?.["hp:draw"] ?? run?.draw;
899
+ const img = run?.["hp:img"] ?? run?.img;
900
+ const hcImg = run?.["hc:img"] ?? run?.["hp:hc:img"];
901
+ const tryExtract = (node) => {
902
+ if (!node)
903
+ return undefined;
904
+ // Check for binaryItemIDRef attribute (used by hc:img)
905
+ const binaryRef = node?.["@binaryItemIDRef"];
906
+ if (typeof binaryRef === "string")
907
+ return binaryRef;
908
+ // For picture elements, the img may be nested inside (hc:img becomes nested img)
909
+ const nestedImg = node?.img;
910
+ if (nestedImg && typeof nestedImg?.["@binaryItemIDRef"] === "string") {
911
+ return nestedImg["@binaryItemIDRef"];
912
+ }
913
+ // Check for traditional hp:binItem reference
914
+ const ref = node?.["hp:binItem"]?.["@ref"] ?? node?.binItem?.["@ref"] ?? node?.["@ref"];
915
+ if (typeof ref === "string")
916
+ return ref;
917
+ return undefined;
918
+ };
919
+ return tryExtract(pic) || tryExtract(draw) || tryExtract(img) || tryExtract(hcImg);
920
+ }
921
+ resolveBinaryPath(binRef) {
922
+ // First, try direct path (legacy format)
923
+ const directPath = `BinData/${binRef}`;
924
+ if (this.files[directPath]) {
925
+ return directPath;
926
+ }
927
+ // Try to resolve through manifest
928
+ try {
929
+ const summary = this.summarizePackage();
930
+ if (summary.manifest) {
931
+ const manifestItem = summary.manifest.find(item => item.id === binRef);
932
+ if (manifestItem?.href) {
933
+ // The href might include the full path or relative path
934
+ const resolvedPath = manifestItem.href.startsWith('BinData/')
935
+ ? manifestItem.href
936
+ : `BinData/${manifestItem.href}`;
937
+ if (this.files[resolvedPath]) {
938
+ return resolvedPath;
939
+ }
940
+ // Try the href as-is
941
+ if (this.files[manifestItem.href]) {
942
+ return manifestItem.href;
943
+ }
944
+ }
945
+ }
946
+ }
947
+ catch (e) {
948
+ // Fall back if manifest parsing fails
949
+ }
950
+ // Fallback: return the direct path even if file doesn't exist
951
+ return directPath;
952
+ }
953
+ normalizeColor(c) {
954
+ const s = c.trim();
955
+ if (/^#?[0-9a-fA-F]{6}$/.test(s))
956
+ return s.startsWith('#') ? s : `#${s}`;
957
+ return s; // fallback as-is
958
+ }
959
+ normalizeSize(sz) {
960
+ const n = typeof sz === 'number' ? sz : Number(sz);
961
+ if (!isNaN(n))
962
+ return `${n}pt`;
963
+ return String(sz);
964
+ }
965
+ convertHwpUnitToPoints(hwpUnit) {
966
+ // HWPUNIT is approximately 1/100th of a point
967
+ // 1000 HWPUNIT = 10 points
968
+ const units = typeof hwpUnit === 'number' ? hwpUnit : parseInt(String(hwpUnit), 10);
969
+ return Math.round((units / 100) * 10) / 10; // Round to 1 decimal place
970
+ }
971
+ parseStyleDefinitions() {
972
+ // Clear existing definitions
973
+ this.characterProperties.clear();
974
+ this.fontFaces.clear();
975
+ // Find and parse header.xml
976
+ const headerXml = this.getTextFile("Contents/header.xml");
977
+ if (!headerXml)
978
+ return;
979
+ try {
980
+ const header = this.parseXml(headerXml);
981
+ const root = header?.head ?? header;
982
+ if (!root)
983
+ return;
984
+ // Character properties are in head/refList/charProperties
985
+ const refList = root?.refList;
986
+ if (!refList)
987
+ return;
988
+ // Parse font faces
989
+ const fontfaces = refList?.fontfaces;
990
+ if (fontfaces?.fontface) {
991
+ const fonts = Array.isArray(fontfaces.fontface) ? fontfaces.fontface : [fontfaces.fontface];
992
+ for (const font of fonts) {
993
+ const id = font?.["@id"];
994
+ if (id) {
995
+ this.fontFaces.set(id, font);
996
+ }
997
+ }
998
+ }
999
+ // Parse character properties from refList
1000
+ const charProperties = refList?.charProperties;
1001
+ if (charProperties?.charPr) {
1002
+ const charPrs = Array.isArray(charProperties.charPr) ? charProperties.charPr : [charProperties.charPr];
1003
+ for (const charPr of charPrs) {
1004
+ const id = charPr?.["@id"];
1005
+ if (id) {
1006
+ this.characterProperties.set(id, this.processCharacterProperties(charPr));
1007
+ }
1008
+ }
1009
+ }
1010
+ }
1011
+ catch {
1012
+ // Silent fail - styles are optional
1013
+ }
1014
+ }
1015
+ processCharacterProperties(charPr) {
1016
+ // Bold is indicated by presence of <hh:bold/> element (after namespace removal, becomes 'bold')
1017
+ const hasBold = charPr?.bold !== undefined;
1018
+ const hasItalic = charPr?.italic !== undefined;
1019
+ return {
1020
+ height: charPr?.["@height"], // Font size in HWPUNIT
1021
+ textColor: charPr?.["@textColor"], // Text color
1022
+ shadeColor: charPr?.["@shadeColor"], // Background color
1023
+ bold: hasBold, // Bold formatting (element presence)
1024
+ italic: hasItalic, // Italic formatting (element presence)
1025
+ underline: charPr?.underline, // Underline info
1026
+ strikeout: charPr?.strikeout, // Strikeout info
1027
+ fontRef: charPr?.fontRef, // Font reference
1028
+ raw: charPr // Keep original for debugging
1029
+ };
1030
+ }
1031
+ detectMimeType(path) {
1032
+ const lower = path.toLowerCase();
1033
+ if (lower.endsWith(".png"))
1034
+ return "image/png";
1035
+ if (lower.endsWith(".jpg") || lower.endsWith(".jpeg"))
1036
+ return "image/jpeg";
1037
+ if (lower.endsWith(".gif"))
1038
+ return "image/gif";
1039
+ if (lower.endsWith(".bmp"))
1040
+ return "image/bmp";
1041
+ if (lower.endsWith(".webp"))
1042
+ return "image/webp";
1043
+ return "application/octet-stream";
1044
+ }
1045
+ toBase64(bytes) {
1046
+ if (typeof Buffer !== "undefined") {
1047
+ return Buffer.from(bytes).toString("base64");
1048
+ }
1049
+ let binary = "";
1050
+ for (let i = 0; i < bytes.length; i++)
1051
+ binary += String.fromCharCode(bytes[i]);
1052
+ // btoa may not exist in Node, handled by Buffer path above
1053
+ // @ts-ignore
1054
+ return btoa(binary);
1055
+ }
1056
+ extractTextFromNode(node) {
1057
+ if (!node)
1058
+ return "";
1059
+ // hp:p → hp:run → hp:t
1060
+ const ps = node?.["hp:p"] ?? node?.p;
1061
+ if (ps) {
1062
+ const paras = Array.isArray(ps) ? ps : [ps];
1063
+ return paras.map((p) => this.extractTextFromNode(p)).join("\n");
1064
+ }
1065
+ const runs = node?.["hp:run"] ?? node?.run;
1066
+ const runArr = runs ? (Array.isArray(runs) ? runs : [runs]) : [];
1067
+ const textPieces = [];
1068
+ for (const run of runArr) {
1069
+ // 섹션 설정이나 컨트롤 정보가 있는 run은 건너뛰기
1070
+ if (run?.secPr || run?.ctrl)
1071
+ continue;
1072
+ const t = run?.["hp:t"] ?? run?.t;
1073
+ if (t === undefined || t === null)
1074
+ continue;
1075
+ if (typeof t === "string")
1076
+ textPieces.push(t);
1077
+ else if (typeof t?.["#text"] === "string")
1078
+ textPieces.push(t["#text"]);
1079
+ }
1080
+ if (textPieces.length > 0)
1081
+ return textPieces.join("");
1082
+ // direct text
1083
+ if (typeof node === "string")
1084
+ return node;
1085
+ if (typeof node?.["#text"] === "string")
1086
+ return node["#text"];
1087
+ return "";
1088
+ }
1089
+ escapeHtml(text) {
1090
+ return text
1091
+ .replace(/&/g, "&amp;")
1092
+ .replace(/</g, "&lt;")
1093
+ .replace(/>/g, "&gt;");
1094
+ }
1095
+ async listImages() {
1096
+ if (!this.zip)
1097
+ throw new HwpxNotLoadedError();
1098
+ // 이미지: BinData/ 내 파일들 (원 규격상 다양한 바이너리 포함)
1099
+ return Object.keys(this.files)
1100
+ .filter((p) => p.startsWith("BinData/") && !p.endsWith("/"))
1101
+ .sort();
1102
+ }
1103
+ }
1104
+ export default HwpxReader;