hwp-convert 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/CHANGELOG.md +185 -0
  2. package/LICENSE +25 -0
  3. package/NOTICE +23 -0
  4. package/README.md +338 -0
  5. package/dist/browser/hwp-convert.browser.mjs +20677 -0
  6. package/dist/browser/hwp-convert.browser.mjs.map +7 -0
  7. package/dist/cli.d.ts +2 -0
  8. package/dist/cli.js +267 -0
  9. package/dist/index.d.ts +5 -0
  10. package/dist/index.js +5 -0
  11. package/dist/lib/errors.d.ts +9 -0
  12. package/dist/lib/errors.js +18 -0
  13. package/dist/lib/hwp/binData.d.ts +15 -0
  14. package/dist/lib/hwp/binData.js +64 -0
  15. package/dist/lib/hwp/bodyText.d.ts +31 -0
  16. package/dist/lib/hwp/bodyText.js +208 -0
  17. package/dist/lib/hwp/byteReader.d.ts +40 -0
  18. package/dist/lib/hwp/byteReader.js +116 -0
  19. package/dist/lib/hwp/cfbReader.d.ts +44 -0
  20. package/dist/lib/hwp/cfbReader.js +134 -0
  21. package/dist/lib/hwp/control.d.ts +17 -0
  22. package/dist/lib/hwp/control.js +290 -0
  23. package/dist/lib/hwp/converter.d.ts +22 -0
  24. package/dist/lib/hwp/converter.js +41 -0
  25. package/dist/lib/hwp/docInfo.d.ts +26 -0
  26. package/dist/lib/hwp/docInfo.js +396 -0
  27. package/dist/lib/hwp/fileHeader.d.ts +42 -0
  28. package/dist/lib/hwp/fileHeader.js +66 -0
  29. package/dist/lib/hwp/htmlReader.d.ts +17 -0
  30. package/dist/lib/hwp/htmlReader.js +602 -0
  31. package/dist/lib/hwp/hwpxBuilder.d.ts +19 -0
  32. package/dist/lib/hwp/hwpxBuilder.js +633 -0
  33. package/dist/lib/hwp/index.d.ts +68 -0
  34. package/dist/lib/hwp/index.js +149 -0
  35. package/dist/lib/hwp/mdReader.d.ts +16 -0
  36. package/dist/lib/hwp/mdReader.js +485 -0
  37. package/dist/lib/hwp/mdWriter.d.ts +23 -0
  38. package/dist/lib/hwp/mdWriter.js +182 -0
  39. package/dist/lib/hwp/owpml.d.ts +33 -0
  40. package/dist/lib/hwp/owpml.js +86 -0
  41. package/dist/lib/hwp/record.d.ts +24 -0
  42. package/dist/lib/hwp/record.js +59 -0
  43. package/dist/lib/hwp/tags.d.ts +115 -0
  44. package/dist/lib/hwp/tags.js +217 -0
  45. package/dist/lib/hwp/types.d.ts +214 -0
  46. package/dist/lib/hwp/types.js +5 -0
  47. package/dist/lib/hwpxReader.d.ts +60 -0
  48. package/dist/lib/hwpxReader.js +1104 -0
  49. package/dist/lib/types.d.ts +47 -0
  50. package/dist/lib/types.js +1 -0
  51. package/dist/lib/writer.d.ts +19 -0
  52. package/dist/lib/writer.js +149 -0
  53. package/package.json +94 -0
@@ -0,0 +1,68 @@
1
+ /**
2
+ * HWP 5.0 바이너리 파서 진입점.
3
+ *
4
+ * 공개 API:
5
+ * - detectFormat(bytes): "hwp" | "hwpx" | "hwp3" | "unknown"
6
+ * - parseHwp(bytes): HwpDocument
7
+ * - hwpToText(bytes, options?): Promise<string>
8
+ * - hwpToHwpx(bytes, options?): Promise<Uint8Array>
9
+ * - parseFileHeader, versionToString, isVersionSupported
10
+ * - 에러: HwpEncryptedError, HwpUnsupportedError, HwpInvalidFormatError
11
+ * - 타입: HwpDocument, HwpSection
12
+ *
13
+ * 보존되는 것 (HWP → HWPX 라운드트립):
14
+ * - 표 (rowSpan/colSpan 포함), 임베디드 이미지 (BinData 패키징)
15
+ * - 7개 언어 그룹별 폰트, 글자 모양(굵게/기울임/밑줄/색/크기)
16
+ * - 문단 모양(정렬/들여쓰기/줄간격), 스타일, 번호 매기기 형식 문자열, 글머리표 문자
17
+ * - 도형(line) 좌표, 수식(EQEDIT) 스크립트
18
+ * - Preview/PrvText.txt 자동 생성
19
+ *
20
+ * 미지원 / 한계:
21
+ * - 암호화된 HWP / 배포용 ViewText / HWP 3.0: 명시적 에러
22
+ * - 머리말/꼬리말/각주: 파싱되나 본문 흐름 외부에 출력하지 않음
23
+ * - BorderFill 정의: ID 슬롯만 채움 (색/굵기/대각선 미보존)
24
+ * - 차트(CHART_DATA) / OLE / 글맵시: 미지원
25
+ * - 도형(line) 외 사각형/타원/호/다각형/곡선: 종류만 보존
26
+ */
27
+ import { type MarkdownWriteOptions } from "./converter.js";
28
+ import type { HwpDocument, ImageResolver } from "./types.js";
29
+ export type { HwpDocument, HwpSection, ImageResolver, ConvertOptions } from "./types.js";
30
+ export type { MarkdownWriteOptions } from "./converter.js";
31
+ export { hwpDocumentToMarkdown, markdownToHwpDocument, htmlToHwpDocument, } from "./converter.js";
32
+ export { parseFileHeader, versionToString, isVersionSupported } from "./fileHeader.js";
33
/**
 * Error for input that is recognised but that the parser refuses to process.
 *
 * `parseHwp` throws it for HWP 3.0 files, for HWP versions rejected by
 * `isVersionSupported`, and for distribution (ViewText) documents.
 * Instances carry `name === "HwpUnsupportedError"`.
 */
export declare class HwpUnsupportedError extends Error {
    /**
     * @param msg Human-readable description of what is not supported.
     */
    constructor(msg: string);
}
36
/**
 * Error thrown by `parseHwp` when the file header marks the document as
 * encrypted. The message is fixed, so the constructor takes no arguments.
 * Instances carry `name === "HwpEncryptedError"`.
 */
export declare class HwpEncryptedError extends Error {
    constructor();
}
39
/**
 * Error thrown by `parseHwp` when the input does not carry the HWP 5.0 (CFB)
 * signature. Instances carry `name === "HwpInvalidFormatError"`.
 */
export declare class HwpInvalidFormatError extends Error {
    /**
     * @param msg Optional message; a built-in default message is used when omitted.
     */
    constructor(msg?: string);
}
42
/**
 * Container format reported by `detectFormat`.
 *
 * - `"hwp"`     — input starts with the CFB/OLE signature (HWP 5.0 container)
 * - `"hwpx"`    — input starts with the ZIP signature
 * - `"hwp3"`    — input starts with the text `HWP Document File`
 * - `"unknown"` — none of the above matched
 */
export type DetectedFormat = "hwp" | "hwpx" | "hwp3" | "unknown";
43
/**
 * Classify a byte buffer by its leading signature bytes only; the content
 * behind the signature is not inspected.
 *
 * @param data Raw file bytes.
 * @returns The detected container format, or `"unknown"` when no signature matches.
 */
export declare function detectFormat(data: Uint8Array): DetectedFormat;
44
/**
 * Parse an HWP 5.0 binary and return the HwpDocument intermediate representation.
 *
 * @param data Raw bytes of the HWP file.
 * @returns The parsed document (file header, doc info, sections, binary data).
 * @throws {HwpUnsupportedError} For HWP 3.0 input, an unsupported HWP version,
 *   or a distribution (ViewText) document.
 * @throws {HwpInvalidFormatError} When the input lacks the HWP 5.0 (CFB) signature.
 * @throws {HwpEncryptedError} When the file header marks the document as encrypted.
 */
export declare function parseHwp(data: Uint8Array): HwpDocument;
48
/**
 * Parse an HWP 5.0 binary with `parseHwp` and render the document as text.
 *
 * Parsing errors thrown by `parseHwp` surface as a rejected promise.
 *
 * @param data Raw bytes of the HWP file.
 * @param options Separator settings, forwarded unchanged to the text converter.
 * @returns The extracted text.
 */
export declare function hwpToText(
    data: Uint8Array,
    options?: {
        paragraphSeparator?: string;
        sectionSeparator?: string;
    },
): Promise<string>;
52
/**
 * Parse an HWP 5.0 binary with `parseHwp` and build an HWPX package from it.
 *
 * Parsing errors thrown by `parseHwp` surface as a rejected promise.
 *
 * @param data Raw bytes of the HWP file.
 * @param options Package options, forwarded unchanged to the HWPX converter.
 * @returns The HWPX package bytes.
 */
export declare function hwpToHwpx(
    data: Uint8Array,
    options?: {
        title?: string;
        creator?: string;
    },
): Promise<Uint8Array>;
56
/**
 * Parse an HWP 5.0 binary with `parseHwp` and render the document as Markdown.
 *
 * Parsing errors thrown by `parseHwp` surface as a rejected promise.
 *
 * @param data Raw bytes of the HWP file.
 * @param options Markdown writer options, forwarded unchanged to `hwpDocumentToMarkdown`.
 * @returns The Markdown text.
 */
export declare function hwpToMarkdown(
    data: Uint8Array,
    options?: MarkdownWriteOptions,
): Promise<string>;
57
/**
 * Convert Markdown text into an HWPX package.
 *
 * The Markdown is first turned into an HwpDocument (only `imageResolver` is
 * passed to that step), then the document is packaged as HWPX with the full
 * `options` object.
 *
 * @param md Markdown source text.
 * @param options Package options plus an optional image resolver.
 * @returns The HWPX package bytes.
 */
export declare function markdownToHwpx(
    md: string,
    options?: {
        title?: string;
        creator?: string;
        imageResolver?: ImageResolver;
    },
): Promise<Uint8Array>;
63
/**
 * Convert an HTML document into an HWPX package.
 *
 * The HTML is first turned into an HwpDocument (only `imageResolver` is
 * passed to that step), then the document is packaged as HWPX with the full
 * `options` object.
 *
 * @param html HTML source text.
 * @param options Package options plus an optional image resolver.
 * @returns The HWPX package bytes.
 */
export declare function htmlToHwpx(
    html: string,
    options?: {
        title?: string;
        creator?: string;
        imageResolver?: ImageResolver;
    },
): Promise<Uint8Array>;
@@ -0,0 +1,149 @@
1
+ /**
2
+ * HWP 5.0 바이너리 파서 진입점.
3
+ *
4
+ * 공개 API:
5
+ * - detectFormat(bytes): "hwp" | "hwpx" | "hwp3" | "unknown"
6
+ * - parseHwp(bytes): HwpDocument
7
+ * - hwpToText(bytes, options?): Promise<string>
8
+ * - hwpToHwpx(bytes, options?): Promise<Uint8Array>
9
+ * - parseFileHeader, versionToString, isVersionSupported
10
+ * - 에러: HwpEncryptedError, HwpUnsupportedError, HwpInvalidFormatError
11
+ * - 타입: HwpDocument, HwpSection
12
+ *
13
+ * 보존되는 것 (HWP → HWPX 라운드트립):
14
+ * - 표 (rowSpan/colSpan 포함), 임베디드 이미지 (BinData 패키징)
15
+ * - 7개 언어 그룹별 폰트, 글자 모양(굵게/기울임/밑줄/색/크기)
16
+ * - 문단 모양(정렬/들여쓰기/줄간격), 스타일, 번호 매기기 형식 문자열, 글머리표 문자
17
+ * - 도형(line) 좌표, 수식(EQEDIT) 스크립트
18
+ * - Preview/PrvText.txt 자동 생성
19
+ *
20
+ * 미지원 / 한계:
21
+ * - 암호화된 HWP / 배포용 ViewText / HWP 3.0: 명시적 에러
22
+ * - 머리말/꼬리말/각주: 파싱되나 본문 흐름 외부에 출력하지 않음
23
+ * - BorderFill 정의: ID 슬롯만 채움 (색/굵기/대각선 미보존)
24
+ * - 차트(CHART_DATA) / OLE / 글맵시: 미지원
25
+ * - 도형(line) 외 사각형/타원/호/다각형/곡선: 종류만 보존
26
+ */
27
+ import { parseFileHeader, isVersionSupported, versionToString } from "./fileHeader.js";
28
+ import { HwpCfbReader } from "./cfbReader.js";
29
+ import { parseDocInfo } from "./docInfo.js";
30
+ import { parseBodyTextSection } from "./bodyText.js";
31
+ import { loadBinDataContent } from "./binData.js";
32
+ import { hwpDocumentToText, hwpDocumentToHwpx, hwpDocumentToMarkdown, markdownToHwpDocument, htmlToHwpDocument, } from "./converter.js";
33
+ export { hwpDocumentToMarkdown, markdownToHwpDocument, htmlToHwpDocument, } from "./converter.js";
34
+ export { parseFileHeader, versionToString, isVersionSupported } from "./fileHeader.js";
35
/**
 * Error for input that is recognised but that the parser refuses to process.
 *
 * `parseHwp` throws it for HWP 3.0 files, for HWP versions rejected by
 * `isVersionSupported`, and for distribution (ViewText) documents.
 */
export class HwpUnsupportedError extends Error {
    /**
     * @param msg Human-readable description of what is not supported.
     */
    constructor(msg) {
        super(msg);
        // Override the inherited "Error" so callers can branch on `err.name`.
        this.name = "HwpUnsupportedError";
    }
}
41
/**
 * Error thrown by `parseHwp` when the file header marks the document as
 * encrypted. The (Korean) message is fixed, so the constructor takes no
 * arguments.
 */
export class HwpEncryptedError extends Error {
    constructor() {
        super("암호화된 HWP 문서는 현재 지원하지 않습니다.");
        // Override the inherited "Error" so callers can branch on `err.name`.
        this.name = "HwpEncryptedError";
    }
}
47
/**
 * Error thrown by `parseHwp` when the input does not carry the HWP 5.0 (CFB)
 * signature.
 */
export class HwpInvalidFormatError extends Error {
    /**
     * @param msg Optional message; when omitted (or `undefined`) a default
     *   Korean message stating the file is not a valid HWP 5.0 file is used.
     */
    constructor(msg = "유효한 HWP 5.0 파일이 아닙니다.") {
        super(msg);
        // Override the inherited "Error" so callers can branch on `err.name`.
        this.name = "HwpInvalidFormatError";
    }
}
53
/** CFB/OLE container signature; an input starting with it is reported as "hwp". */
const CFB_SIGNATURE = [0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1];
/** ZIP signature ("PK\x03\x04"); an input starting with it is reported as "hwpx". */
const ZIP_SIGNATURE = [0x50, 0x4b, 0x03, 0x04];
/** ASCII text at the start of an HWP 3.0 file. */
const HWP3_SIGNATURE = "HWP Document File";

/** True when `data` begins with exactly the bytes listed in `prefix`. */
function hasBytePrefix(data, prefix) {
    return data.byteLength >= prefix.length && prefix.every((byte, i) => data[i] === byte);
}

/**
 * Classify a byte buffer by its leading signature bytes.
 *
 * Only the signature is inspected, so any CFB file is reported as "hwp" and
 * any ZIP file as "hwpx"; the content behind the signature is not validated.
 *
 * @param data Raw file bytes (a Uint8Array).
 * @returns "hwp", "hwpx", "hwp3", or "unknown" when no signature matches.
 */
export function detectFormat(data) {
    // Both container checks are skipped for inputs shorter than the 8-byte CFB
    // signature, so a bare 4-byte ZIP header is still reported as "unknown".
    if (data.byteLength >= CFB_SIGNATURE.length) {
        if (hasBytePrefix(data, CFB_SIGNATURE)) {
            return "hwp";
        }
        if (hasBytePrefix(data, ZIP_SIGNATURE)) {
            return "hwpx";
        }
    }
    // HWP 3.0 files start with a plain ASCII marker.
    if (data.byteLength >= HWP3_SIGNATURE.length) {
        const head = String.fromCharCode(...data.subarray(0, HWP3_SIGNATURE.length));
        if (head === HWP3_SIGNATURE) {
            return "hwp3";
        }
    }
    return "unknown";
}
79
+ /**
80
+ * HWP 5.0 바이너리를 파싱하여 HwpDocument IR 반환.
81
+ */
82
+ export function parseHwp(data) {
83
+ const fmt = detectFormat(data);
84
+ if (fmt === "hwp3") {
85
+ throw new HwpUnsupportedError("HWP 3.0 포맷은 지원하지 않습니다. 한컴오피스/LibreOffice 에서 HWP 5.0 으로 다시 저장해 주세요.");
86
+ }
87
+ if (fmt !== "hwp") {
88
+ throw new HwpInvalidFormatError(`HWP 5.0(CFB) 시그니처가 아닙니다 (감지: ${fmt}).`);
89
+ }
90
+ const cfb = new HwpCfbReader(data);
91
+ const headerBytes = cfb.readFileHeader();
92
+ const fileHeader = parseFileHeader(headerBytes);
93
+ if (fileHeader.flags.encrypted) {
94
+ throw new HwpEncryptedError();
95
+ }
96
+ if (!isVersionSupported(fileHeader.version)) {
97
+ throw new HwpUnsupportedError(`지원하지 않는 HWP 버전: ${versionToString(fileHeader.version)} (5.0 ~ 5.1 지원)`);
98
+ }
99
+ if (fileHeader.flags.distribution) {
100
+ // ViewText 복호화는 1차 포팅 범위 밖
101
+ throw new HwpUnsupportedError("배포용 문서(ViewText)는 현재 지원하지 않습니다. 일반 HWP 로 저장 후 시도해 주세요.");
102
+ }
103
+ const compressed = fileHeader.flags.compressed;
104
+ const docInfoBytes = cfb.readDocInfo(compressed);
105
+ const { docInfo } = parseDocInfo(docInfoBytes);
106
+ const sectionCount = cfb.sectionCount(false);
107
+ const sections = [];
108
+ for (let i = 0; i < sectionCount; i++) {
109
+ const secBytes = cfb.readBodySection(i, compressed, false);
110
+ if (!secBytes)
111
+ continue;
112
+ try {
113
+ sections.push(parseBodyTextSection(secBytes));
114
+ }
115
+ catch {
116
+ // 개별 섹션 실패 시 빈 섹션으로 대체 (전체 실패 방지)
117
+ sections.push({ paragraphs: [] });
118
+ }
119
+ }
120
+ const binData = loadBinDataContent(cfb, docInfo.binData);
121
+ return {
122
+ header: fileHeader,
123
+ docInfo,
124
+ sections,
125
+ binData,
126
+ };
127
+ }
128
/**
 * Parse an HWP 5.0 binary with `parseHwp` and render the document as text.
 *
 * Because the function is async, errors thrown by `parseHwp` surface as a
 * rejected promise rather than a synchronous throw.
 *
 * @param data Raw bytes of the HWP file.
 * @param options Forwarded unchanged to `hwpDocumentToText`.
 * @returns Promise of the extracted text.
 */
export async function hwpToText(data, options) {
    const parsed = parseHwp(data);
    return hwpDocumentToText(parsed, options);
}
132
/**
 * Parse an HWP 5.0 binary with `parseHwp` and build an HWPX package from it.
 *
 * Because the function is async, errors thrown by `parseHwp` surface as a
 * rejected promise rather than a synchronous throw.
 *
 * @param data Raw bytes of the HWP file.
 * @param options Forwarded unchanged to `hwpDocumentToHwpx`.
 * @returns Promise of the HWPX package produced by `hwpDocumentToHwpx`.
 */
export async function hwpToHwpx(data, options) {
    const parsed = parseHwp(data);
    return await hwpDocumentToHwpx(parsed, options);
}
136
/**
 * Parse an HWP 5.0 binary with `parseHwp` and render the document as Markdown.
 *
 * Because the function is async, errors thrown by `parseHwp` surface as a
 * rejected promise rather than a synchronous throw.
 *
 * @param data Raw bytes of the HWP file.
 * @param options Forwarded unchanged to `hwpDocumentToMarkdown`.
 * @returns Promise of the Markdown output.
 */
export async function hwpToMarkdown(data, options) {
    const parsed = parseHwp(data);
    return hwpDocumentToMarkdown(parsed, options);
}
140
/**
 * Convert Markdown text into an HWPX package.
 *
 * Only `imageResolver` is handed to the Markdown reader; the complete
 * `options` object is then forwarded to `hwpDocumentToHwpx`.
 *
 * @param md Markdown source text.
 * @param options Optional settings; may be omitted entirely.
 * @returns Promise of the HWPX package produced by `hwpDocumentToHwpx`.
 */
export async function markdownToHwpx(md, options) {
    const imageResolver = options?.imageResolver;
    const parsed = markdownToHwpDocument(md, { imageResolver });
    return await hwpDocumentToHwpx(parsed, options);
}
145
/**
 * Convert an HTML document into an HWPX package.
 *
 * Only `imageResolver` is handed to the HTML reader; the complete `options`
 * object is then forwarded to `hwpDocumentToHwpx`.
 *
 * @param html HTML source text.
 * @param options Optional settings; may be omitted entirely.
 * @returns Promise of the HWPX package produced by `hwpDocumentToHwpx`.
 */
export async function htmlToHwpx(html, options) {
    const imageResolver = options?.imageResolver;
    const parsed = htmlToHwpDocument(html, { imageResolver });
    return await hwpDocumentToHwpx(parsed, options);
}
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Markdown → HwpDocument IR.
3
+ *
4
+ * `marked` 의 lexer 로 토큰 트리를 만들고 IR 로 변환.
5
+ * - heading → 굵은 paragraph (charShape 별도 정의: 큰 사이즈 + bold)
6
+ * - paragraph → text + bold/italic run 분할
7
+ * - list (ordered/unordered) → "1. " / "- " prefix 가 포함된 paragraph (간단 표현)
8
+ * - blockquote → 인용 paragraph (회색 배경)
9
+ * - code (block / inline) → 모노스페이스 charShape
10
+ * - table → HwpTableControl (셀 paragraph 재귀)
11
+ * - image → HwpPictureControl + binData 등록 (data: URI 만 지원)
12
+ * - link → 텍스트 그대로 (링크 자체는 보존하지 않음 — HWP 필드 컨트롤은 별도 작업)
13
+ */
14
+ import type { HwpDocument, ConvertOptions } from "./types.js";
15
/**
 * Convert Markdown text into an HwpDocument.
 *
 * Per this module's header notes: the token tree comes from the `marked`
 * lexer, images are supported only as `data:` URIs, and links keep their
 * text while the link target is not preserved.
 *
 * @param md Markdown source text.
 * @param options Optional conversion options.
 * @returns The resulting HwpDocument.
 */
export declare function markdownToHwpDocument(md: string, options?: ConvertOptions): HwpDocument;