hwpkit-dev 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/ .npmignore ADDED
@@ -0,0 +1,11 @@
1
+ node_modules
2
+ dist
3
+ *.hml
4
+ *.hml.md
5
+ *.hwpx
6
+ *.pdf
7
+ *.zip
8
+ .claude
9
+ tests
10
+ data
11
+ playground
package/README.md ADDED
@@ -0,0 +1,223 @@
1
+ # HWPKit
2
+
3
+ [![npm version](https://img.shields.io/npm/v/hwpkit.svg)](https://www.npmjs.com/package/hwpkit)
4
+ [![license](https://img.shields.io/npm/l/hwpkit.svg)](https://github.com/INMD1/hwpkit/blob/main/license.md)
5
+
6
+ **HWP / HWPX / DOCX / Markdown 양방향 문서 변환 라이브러리**
7
+
8
+ 한국 문서 포맷(HWP, HWPX)과 국제 표준(DOCX, Markdown)을 상호 변환하는 TypeScript 라이브러리입니다.
9
+ 브라우저와 Node.js 환경 모두에서 동작하며, 데이터 무결성과 무중단 변환을 최우선으로 설계했습니다.
10
+
11
+ ---
12
+
13
+ ## 주요 특징
14
+
15
+ - **Pipeline 체이닝 API** - `Pipeline.open(file).to('hwpx')` 한 줄로 변환
16
+ - **데이터 무결성 100%** - 텍스트, 표, 이미지 누락 없이 변환
17
+ - **무중단 변환** - 어떤 입력이 들어와도 크래시 없이 `Outcome<T>` 반환
18
+ - **4단계 표 폴백** - Full > Grid > Flat > Text 순서로 안전 변환
19
+ - **Result 모나드** - null/throw 대신 `Ok | Fail` 명시적 결과 처리
20
+ - **TypeScript 완전 지원** - 모든 노드 타입과 API에 대한 타입 정의
21
+
22
+ ---
23
+
24
+ ## 변환 지원 현황
25
+
26
+ | 입력 \ 출력 | HWPX | DOCX | Markdown |
27
+ |------------|:----:|:----:|:--------:|
28
+ | **HWPX** | - | O | O |
29
+ | **HWP** | O | O | O |
30
+ | **DOCX** | O | - | O |
31
+ | **Markdown** | O | O | - |
32
+
33
+ ---
34
+
35
+ ## 설치
36
+
37
+ ```bash
38
+ npm install hwpkit
39
+ ```
40
+
41
+ ---
42
+
43
+ ## 사용법
44
+
45
+ ### Pipeline API (권장)
46
+
47
+ ```typescript
48
+ import { Pipeline } from 'hwpkit';
49
+
50
+ // 파일 변환
51
+ const result = await Pipeline.open(uint8ArrayData, 'docx').to('hwpx');
52
+
53
+ if (result.ok) {
54
+ // result.data: Uint8Array (변환된 파일)
55
+ // result.warns: string[] (폴백 발생 시 경고 목록)
56
+ saveFile(result.data);
57
+ } else {
58
+ console.error(result.error);
59
+ }
60
+
61
+ // 문서 구조만 추출 (인코딩 없이)
62
+ const inspectResult = await Pipeline.open(data, 'docx').inspect();
63
+ if (inspectResult.ok) {
64
+ console.log(inspectResult.data); // DocRoot
65
+ }
66
+
67
+ // File/Blob 입력 (비동기)
68
+ const pipeline = await Pipeline.openAsync(file, 'hwpx');
69
+ const converted = await pipeline.to('docx');
70
+
71
+ // Markdown 문자열 직접 입력
72
+ const mdResult = await Pipeline.open('# Hello\n\nWorld').to('docx');
73
+ ```
74
+
75
+ ### Decoder / Encoder 직접 사용
76
+
77
+ ```typescript
78
+ import { DocxDecoder } from 'hwpkit';
79
+ import { MdEncoder } from 'hwpkit';
80
+
81
+ const decoder = new DocxDecoder();
82
+ const encoder = new MdEncoder();
83
+
84
+ const docResult = await decoder.decode(docxBytes);
85
+ if (!docResult.ok) throw new Error(docResult.error);
86
+
87
+ const mdResult = await encoder.encode(docResult.data);
88
+ if (mdResult.ok) {
89
+ const mdText = new TextDecoder().decode(mdResult.data);
90
+ }
91
+ ```
92
+
93
+ ### 문서 모델 직접 구성
94
+
95
+ ```typescript
96
+ import { buildRoot, buildSheet, buildPara, buildSpan, buildGrid, buildRow, buildCell } from 'hwpkit';
97
+
98
+ const doc = buildRoot({ title: '제목' }, [
99
+ buildSheet([
100
+ buildPara([buildSpan('Hello World', { b: true, pt: 14 })], { heading: 1 }),
101
+ buildPara([buildSpan('본문 텍스트입니다.')]),
102
+ buildGrid([
103
+ buildRow([
104
+ buildCell([buildPara([buildSpan('A1')])]),
105
+ buildCell([buildPara([buildSpan('B1')])]),
106
+ ]),
107
+ buildRow([
108
+ buildCell([buildPara([buildSpan('A2')])]),
109
+ buildCell([buildPara([buildSpan('B2')])]),
110
+ ]),
111
+ ]),
112
+ ]),
113
+ ]);
114
+ ```
115
+
116
+ ### 트리 순회
117
+
118
+ ```typescript
119
+ import { TreeWalker, walkNode, countNodes, validateRoot } from 'hwpkit';
120
+
121
+ // 텍스트 추출
122
+ const walker = new TreeWalker();
123
+ const text = walker.extractText(docRoot);
124
+
125
+ // 노드 통계
126
+ const counts = countNodes(docRoot);
127
+ // { root: 1, sheet: 1, para: 5, span: 5, txt: 5, grid: 1, row: 2, cell: 4 }
128
+
129
+ // 유효성 검증
130
+ const errors = validateRoot(docRoot);
131
+ ```
132
+
133
+ ---
134
+
135
+ ## 아키텍처
136
+
137
+ ```
138
+ 입력 파일 --> [ Decoder ] --> [ DocRoot ] --> [ Encoder ] --> 출력 파일
139
+ |
140
+ Pipeline.inspect()
141
+ ```
142
+
143
+ ### 문서 추상 모델 (Doc Model)
144
+
145
+ 모든 문서는 `DocRoot` 트리로 변환되어 포맷 간 변환의 중간 표현으로 사용됩니다.
146
+
147
+ ```
148
+ DocRoot
149
+ └─ SheetNode (섹션/페이지)
150
+ ├─ ParaNode (문단)
151
+ │ ├─ SpanNode (텍스트 런)
152
+ │ │ └─ TxtNode / BrNode / PbNode / PageNumNode
153
+ │ ├─ ImgNode (이미지)
154
+ │ └─ LinkNode (하이퍼링크)
155
+ └─ GridNode (표)
156
+ └─ RowNode
157
+ └─ CellNode
158
+ └─ ParaNode ...
159
+ ```
160
+
161
+ ### 안전 계층
162
+
163
+ - **ShieldedParser** - 개별 노드 파싱 실패가 전체를 중단시키지 않음
164
+ - **StyleBridge** - 포맷 간 스타일/단위 변환 (`Metric.*`)
165
+ - **`Outcome<T>`** - 모든 결과를 `Ok | Fail`로 감싸 null/throw 제거
166
+
167
+ ### 디렉토리 구조
168
+
169
+ ```
170
+ src/
171
+ ├── model/ # 문서 추상 모델 (DocRoot, 속성, 빌더)
172
+ ├── contract/ # Decoder/Encoder 인터페이스, Result 모나드
173
+ ├── pipeline/ # Pipeline 오케스트레이터, 포맷 레지스트리
174
+ ├── decoders/ # 입력 포맷 → DocRoot
175
+ │ ├── docx/ # DocxDecoder
176
+ │ ├── hwpx/ # HwpxDecoder
177
+ │ ├── hwp/ # HwpScanner
178
+ │ └── md/ # MdDecoder
179
+ ├── encoders/ # DocRoot → 출력 포맷
180
+ │ ├── docx/ # DocxEncoder
181
+ │ ├── hwpx/ # HwpxEncoder
182
+ │ └── md/ # MdEncoder
183
+ ├── walk/ # 트리 순회 (TreeWalker, walkNode)
184
+ ├── safety/ # ShieldedParser, StyleBridge
185
+ └── toolkit/ # XmlKit, ArchiveKit, BinaryKit, TextKit
186
+ ```
187
+
188
+ ---
189
+
190
+ ## 개발
191
+
192
+ ```bash
193
+ # 의존성 설치
194
+ npm install
195
+
196
+ # 타입 체크
197
+ npm run typecheck
198
+
199
+ # 테스트 실행
200
+ npm test
201
+
202
+ # 빌드 (ESM + CJS + d.ts)
203
+ npm run build
204
+
205
+ # 개발 모드 (watch)
206
+ npm run dev
207
+ ```
208
+
209
+ ### 의존성
210
+
211
+ | 패키지 | 용도 |
212
+ |--------|------|
213
+ | `pako` | ZIP inflate/deflate |
214
+ | `xml2js` | XML 파싱/빌드 |
215
+ | `saxes` | SAX 스트리밍 파서 (대용량 XML) |
216
+ | `tsup` | 빌드 (esbuild 기반) |
217
+ | `vitest` | 테스트 프레임워크 |
218
+
219
+ ---
220
+
221
+ ## 라이선스
222
+
223
+ 이 프로젝트는 **LGPL-2.1** 라이선스를 따릅니다. 자세한 내용은 [`license.md`](./license.md)를 참고하세요.
@@ -0,0 +1,313 @@
1
+ type Align = 'left' | 'center' | 'right' | 'justify';
2
+ type VAlign = 'top' | 'mid' | 'bot';
3
+ type Heading = 1 | 2 | 3 | 4 | 5 | 6;
4
+ type StrokeKind = 'solid' | 'dash' | 'dot' | 'double' | 'none';
5
+ interface TextProps {
6
+ b?: boolean;
7
+ i?: boolean;
8
+ u?: boolean;
9
+ s?: boolean;
10
+ sup?: boolean;
11
+ sub?: boolean;
12
+ font?: string;
13
+ pt?: number;
14
+ color?: string;
15
+ bg?: string;
16
+ }
17
+ interface ParaProps {
18
+ align?: Align;
19
+ heading?: Heading;
20
+ indentPt?: number;
21
+ spaceBefore?: number;
22
+ spaceAfter?: number;
23
+ lineHeight?: number;
24
+ listLv?: number;
25
+ listOrd?: boolean;
26
+ listMark?: string;
27
+ }
28
+ interface Stroke {
29
+ kind: StrokeKind;
30
+ pt: number;
31
+ color: string;
32
+ }
33
+ interface CellProps {
34
+ top?: Stroke;
35
+ bot?: Stroke;
36
+ left?: Stroke;
37
+ right?: Stroke;
38
+ bg?: string;
39
+ padPt?: number;
40
+ align?: Align;
41
+ va?: VAlign;
42
+ isHeader?: boolean;
43
+ }
44
+ interface TableLook {
45
+ firstRow?: boolean;
46
+ lastRow?: boolean;
47
+ firstCol?: boolean;
48
+ lastCol?: boolean;
49
+ bandedRows?: boolean;
50
+ bandedCols?: boolean;
51
+ }
52
+ interface GridProps {
53
+ widthPct?: number;
54
+ colWidths?: number[];
55
+ defaultStroke?: Stroke;
56
+ look?: TableLook;
57
+ headerRow?: boolean;
58
+ }
59
+ interface PageDims {
60
+ wPt: number;
61
+ hPt: number;
62
+ mt: number;
63
+ mb: number;
64
+ ml: number;
65
+ mr: number;
66
+ orient?: 'portrait' | 'landscape';
67
+ }
68
+ interface DocMeta {
69
+ title?: string;
70
+ author?: string;
71
+ subject?: string;
72
+ desc?: string;
73
+ keywords?: string;
74
+ created?: string;
75
+ modified?: string;
76
+ }
77
+ declare const A4: PageDims;
78
+ declare const DEFAULT_STROKE: Stroke;
79
+
80
+ type BlockTag = 'root' | 'sheet' | 'para' | 'span' | 'txt' | 'img' | 'link' | 'grid' | 'row' | 'cell' | 'br' | 'pb' | 'pagenum';
81
+ interface TxtNode {
82
+ tag: 'txt';
83
+ content: string;
84
+ }
85
+ interface BrNode {
86
+ tag: 'br';
87
+ }
88
+ interface PbNode {
89
+ tag: 'pb';
90
+ }
91
+ interface PageNumNode {
92
+ tag: 'pagenum';
93
+ format?: 'decimal' | 'roman' | 'romanCaps';
94
+ }
95
+ interface ImgNode {
96
+ tag: 'img';
97
+ b64: string;
98
+ mime: 'image/png' | 'image/jpeg' | 'image/gif' | 'image/bmp';
99
+ w: number;
100
+ h: number;
101
+ alt?: string;
102
+ }
103
+ interface SpanNode {
104
+ tag: 'span';
105
+ props: TextProps;
106
+ kids: (TxtNode | BrNode | PbNode | PageNumNode)[];
107
+ }
108
+ interface LinkNode {
109
+ tag: 'link';
110
+ href: string;
111
+ kids: SpanNode[];
112
+ }
113
+ interface ParaNode {
114
+ tag: 'para';
115
+ props: ParaProps;
116
+ kids: (SpanNode | ImgNode | LinkNode)[];
117
+ }
118
+ interface CellNode {
119
+ tag: 'cell';
120
+ cs: number;
121
+ rs: number;
122
+ props: CellProps;
123
+ kids: ParaNode[];
124
+ }
125
+ interface RowNode {
126
+ tag: 'row';
127
+ kids: CellNode[];
128
+ }
129
+ interface GridNode {
130
+ tag: 'grid';
131
+ props: GridProps;
132
+ kids: RowNode[];
133
+ }
134
+ type ContentNode = ParaNode | GridNode;
135
+ interface SheetNode {
136
+ tag: 'sheet';
137
+ dims: PageDims;
138
+ kids: ContentNode[];
139
+ header?: ParaNode[];
140
+ footer?: ParaNode[];
141
+ }
142
+ interface DocRoot {
143
+ tag: 'root';
144
+ meta: DocMeta;
145
+ kids: SheetNode[];
146
+ }
147
+ type AnyNode = DocRoot | SheetNode | ParaNode | SpanNode | TxtNode | ImgNode | LinkNode | GridNode | RowNode | CellNode | BrNode | PbNode | PageNumNode;
148
+
149
+ type Outcome<T> = Ok<T> | Fail;
150
+ interface Ok<T> {
151
+ ok: true;
152
+ data: T;
153
+ warns: string[];
154
+ }
155
+ interface Fail {
156
+ ok: false;
157
+ error: string;
158
+ warns: string[];
159
+ }
160
+ declare function succeed<T>(data: T, warns?: string[]): Ok<T>;
161
+ declare function fail(error: string, warns?: string[]): Fail;
162
+
163
+ interface Decoder {
164
+ readonly format: string;
165
+ decode(data: Uint8Array): Promise<Outcome<DocRoot>>;
166
+ }
167
+
168
+ interface Encoder {
169
+ readonly format: string;
170
+ encode(doc: DocRoot): Promise<Outcome<Uint8Array>>;
171
+ }
172
+
173
+ declare class Pipeline {
174
+ private raw;
175
+ private srcFmt;
176
+ private constructor();
177
+ /** Opens a file; the format is either auto-detected or specified explicitly */
178
+ static open(input: Uint8Array | string, fmt?: string): Pipeline;
179
+ /** Asynchronous input for File/Blob */
180
+ static openAsync(input: File | Blob | Uint8Array | string, fmt?: string): Promise<Pipeline>;
181
+ /** Converts to the target format */
182
+ to(targetFmt: string): Promise<Outcome<Uint8Array>>;
183
+ /** Extracts only the DocRoot (without encoding) */
184
+ inspect(): Promise<Outcome<DocRoot>>;
185
+ }
186
+
187
+ declare class FormatRegistry {
188
+ private decoders;
189
+ private encoders;
190
+ registerDecoder(d: Decoder): void;
191
+ registerEncoder(e: Encoder): void;
192
+ getDecoder(fmt: string): Decoder | undefined;
193
+ getEncoder(fmt: string): Encoder | undefined;
194
+ supportedInputs(): string[];
195
+ supportedOutputs(): string[];
196
+ }
197
+ declare const registry: FormatRegistry;
198
+
199
+ declare function buildRoot(meta?: DocMeta, kids?: SheetNode[]): DocRoot;
200
+ declare function buildSheet(kids?: ContentNode[], dims?: PageDims, opts?: {
201
+ header?: ParaNode[];
202
+ footer?: ParaNode[];
203
+ }): SheetNode;
204
+ declare function buildPageNum(format?: PageNumNode['format']): PageNumNode;
205
+ declare function buildPara(kids?: ParaNode['kids'], props?: ParaProps): ParaNode;
206
+ declare function buildSpan(content: string, props?: TextProps): SpanNode;
207
+ declare function buildImg(b64: string, mime: ImgNode['mime'], w: number, h: number, alt?: string): ImgNode;
208
+ declare function buildGrid(kids: RowNode[], props?: GridProps): GridNode;
209
+ declare function buildRow(kids: CellNode[]): RowNode;
210
+ declare function buildCell(kids: ParaNode[], opts?: {
211
+ cs?: number;
212
+ rs?: number;
213
+ props?: CellProps;
214
+ }): CellNode;
215
+
216
+ declare class ShieldedParser {
217
+ private log;
218
+ /** Safely parses a single element */
219
+ guard<T>(fn: () => T, fallback: T, label: string): T;
220
+ /** Parses each array element independently (if one fails, the rest continue) */
221
+ guardAll<I, O>(items: I[], fn: (x: I, i: number) => O, fb: (x: I, i: number) => O, label: string): O[];
222
+ /**
223
+ * Four-level fallback dedicated to tables
224
+ * Lv1: Full → Lv2: Grid → Lv3: Flat → Lv4: Text
225
+ */
226
+ guardGrid<T>(node: unknown, lv1Full: (n: unknown) => T, lv2Grid: (n: unknown) => T, lv3Flat: (n: unknown) => T, lv4Text: (n: unknown) => T, label: string): {
227
+ value: T;
228
+ level: 1 | 2 | 3 | 4;
229
+ };
230
+ /** Safely parses an image */
231
+ guardImg<T>(node: unknown, fn: (n: unknown) => T, placeholder: (alt: string) => T, label: string): T;
232
+ private warn;
233
+ flush(): string[];
234
+ }
235
+
236
+ declare const Metric: {
237
+ readonly hwpToPt: (v: number) => number;
238
+ readonly ptToHwp: (v: number) => number;
239
+ readonly hwpToDxa: (v: number) => number;
240
+ readonly dxaToHwp: (v: number) => number;
241
+ readonly hwpToEmu: (v: number) => number;
242
+ readonly emuToHwp: (v: number) => number;
243
+ readonly dxaToPt: (v: number) => number;
244
+ readonly ptToDxa: (v: number) => number;
245
+ readonly dxaToEmu: (v: number) => number;
246
+ readonly emuToDxa: (v: number) => number;
247
+ readonly emuToPt: (v: number) => number;
248
+ readonly ptToEmu: (v: number) => number;
249
+ readonly hHeightToPt: (v: number) => number;
250
+ readonly ptToHHeight: (v: number) => number;
251
+ readonly halfPtToPt: (v: number) => number;
252
+ readonly ptToHalfPt: (v: number) => number;
253
+ };
254
+ declare function safeHex(raw: string | number | null | undefined): string | undefined;
255
+ declare function safeAlign(raw?: string): Align;
256
+ declare function safeStrokeHwpx(type?: string, w?: number, c?: string): Stroke;
257
+ declare function safeStrokeDocx(val?: string, sz?: number, c?: string): Stroke;
258
+ declare function safeFont(raw?: string): string;
259
+ declare function safeFontToKr(raw?: string): string;
260
+
261
+ type WalkCallback = (node: AnyNode, parent: AnyNode | null, depth: number) => void | 'stop';
262
+ declare function walkNode(node: AnyNode, cb: WalkCallback, parent?: AnyNode | null, depth?: number): boolean;
263
+ declare class TreeWalker {
264
+ walk(root: DocRoot, cb: WalkCallback): void;
265
+ findAll<T extends AnyNode>(root: DocRoot, predicate: (n: AnyNode) => n is T): T[];
266
+ extractText(root: DocRoot): string;
267
+ }
268
+
269
+ declare function countNodes(root: DocRoot): Record<string, number>;
270
+ declare function validateRoot(root: DocRoot): string[];
271
+
272
+ declare const XmlKit: {
273
+ /** @deprecated Use parseStrict instead */
274
+ parse(xml: string): Promise<unknown>;
275
+ parseStrict(xml: string): Promise<unknown>;
276
+ attr(node: Record<string, unknown>, key: string): string | undefined;
277
+ text(node: Record<string, unknown> | string | undefined): string;
278
+ };
279
+
280
+ interface ZipEntry {
281
+ name: string;
282
+ data: Uint8Array;
283
+ }
284
+ declare const ArchiveKit: {
285
+ inflate(compressed: Uint8Array): Promise<Uint8Array>;
286
+ deflate(data: Uint8Array): Promise<Uint8Array>;
287
+ unzip(zipData: Uint8Array): Promise<Map<string, Uint8Array>>;
288
+ zip(entries: ZipEntry[]): Promise<Uint8Array>;
289
+ };
290
+
291
+ /**
292
+ * OLE2 Compound File Binary Format (CFB) parser.
293
+ * Used for legacy HWP 5.0 files.
294
+ */
295
+ declare const BinaryKit: {
296
+ readU16LE(buf: Uint8Array, offset: number): number;
297
+ readU32LE(buf: Uint8Array, offset: number): number;
298
+ isOle2(data: Uint8Array): boolean;
299
+ parseCfb(data: Uint8Array): Map<string, Uint8Array>;
300
+ };
301
+
302
+ declare const TextKit: {
303
+ decode(data: Uint8Array, encoding?: string): string;
304
+ encode(text: string): Uint8Array;
305
+ escapeXml(s: string): string;
306
+ unescapeXml(s: string): string;
307
+ normalizeWhitespace(s: string): string;
308
+ stripControl(s: string): string;
309
+ base64Encode(data: Uint8Array): string;
310
+ base64Decode(b64: string): Uint8Array;
311
+ };
312
+
313
+ export { A4, type Align, type AnyNode, ArchiveKit, BinaryKit, type BlockTag, type BrNode, type CellNode, type CellProps, type ContentNode, DEFAULT_STROKE, type Decoder, type DocMeta, type DocRoot, type Encoder, type Fail, type GridNode, type GridProps, type Heading, type ImgNode, type LinkNode, Metric, type Ok, type Outcome, type PageDims, type PageNumNode, type ParaNode, type ParaProps, type PbNode, Pipeline, type RowNode, type SheetNode, ShieldedParser, type SpanNode, type Stroke, type StrokeKind, type TableLook, TextKit, type TextProps, TreeWalker, type TxtNode, type VAlign, XmlKit, buildCell, buildGrid, buildImg, buildPageNum, buildPara, buildRoot, buildRow, buildSheet, buildSpan, countNodes, fail, registry, safeAlign, safeFont, safeFontToKr, safeHex, safeStrokeDocx, safeStrokeHwpx, succeed, validateRoot, walkNode };