hwp-convert 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +185 -0
- package/LICENSE +25 -0
- package/NOTICE +23 -0
- package/README.md +338 -0
- package/dist/browser/hwp-convert.browser.mjs +20677 -0
- package/dist/browser/hwp-convert.browser.mjs.map +7 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +267 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.js +5 -0
- package/dist/lib/errors.d.ts +9 -0
- package/dist/lib/errors.js +18 -0
- package/dist/lib/hwp/binData.d.ts +15 -0
- package/dist/lib/hwp/binData.js +64 -0
- package/dist/lib/hwp/bodyText.d.ts +31 -0
- package/dist/lib/hwp/bodyText.js +208 -0
- package/dist/lib/hwp/byteReader.d.ts +40 -0
- package/dist/lib/hwp/byteReader.js +116 -0
- package/dist/lib/hwp/cfbReader.d.ts +44 -0
- package/dist/lib/hwp/cfbReader.js +134 -0
- package/dist/lib/hwp/control.d.ts +17 -0
- package/dist/lib/hwp/control.js +290 -0
- package/dist/lib/hwp/converter.d.ts +22 -0
- package/dist/lib/hwp/converter.js +41 -0
- package/dist/lib/hwp/docInfo.d.ts +26 -0
- package/dist/lib/hwp/docInfo.js +396 -0
- package/dist/lib/hwp/fileHeader.d.ts +42 -0
- package/dist/lib/hwp/fileHeader.js +66 -0
- package/dist/lib/hwp/htmlReader.d.ts +17 -0
- package/dist/lib/hwp/htmlReader.js +602 -0
- package/dist/lib/hwp/hwpxBuilder.d.ts +19 -0
- package/dist/lib/hwp/hwpxBuilder.js +633 -0
- package/dist/lib/hwp/index.d.ts +68 -0
- package/dist/lib/hwp/index.js +149 -0
- package/dist/lib/hwp/mdReader.d.ts +16 -0
- package/dist/lib/hwp/mdReader.js +485 -0
- package/dist/lib/hwp/mdWriter.d.ts +23 -0
- package/dist/lib/hwp/mdWriter.js +182 -0
- package/dist/lib/hwp/owpml.d.ts +33 -0
- package/dist/lib/hwp/owpml.js +86 -0
- package/dist/lib/hwp/record.d.ts +24 -0
- package/dist/lib/hwp/record.js +59 -0
- package/dist/lib/hwp/tags.d.ts +115 -0
- package/dist/lib/hwp/tags.js +217 -0
- package/dist/lib/hwp/types.d.ts +214 -0
- package/dist/lib/hwp/types.js +5 -0
- package/dist/lib/hwpxReader.d.ts +60 -0
- package/dist/lib/hwpxReader.js +1104 -0
- package/dist/lib/types.d.ts +47 -0
- package/dist/lib/types.js +1 -0
- package/dist/lib/writer.d.ts +19 -0
- package/dist/lib/writer.js +149 -0
- package/package.json +94 -0
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HWP document IR (Intermediate Representation) types.
|
|
3
|
+
* A simplified version of rhwp's model module that defines only the information needed for HWPX conversion.
|
|
4
|
+
*/
|
|
5
|
+
import type { FileHeader } from "./fileHeader.js";
|
|
6
|
+
/**
|
|
7
|
+
* Resolver that turns an image src that is not a data URI (a file:// URL or a local/relative path) into bytes.
|
|
8
|
+
* The core only receives it by injection (browser-safe); the Node (CLI) side supplies an fs-based implementation.
|
|
9
|
+
* Returning null causes that image to be skipped.
|
|
10
|
+
*/
|
|
11
|
+
export type ImageResolver = (src: string) => {
|
|
12
|
+
data: Uint8Array;
|
|
13
|
+
extension: string;
|
|
14
|
+
} | null;
|
|
15
|
+
/** Options for md/html → HwpDocument conversion. */
|
|
16
|
+
export interface ConvertOptions {
|
|
17
|
+
/** Optional resolver for image sources; see the ImageResolver type. */
imageResolver?: ImageResolver;
|
|
18
|
+
}
|
|
19
|
+
/** A piece of paragraph text together with the character shape applied to it. */
export interface HwpRun {
|
|
20
|
+
/** Character shape ID (index into DocInfo's CHAR_SHAPE records) */
|
|
21
|
+
charShapeId: number;
|
|
22
|
+
/** Text — ordinary characters only (control characters are represented separately in controls) */
|
|
23
|
+
text: string;
|
|
24
|
+
}
|
|
25
|
+
/** A paragraph: shape/style IDs, its plain text, its runs, and its inline controls. */
export interface HwpParagraph {
|
|
26
|
+
paraShapeId: number;
|
|
27
|
+
styleId: number;
|
|
28
|
+
/** Paragraph text (plain text formed by joining all runs) */
|
|
29
|
+
text: string;
|
|
30
|
+
/** Runs, split at the points where the character shape changes */
|
|
31
|
+
runs: HwpRun[];
|
|
32
|
+
/** Inline controls (tables, pictures, fields, headers, etc.) */
|
|
33
|
+
controls: HwpControl[];
|
|
34
|
+
}
|
|
35
|
+
/** A document section, holding its list of paragraphs. */
export interface HwpSection {
|
|
36
|
+
paragraphs: HwpParagraph[];
|
|
37
|
+
}
|
|
38
|
+
/** Union of all control variants; each member carries a distinct string-literal `kind` that can be used to narrow it. */
export type HwpControl = HwpTableControl | HwpPictureControl | HwpHeaderControl | HwpFooterControl | HwpFootnoteControl | HwpFieldControl | HwpShapeControl | HwpEquationControl | HwpUnknownControl;
|
|
39
|
+
/** Drawing-shape control with an optional start/end coordinate pair. */
export interface HwpShapeControl {
|
|
40
|
+
kind: "shape";
|
|
41
|
+
/** Shape variant: "line" | "rectangle" | "ellipse" | "arc" | "polygon" | "curve" */
|
|
42
|
+
shapeType: "line" | "rectangle" | "ellipse" | "arc" | "polygon" | "curve";
|
|
43
|
+
/** Start coordinates (HWPUNIT) */
|
|
44
|
+
x1?: number;
|
|
45
|
+
y1?: number;
|
|
46
|
+
/** End coordinates (HWPUNIT) */
|
|
47
|
+
x2?: number;
|
|
48
|
+
y2?: number;
|
|
49
|
+
}
|
|
50
|
+
/** Equation control carrying the equation's script text. */
export interface HwpEquationControl {
|
|
51
|
+
kind: "equation";
|
|
52
|
+
/** Equation script (HWP's own equation language — similar to LaTeX but not the same) */
|
|
53
|
+
script: string;
|
|
54
|
+
}
|
|
55
|
+
/** Table control: row and column counts plus the list of cells. */
export interface HwpTableControl {
|
|
56
|
+
kind: "table";
|
|
57
|
+
rowCount: number;
|
|
58
|
+
colCount: number;
|
|
59
|
+
cells: HwpTableCell[];
|
|
60
|
+
}
|
|
61
|
+
/** One table cell: its column/row position, its column/row spans, and the paragraphs it contains. */
export interface HwpTableCell {
|
|
62
|
+
col: number;
|
|
63
|
+
row: number;
|
|
64
|
+
colSpan: number;
|
|
65
|
+
rowSpan: number;
|
|
66
|
+
paragraphs: HwpParagraph[];
|
|
67
|
+
}
|
|
68
|
+
/** Picture control that refers to its image data by ID. */
export interface HwpPictureControl {
|
|
69
|
+
kind: "picture";
|
|
70
|
+
/** storageId of the DocInfo BIN_DATA entry */
|
|
71
|
+
binDataId: number;
|
|
72
|
+
}
|
|
73
|
+
/** Header control holding its own list of paragraphs. */
export interface HwpHeaderControl {
|
|
74
|
+
kind: "header";
|
|
75
|
+
paragraphs: HwpParagraph[];
|
|
76
|
+
}
|
|
77
|
+
/** Footer control holding its own list of paragraphs. */
export interface HwpFooterControl {
|
|
78
|
+
kind: "footer";
|
|
79
|
+
paragraphs: HwpParagraph[];
|
|
80
|
+
}
|
|
81
|
+
/** Footnote control holding its own list of paragraphs. */
export interface HwpFootnoteControl {
|
|
82
|
+
kind: "footnote";
|
|
83
|
+
paragraphs: HwpParagraph[];
|
|
84
|
+
}
|
|
85
|
+
/** Field control identified by its ctrl_id, with an optional command string. */
export interface HwpFieldControl {
|
|
86
|
+
kind: "field";
|
|
87
|
+
/** e.g. "%hlk", "%clk", "%dte" */
|
|
88
|
+
ctrlId: string;
|
|
89
|
+
/** Field command (UTF-16) */
|
|
90
|
+
command?: string;
|
|
91
|
+
}
|
|
92
|
+
/** Control variant tagged "unknown"; only its ctrl_id is kept. */
export interface HwpUnknownControl {
|
|
93
|
+
kind: "unknown";
|
|
94
|
+
/** Four-character ctrl_id (e.g. "tbl ", "secd", "cold", "$pic") */
|
|
95
|
+
ctrlId: string;
|
|
96
|
+
}
|
|
97
|
+
/** Document-wide definition tables (fonts, shapes, styles, binary-data refs, border fills, numbering, bullets, tabs). */
export interface HwpDocInfo {
|
|
98
|
+
/** Font groups per language: [HANGUL, LATIN, HANJA, JAPANESE, OTHER, SYMBOL, USER] */
|
|
99
|
+
fontFaces: HwpFaceName[][];
|
|
100
|
+
charShapes: HwpCharShape[];
|
|
101
|
+
paraShapes: HwpParaShape[];
|
|
102
|
+
styles: HwpStyle[];
|
|
103
|
+
binData: HwpBinDataRef[];
|
|
104
|
+
/** Border/fill definitions — referenced by a paraShape's borderFillIDRef */
|
|
105
|
+
borderFills: HwpBorderFill[];
|
|
106
|
+
/** Numbering definitions — referenced by a paraShape's numberingIDRef */
|
|
107
|
+
numberings: HwpNumbering[];
|
|
108
|
+
/** Bullets */
|
|
109
|
+
bullets: HwpBullet[];
|
|
110
|
+
/** Tab definitions — referenced by a paraShape's tabPrIDRef */
|
|
111
|
+
tabDefs: HwpTabDef[];
|
|
112
|
+
}
|
|
113
|
+
/** Line type, width index, and color of a single border line. */
export interface HwpBorderLine {
|
|
114
|
+
/** 0=NONE, 1=SOLID, 2=DASH, 3=DOT, 4=DASH_DOT, 5=DASH_DOT_DOT, 6=LONG_DASH, 7=CIRCLE,
|
|
115
|
+
* 8=DOUBLE, 9=THIN_THICK_DOUBLE, 10=THICK_THIN_DOUBLE, 11=THIN_THICK_THIN_TRIPLE,
|
|
116
|
+
* 12=WAVE, 13=DOUBLE_WAVE, 14=THICK_3D, 15=THICK_3D_REVERSE, 16=THIN_3D, 17=THIN_3D_REVERSE */
|
|
117
|
+
lineType: number;
|
|
118
|
+
/** HWP width index (0..15). See the BORDER_WIDTH_MM table for the mm conversion. */
|
|
119
|
+
widthIndex: number;
|
|
120
|
+
/** 0xAABBGGRR */
|
|
121
|
+
color: number;
|
|
122
|
+
}
|
|
123
|
+
/** Diagonal line settings: type, width index, and color. */
export interface HwpDiagonalLine {
|
|
124
|
+
/** 0=NONE, 1=FORWARD, 2=BACKWARD, 3=CROSS */
|
|
125
|
+
diagonalType: number;
|
|
126
|
+
widthIndex: number;
|
|
127
|
+
color: number;
|
|
128
|
+
}
|
|
129
|
+
/** Solid fill: background color, pattern color, and pattern type. */
export interface HwpSolidFill {
|
|
130
|
+
/** 0xAABBGGRR */
|
|
131
|
+
backgroundColor: number;
|
|
132
|
+
patternColor: number;
|
|
133
|
+
/** -1=NONE, 0=HORIZONTAL, 1=VERTICAL, ... */
|
|
134
|
+
patternType: number;
|
|
135
|
+
}
|
|
136
|
+
/** Border/fill definition: attribute bits, four border lines, a diagonal line, and an optional solid fill. */
export interface HwpBorderFill {
|
|
137
|
+
/** Bitfield attributes (3D, shadow, centerLine, etc.) */
|
|
138
|
+
attr: number;
|
|
139
|
+
/** [left, right, top, bottom] */
|
|
140
|
+
borders: [HwpBorderLine, HwpBorderLine, HwpBorderLine, HwpBorderLine];
|
|
141
|
+
diagonal: HwpDiagonalLine;
|
|
142
|
+
/** Fill. Only solid fills are preserved (gradient/image fills are not). */
|
|
143
|
+
fill?: HwpSolidFill;
|
|
144
|
+
}
|
|
145
|
+
/** Numbering definition: start number and per-level format strings. */
export interface HwpNumbering {
|
|
146
|
+
startNumber: number;
|
|
147
|
+
/** Number format string for each level (1–7), e.g. "^1.", "^1.^2.", "^1)" */
|
|
148
|
+
levelFormats: string[];
|
|
149
|
+
}
|
|
150
|
+
/** Bullet definition. */
export interface HwpBullet {
|
|
151
|
+
/** Bullet character (e.g. ●, ○, ■, …) */
|
|
152
|
+
bulletChar: string;
|
|
153
|
+
}
|
|
154
|
+
/** Tab definition: a numeric attribute value plus left/right auto-tab flags. */
export interface HwpTabDef {
|
|
155
|
+
attr: number;
|
|
156
|
+
autoTabLeft: boolean;
|
|
157
|
+
autoTabRight: boolean;
|
|
158
|
+
}
|
|
159
|
+
/** Font face entry: a name and an optional substitute name. */
export interface HwpFaceName {
|
|
160
|
+
name: string;
|
|
161
|
+
substituteName?: string;
|
|
162
|
+
}
|
|
163
|
+
/** Character shape: per-language face-name IDs, base size, property bits, colors, and bold/italic/underline/strikeout flags. */
export interface HwpCharShape {
|
|
164
|
+
/** Face-name ID for each language group — presumably an index into the matching HwpDocInfo.fontFaces group; TODO confirm. */
faceNameIds: {
|
|
165
|
+
hangul: number;
|
|
166
|
+
latin: number;
|
|
167
|
+
hanja: number;
|
|
168
|
+
japanese: number;
|
|
169
|
+
other: number;
|
|
170
|
+
symbol: number;
|
|
171
|
+
user: number;
|
|
172
|
+
};
|
|
173
|
+
baseSize: number;
|
|
174
|
+
property: number;
|
|
175
|
+
textColor: number;
|
|
176
|
+
shadeColor: number;
|
|
177
|
+
underlineColor: number;
|
|
178
|
+
shadowColor: number;
|
|
179
|
+
outlineColor?: number;
|
|
180
|
+
bold: boolean;
|
|
181
|
+
italic: boolean;
|
|
182
|
+
underline: boolean;
|
|
183
|
+
strikeout: boolean;
|
|
184
|
+
}
|
|
185
|
+
/** Paragraph shape: alignment, property bits, margins, indent, and spacing values. */
export interface HwpParaShape {
|
|
186
|
+
alignment: "left" | "right" | "center" | "justify" | "distribute" | "distributeSpace" | "unknown";
|
|
187
|
+
property: number;
|
|
188
|
+
leftMargin: number;
|
|
189
|
+
rightMargin: number;
|
|
190
|
+
indent: number;
|
|
191
|
+
prevSpacing: number;
|
|
192
|
+
nextSpacing: number;
|
|
193
|
+
lineSpacing: number;
|
|
194
|
+
}
|
|
195
|
+
/** Named style that refers to a paragraph shape and a character shape by ID. */
export interface HwpStyle {
|
|
196
|
+
name: string;
|
|
197
|
+
engName?: string;
|
|
198
|
+
paraShapeId: number;
|
|
199
|
+
charShapeId: number;
|
|
200
|
+
}
|
|
201
|
+
/** Binary-data reference: storage ID, optional file extension, and how the data is held. */
export interface HwpBinDataRef {
|
|
202
|
+
storageId: number;
|
|
203
|
+
extension?: string;
|
|
204
|
+
type: "embedding" | "storage" | "link";
|
|
205
|
+
}
|
|
206
|
+
/** Complete HWP document IR: file header, DocInfo tables, sections, and binary payloads. */
export interface HwpDocument {
|
|
207
|
+
header: FileHeader;
|
|
208
|
+
docInfo: HwpDocInfo;
|
|
209
|
+
sections: HwpSection[];
|
|
210
|
+
/** Binary payloads keyed by number — presumably the BIN_DATA storageId; TODO confirm. */
binData: Map<number, {
|
|
211
|
+
data: Uint8Array;
|
|
212
|
+
extension: string;
|
|
213
|
+
}>;
|
|
214
|
+
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import type { HwpxDocumentInfo, HwpxReaderApi as HwpxReaderInterface, HwpxTextExtractOptions, HwpxHtmlOptions } from "./types.js";
|
|
2
|
+
/**
 * HWPX reader declaration. Public surface: load from an ArrayBuffer, then query document info,
 * extract text / Markdown / HTML, apply template data to text, and list images.
 */
export declare class HwpxReader implements HwpxReaderInterface {
|
|
3
|
+
private zip;
|
|
4
|
+
private files;
|
|
5
|
+
private encryptedCache;
|
|
6
|
+
private characterProperties;
|
|
7
|
+
private fontFaces;
|
|
8
|
+
loadFromArrayBuffer(buffer: ArrayBuffer): Promise<void>;
|
|
9
|
+
private isLikelyHwpxMime;
|
|
10
|
+
private getTextFile;
|
|
11
|
+
private findFilePathIgnoreCase;
|
|
12
|
+
private parseXml;
|
|
13
|
+
private summarizePackage;
|
|
14
|
+
private getSectionPathsBySpine;
|
|
15
|
+
private detectEncryption;
|
|
16
|
+
private containsEncryptionMarker;
|
|
17
|
+
private readMetadata;
|
|
18
|
+
getDocumentInfo(): Promise<HwpxDocumentInfo>;
|
|
19
|
+
extractText(options?: HwpxTextExtractOptions): Promise<string>;
|
|
20
|
+
/**
|
|
21
|
+
* Extracts text from a single paragraph (<hp:p>). If it contains inline controls such as tables or images, their cells/inner paragraphs are traversed recursively.
|
|
22
|
+
*
|
|
23
|
+
* For tables, text is collected per cell; cells in the same row are joined with a space and rows are joined with a newline.
|
|
24
|
+
*/
|
|
25
|
+
private extractParagraphText;
|
|
26
|
+
private extractTableText;
|
|
27
|
+
private extractCellText;
|
|
28
|
+
/**
|
|
29
|
+
* Converts the whole document to Markdown.
|
|
30
|
+
* Tables become Markdown tables (merged cells are flattened); images become Markdown images (presumably `![alt](src)` — the original example was lost in extraction, confirm).
|
|
31
|
+
*/
|
|
32
|
+
extractMarkdown(options?: {
|
|
33
|
+
embedImages?: boolean;
|
|
34
|
+
imageSrcResolver?: (binPath: string) => string;
|
|
35
|
+
}): Promise<string>;
|
|
36
|
+
private extractParagraphMarkdown;
|
|
37
|
+
private extractTableMarkdown;
|
|
38
|
+
applyTemplateToText(raw: string, data: Record<string, unknown>): string;
|
|
39
|
+
extractHtml(options?: HwpxHtmlOptions): Promise<string>;
|
|
40
|
+
private collectTablesInParagraph;
|
|
41
|
+
private renderTableHtml;
|
|
42
|
+
private renderCellContentHtml;
|
|
43
|
+
private getAlignStyle;
|
|
44
|
+
private renderNodeToHtml;
|
|
45
|
+
private collectAllText;
|
|
46
|
+
private renderRunToHtml;
|
|
47
|
+
private findBinRefInRun;
|
|
48
|
+
private resolveBinaryPath;
|
|
49
|
+
private normalizeColor;
|
|
50
|
+
private normalizeSize;
|
|
51
|
+
private convertHwpUnitToPoints;
|
|
52
|
+
private parseStyleDefinitions;
|
|
53
|
+
private processCharacterProperties;
|
|
54
|
+
private detectMimeType;
|
|
55
|
+
private toBase64;
|
|
56
|
+
private extractTextFromNode;
|
|
57
|
+
private escapeHtml;
|
|
58
|
+
listImages(): Promise<string[]>;
|
|
59
|
+
}
|
|
60
|
+
export default HwpxReader;
|