hwpkit-dev 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.npmignore +11 -0
- package/README.md +223 -0
- package/dist/index.d.mts +313 -0
- package/dist/index.d.ts +317 -0
- package/dist/index.js +3546 -0
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +3479 -0
- package/dist/index.mjs.map +1 -0
- package/license.md +136 -0
- package/package.json +45 -0
- package/src/contract/decoder.ts +7 -0
- package/src/contract/encoder.ts +7 -0
- package/src/contract/result.ts +21 -0
- package/src/decoders/docx/DocxDecoder.ts +986 -0
- package/src/decoders/hwp/HwpScanner.ts +809 -0
- package/src/decoders/hwpx/HwpxDecoder.ts +759 -0
- package/src/decoders/md/MdDecoder.ts +180 -0
- package/src/encoders/docx/DocxEncoder.ts +710 -0
- package/src/encoders/hwp/HwpEncoder.ts +711 -0
- package/src/encoders/hwpx/HwpxEncoder.ts +770 -0
- package/src/encoders/md/MdEncoder.ts +108 -0
- package/src/index.ts +47 -0
- package/src/model/builders.ts +66 -0
- package/src/model/doc-props.ts +138 -0
- package/src/model/doc-tree.ts +90 -0
- package/src/pipeline/Pipeline.ts +71 -0
- package/src/pipeline/registry.ts +18 -0
- package/src/safety/ShieldedParser.ts +91 -0
- package/src/safety/StyleBridge.ts +106 -0
- package/src/toolkit/ArchiveKit.ts +150 -0
- package/src/toolkit/BinaryKit.ts +187 -0
- package/src/toolkit/TextKit.ts +57 -0
- package/src/toolkit/XmlKit.ts +91 -0
- package/src/walk/TreeWalker.ts +42 -0
- package/src/walk/tree-ops.ts +26 -0
- package/tsconfig.json +23 -0
- package/tsup.config.ts +12 -0
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
import type { Decoder } from '../../contract/decoder';
|
|
2
|
+
import type { DocRoot, ContentNode, ParaNode, SpanNode, ImgNode } from '../../model/doc-tree';
|
|
3
|
+
import type { Outcome } from '../../contract/result';
|
|
4
|
+
import type { ParaProps, TextProps } from '../../model/doc-props';
|
|
5
|
+
import { A4 } from '../../model/doc-props';
|
|
6
|
+
import { succeed, fail } from '../../contract/result';
|
|
7
|
+
import { buildRoot, buildSheet, buildPara, buildSpan, buildImg, buildGrid, buildRow, buildCell } from '../../model/builders';
|
|
8
|
+
import { ShieldedParser } from '../../safety/ShieldedParser';
|
|
9
|
+
import { TextKit } from '../../toolkit/TextKit';
|
|
10
|
+
import { registry } from '../../pipeline/registry';
|
|
11
|
+
|
|
12
|
+
/**
 * Markdown decoder: converts raw Markdown bytes into the shared document tree.
 *
 * Every source line is tested against the following constructs, in this order:
 * ATX heading, pipe table, horizontal rule, list item, blockquote, fenced code
 * block, blank line, single-line `<div align="…">` wrapper, plain paragraph.
 * The first construct that matches consumes the line(s).
 */
export class MdDecoder implements Decoder {
  readonly format = 'md';

  /**
   * Decodes a Markdown document.
   *
   * @param data Raw bytes of the Markdown source; turned into text with `TextKit.decode`.
   * @returns A successful outcome wrapping a root with a single A4 sheet, or a
   *   failed outcome carrying the error message when anything throws. Warnings
   *   flushed from the shielded parser are attached in both cases.
   */
  async decode(data: Uint8Array): Promise<Outcome<DocRoot>> {
    const guard = new ShieldedParser();
    const warnings: string[] = [];

    try {
      const rows = TextKit.decode(data).split(/\r?\n/);
      const blocks: ContentNode[] = [];
      let pos = 0;

      while (pos < rows.length) {
        const current = rows[pos];

        // ATX heading: one to six '#' followed by whitespace and a title.
        const heading = /^(#{1,6})\s+(.+)$/.exec(current);
        if (heading) {
          const depth = heading[1].length as 1 | 2 | 3 | 4 | 5 | 6;
          // Only level 1 and 2 headings are rendered bold.
          const title = buildSpan(heading[2], { b: depth <= 2 });
          blocks.push(buildPara([title], { heading: depth }));
          pos += 1;
          continue;
        }

        // Pipe table: a line containing '|' followed by a dash/colon separator line.
        if (
          current.includes('|') &&
          pos + 1 < rows.length &&
          /^\s*\|?\s*[-:]+\s*\|/.test(rows[pos + 1])
        ) {
          const table = guard.guard(() => parseMdTable(rows, pos), null, `md:table@${pos}`);
          if (table) {
            blocks.push(table.node);
            pos = table.nextLine;
            continue;
          }
          // No table result: fall through and let the remaining rules handle this line.
        }

        // Horizontal rule: represented as an empty paragraph.
        if (/^[-*_]{3,}$/.test(current)) {
          blocks.push(buildPara([buildSpan('')], {}));
          pos += 1;
          continue;
        }

        // List item: bullet ('-', '*', '+') or ordered ('1.') marker.
        const bullet = /^(\s*)([-*+]|\d+\.)\s+(.+)$/.exec(current);
        if (bullet) {
          const [, indent, marker, body] = bullet;
          const inline = parseInline(body);
          blocks.push(
            buildPara(inline, {
              // Two characters of leading whitespace count as one nesting level.
              listLv: Math.floor(indent.length / 2),
              listOrd: /\d+\./.test(marker),
            }),
          );
          pos += 1;
          continue;
        }

        // Blockquote: emitted as an indented paragraph holding the raw quote text.
        const quote = /^>\s*(.*)$/.exec(current);
        if (quote) {
          blocks.push(buildPara([buildSpan(quote[1])], { indentPt: 28 }));
          pos += 1;
          continue;
        }

        // Fenced code block: collect lines up to the closing fence (or end of input).
        if (current.startsWith('```')) {
          const body: string[] = [];
          pos += 1;
          for (; pos < rows.length && !rows[pos].startsWith('```'); pos += 1) {
            body.push(rows[pos]);
          }
          pos += 1; // step over the closing fence
          const code = buildSpan(body.join('\n'), { font: 'Courier New' });
          blocks.push(buildPara([code], {}));
          continue;
        }

        // Blank lines produce no node.
        if (current.trim() === '') {
          pos += 1;
          continue;
        }

        // Single-line alignment wrapper: <div align="center|right|left">…</div>
        const aligned = /^<div\s+align="(center|right|left)">(.*?)<\/div>$/i.exec(current);
        if (aligned) {
          const align = aligned[1].toLowerCase() as 'left' | 'center' | 'right';
          blocks.push(buildPara(parseInline(aligned[2]), { align }));
          pos += 1;
          continue;
        }

        // Anything else is an ordinary paragraph.
        blocks.push(buildPara(parseInline(current), {}));
        pos += 1;
      }

      warnings.push(...guard.flush());
      // A sheet always gets at least one (empty) paragraph.
      const content = blocks.length > 0 ? blocks : [buildPara([buildSpan('')])];
      const sheet = buildSheet(content, A4);
      return succeed(buildRoot({}, [sheet]), warnings);
    } catch (e: any) {
      warnings.push(...guard.flush());
      return fail(`MD decode error: ${e?.message ?? String(e)}`, warnings);
    }
  }
}
|
|
94
|
+
|
|
95
|
+
/** MIME types accepted for embedded base64 images; any other declared type is treated as PNG. */
const MD_INLINE_IMAGE_MIMES: readonly string[] = ['image/png', 'image/jpeg', 'image/gif', 'image/bmp'];

/** Embedded image: `![alt](data:<mime>;base64,<payload>)`. */
const MD_DATA_IMAGE_RE = /^(.*?)!\[([^\]]*)\]\((data:([^;]+);base64,([^)]+))\)(.*)/s;

/** Any other image reference: `![alt](url)`. */
const MD_LINKED_IMAGE_RE = /^(.*?)!\[([^\]]*)\]\(([^)]+)\)(.*)/s;

/**
 * Delimiter-based inline styles, listed in priority order. Each pattern
 * captures (1) the text before the styled run, (2) the styled text and
 * (3) the text after it.
 */
const MD_INLINE_STYLES = [
  { pattern: /^(.*?)\*\*\*(.+?)\*\*\*(.*)/s, props: { b: true, i: true } }, // ***bold+italic***
  { pattern: /^(.*?)\*\*(.+?)\*\*(.*)/s, props: { b: true } }, // **bold**
  { pattern: /^(.*?)\*(.+?)\*(.*)/s, props: { i: true } }, // *italic*
  { pattern: /^(.*?)~~(.+?)~~(.*)/s, props: { s: true } }, // ~~strikethrough~~
  { pattern: /^(.*?)<u>(.+?)<\/u>(.*)/si, props: { u: true } }, // <u>underline</u>
  { pattern: /^(.*?)<sup>(.+?)<\/sup>(.*)/si, props: { sup: true } }, // <sup>superscript</sup>
  { pattern: /^(.*?)<sub>(.+?)<\/sub>(.*)/si, props: { sub: true } }, // <sub>subscript</sub>
  { pattern: /^(.*?)`(.+?)`(.*)/s, props: { font: 'Courier New' } }, // `inline code`
];

/**
 * Splits a run of Markdown text into inline nodes (styled spans and images).
 *
 * Rules are tried in priority order against the whole remaining text: base64
 * images, other images, then the entries of `MD_INLINE_STYLES`. The text in
 * front of a match is parsed recursively, so markup handled by a
 * lower-priority rule is still recognized when it precedes a higher-priority
 * match (e.g. inline code followed by bold text). The styled text itself is
 * not parsed further.
 *
 * @param text Inline Markdown source.
 * @returns At least one node; plain (or empty) input yields a single unstyled span.
 */
function parseInline(text: string): (SpanNode | ImgNode)[] {
  const result: (SpanNode | ImgNode)[] = [];

  // The lead-in is strictly shorter than the text being scanned, so this recursion terminates.
  const pushLead = (lead: string): void => {
    if (lead) result.push(...parseInline(lead));
  };

  let rem = text;
  while (rem.length > 0) {
    const dataImg = MD_DATA_IMAGE_RE.exec(rem);
    if (dataImg) {
      pushLead(dataImg[1]);
      const mime = (MD_INLINE_IMAGE_MIMES.includes(dataImg[4]) ? dataImg[4] : 'image/png') as ImgNode['mime'];
      // 100x100 is the fixed size the decoder assigns to embedded images; an empty alt becomes undefined.
      result.push(buildImg(dataImg[5], mime, 100, 100, dataImg[2] || undefined));
      rem = dataImg[6];
      continue;
    }

    const linkedImg = MD_LINKED_IMAGE_RE.exec(rem);
    if (linkedImg) {
      pushLead(linkedImg[1]);
      // A URL cannot be converted to base64 here, so keep a textual placeholder (alt text, else the URL).
      result.push(buildSpan(`[이미지: ${linkedImg[2] || linkedImg[3]}]`));
      rem = linkedImg[4];
      continue;
    }

    let styled = false;
    for (const { pattern, props } of MD_INLINE_STYLES) {
      const hit = pattern.exec(rem);
      if (!hit) continue;
      pushLead(hit[1]);
      // Copy the props so every span owns its own object.
      result.push(buildSpan(hit[2], { ...props }));
      rem = hit[3];
      styled = true;
      break;
    }
    if (styled) continue;

    // No markup left: the remainder is plain text.
    result.push(buildSpan(rem));
    break;
  }

  return result.length > 0 ? result : [buildSpan(text)];
}
|
|
157
|
+
|
|
158
|
+
/**
 * Parses a pipe table whose header row is at `startLine` and whose separator
 * row is at `startLine + 1` (the caller is expected to have verified both).
 *
 * Body rows are consumed until a line without a `|`, a row with no cells, or
 * the end of input is reached. Header cells are bold; cell text is stored
 * as-is, without inline parsing.
 *
 * @param lines All lines of the document.
 * @param startLine Index of the table's header row.
 * @returns The grid node plus the index of the first line after the table, or
 *   `null` when the header row contains no cells.
 */
function parseMdTable(lines: string[], startLine: number): { node: any; nextLine: number } | null {
  // Split a row on '|' and drop the empty edge cells produced by the optional
  // leading and trailing pipes, so `| a | b |` and `a | b` both give two cells.
  const splitRow = (line: string): string[] => {
    const cells = line.split('|').map(cell => cell.trim());
    if (cells.length > 0 && cells[0] === '') cells.shift();
    if (cells.length > 0 && cells[cells.length - 1] === '') cells.pop();
    return cells;
  };

  const headers = splitRow(lines[startLine]);
  if (headers.length === 0) return null;

  // startLine + 1 is the separator row, so body rows begin two lines down.
  let cur = startLine + 2;
  const rows: string[][] = [];
  while (cur < lines.length) {
    if (!lines[cur].includes('|')) break;
    const cells = splitRow(lines[cur]);
    if (cells.length === 0) break;
    rows.push(cells);
    cur++;
  }

  const gridRows = [headers, ...rows].map((row, rowIndex) =>
    buildRow(
      row.map(cell => buildCell([buildPara([buildSpan(cell, rowIndex === 0 ? { b: true } : {})])])),
    ),
  );

  return { node: buildGrid(gridRows), nextLine: cur };
}
|
|
179
|
+
|
|
180
|
+
// Module-load side effect: hand a new MdDecoder instance to the imported registry via registerDecoder.
registry.registerDecoder(new MdDecoder());
|