hwpkit-dev 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,180 @@
1
+ import type { Decoder } from '../../contract/decoder';
2
+ import type { DocRoot, ContentNode, ParaNode, SpanNode, ImgNode } from '../../model/doc-tree';
3
+ import type { Outcome } from '../../contract/result';
4
+ import type { ParaProps, TextProps } from '../../model/doc-props';
5
+ import { A4 } from '../../model/doc-props';
6
+ import { succeed, fail } from '../../contract/result';
7
+ import { buildRoot, buildSheet, buildPara, buildSpan, buildImg, buildGrid, buildRow, buildCell } from '../../model/builders';
8
+ import { ShieldedParser } from '../../safety/ShieldedParser';
9
+ import { TextKit } from '../../toolkit/TextKit';
10
+ import { registry } from '../../pipeline/registry';
11
+
12
/**
 * Decoder for the `md` format: turns Markdown bytes into a document tree
 * holding a single sheet.
 *
 * Block-level constructs are recognised one line at a time, in this order:
 * heading, pipe table, horizontal rule, list item, blockquote, fenced code
 * block, blank line, `<div align="...">` paragraph, plain paragraph.
 * The first construct that matches consumes the line(s).
 */
export class MdDecoder implements Decoder {
  readonly format = 'md';

  /**
   * Decodes Markdown into a document root.
   *
   * @param data Raw bytes of the Markdown source; converted to text with `TextKit.decode`.
   * @returns `succeed(root, warns)` where the root contains one sheet built with `A4`,
   *   or `fail(message, warns)` if anything inside the parsing loop throws.
   *   In both cases `warns` contains whatever `shield.flush()` yields.
   */
  async decode(data: Uint8Array): Promise<Outcome<DocRoot>> {
    const shield = new ShieldedParser();
    const warns: string[] = [];

    try {
      const text = TextKit.decode(data);
      const lines = text.split(/\r?\n/);
      const kids: ContentNode[] = [];

      let i = 0;
      while (i < lines.length) {
        const line = lines[i];

        // Heading: one to six '#' followed by whitespace and the title text.
        // The title is kept as a single span; levels 1 and 2 are rendered bold.
        const headingMatch = line.match(/^(#{1,6})\s+(.+)$/);
        if (headingMatch) {
          const level = headingMatch[1].length as 1 | 2 | 3 | 4 | 5 | 6;
          kids.push(buildPara([buildSpan(headingMatch[2], { b: level <= 2 })], { heading: level }));
          i++;
          continue;
        }

        // Table: a line containing '|' whose next line looks like a separator row.
        // NOTE(review): `shield.guard` presumably returns the `null` fallback when the
        // callback throws — confirm. A null result falls through to the checks below,
        // so the line is then handled as some other construct.
        if (line.includes('|') && i + 1 < lines.length && lines[i + 1].match(/^\s*\|?\s*[-:]+\s*\|/)) {
          const tableResult = shield.guard(() => parseMdTable(lines, i), null, `md:table@${i}`);
          if (tableResult) {
            kids.push(tableResult.node);
            i = tableResult.nextLine;
            continue;
          }
        }

        // Horizontal rule: represented as an empty paragraph.
        if (line.match(/^[-*_]{3,}$/)) {
          kids.push(buildPara([buildSpan('')], {}));
          i++;
          continue;
        }

        // List item: optional indent, a bullet (-, *, +) or "N." marker, then the text.
        // Two spaces of indentation count as one nesting level.
        const listMatch = line.match(/^(\s*)([-*+]|\d+\.)\s+(.+)$/);
        if (listMatch) {
          kids.push(
            buildPara(parseInline(listMatch[3]), {
              listLv: Math.floor(listMatch[1].length / 2),
              listOrd: /\d+\./.test(listMatch[2]),
            }),
          );
          i++;
          continue;
        }

        // Blockquote: the quoted text becomes a paragraph with a 28pt indent.
        const bqMatch = line.match(/^>\s*(.*)$/);
        if (bqMatch) {
          kids.push(buildPara([buildSpan(bqMatch[1])], { indentPt: 28 }));
          i++;
          continue;
        }

        // Fenced code block: everything up to the closing fence (or the end of the
        // input when the fence is never closed) becomes one Courier New span.
        if (line.startsWith('```')) {
          const codeLines: string[] = [];
          i++;
          while (i < lines.length && !lines[i].startsWith('```')) {
            codeLines.push(lines[i]);
            i++;
          }
          i++; // step over the closing fence
          kids.push(buildPara([buildSpan(codeLines.join('\n'), { font: 'Courier New' })], {}));
          continue;
        }

        // Blank lines only separate blocks; they produce no node.
        if (line.trim() === '') {
          i++;
          continue;
        }

        // Paragraph wrapped in <div align="..."> carries an explicit alignment.
        const alignMatch = line.match(/^<div\s+align="(center|right|left)">(.*?)<\/div>$/i);
        if (alignMatch) {
          const align = alignMatch[1].toLowerCase() as 'left' | 'center' | 'right';
          kids.push(buildPara(parseInline(alignMatch[2]), { align }));
          i++;
          continue;
        }

        // Anything else is a regular paragraph.
        kids.push(buildPara(parseInline(line), {}));
        i++;
      }

      warns.push(...shield.flush());
      // A sheet always gets at least one (empty) paragraph.
      const sheet = buildSheet(kids.length > 0 ? kids : [buildPara([buildSpan('')])], A4);
      return succeed(buildRoot({}, [sheet]), warns);
    } catch (e: unknown) {
      warns.push(...shield.flush());
      // A thrown value is not guaranteed to be an Error, so narrow before reading `.message`.
      const detail = e instanceof Error ? e.message : String(e);
      return fail(`MD decode error: ${detail}`, warns);
    }
  }
}
94
+
95
/** One inline construct: how to find it and which node to build from the match. */
interface InlineRule {
  readonly pattern: RegExp;
  readonly build: (m: RegExpExecArray) => SpanNode | ImgNode;
}

/** MIME types accepted for embedded images; anything else is stored as 'image/png'. */
const INLINE_IMAGE_MIMES: readonly string[] = ['image/png', 'image/jpeg', 'image/gif', 'image/bmp'];

/**
 * Inline rules in priority order. Priority only decides ties: when two rules
 * match at the same position the one listed first wins (so `***x***` is
 * bold+italic rather than bold or italic). None of the patterns is global,
 * so `exec` always scans from the start of the string it is given.
 */
const INLINE_RULES: readonly InlineRule[] = [
  {
    // Image with an embedded payload: ![alt](data:mime;base64,...)
    pattern: /!\[([^\]]*)\]\(data:([^;]+);base64,([^)]+)\)/,
    build: (m) => {
      const mime = m[2] as ImgNode['mime'];
      // NOTE(review): 100, 100 are presumably the default width/height — confirm against buildImg.
      return buildImg(m[3], INLINE_IMAGE_MIMES.includes(mime) ? mime : 'image/png', 100, 100, m[1] || undefined);
    },
  },
  {
    // Image referenced by URL: it cannot be embedded, so keep a textual placeholder
    // built from the alt text (or the URL when there is no alt text).
    pattern: /!\[([^\]]*)\]\(([^)]+)\)/,
    build: (m) => buildSpan(`[이미지: ${m[1] || m[2]}]`),
  },
  { pattern: /\*\*\*(.+?)\*\*\*/s, build: (m) => buildSpan(m[1], { b: true, i: true }) },
  { pattern: /\*\*(.+?)\*\*/s, build: (m) => buildSpan(m[1], { b: true }) },
  { pattern: /\*(.+?)\*/s, build: (m) => buildSpan(m[1], { i: true }) },
  { pattern: /~~(.+?)~~/s, build: (m) => buildSpan(m[1], { s: true }) },
  { pattern: /<u>(.+?)<\/u>/si, build: (m) => buildSpan(m[1], { u: true }) },
  { pattern: /<sup>(.+?)<\/sup>/si, build: (m) => buildSpan(m[1], { sup: true }) },
  { pattern: /<sub>(.+?)<\/sub>/si, build: (m) => buildSpan(m[1], { sub: true }) },
  { pattern: /`(.+?)`/s, build: (m) => buildSpan(m[1], { font: 'Courier New' }) },
];

/**
 * Splits one line of Markdown into styled spans and embedded images.
 *
 * The text is consumed left to right. At each step the construct that starts
 * earliest in the remaining text is taken, so markup that appears first is
 * never swallowed into the plain-text prefix of a later, higher-priority
 * match (for example `*a* and **b**` yields an italic span and a bold span,
 * and the contents of a code span are not re-interpreted as emphasis).
 *
 * @param text Inline Markdown source.
 * @returns The nodes in source order. Always contains at least one node:
 *   empty input yields a single empty span.
 */
function parseInline(text: string): (SpanNode | ImgNode)[] {
  const result: (SpanNode | ImgNode)[] = [];
  let rem = text;

  while (rem.length > 0) {
    // Pick the rule whose match starts earliest; strict '<' keeps rule order on ties.
    let bestRule: InlineRule | undefined;
    let bestMatch: RegExpExecArray | undefined;
    for (const rule of INLINE_RULES) {
      const match = rule.pattern.exec(rem);
      if (match && (bestMatch === undefined || match.index < bestMatch.index)) {
        bestRule = rule;
        bestMatch = match;
      }
    }

    if (bestRule === undefined || bestMatch === undefined) {
      // No markup left: the rest is plain text.
      result.push(buildSpan(rem));
      break;
    }

    if (bestMatch.index > 0) result.push(buildSpan(rem.slice(0, bestMatch.index)));
    result.push(bestRule.build(bestMatch));
    // Every pattern contains literal delimiters, so a match is never empty and `rem` always shrinks.
    rem = rem.slice(bestMatch.index + bestMatch[0].length);
  }

  return result.length > 0 ? result : [buildSpan(text)];
}
157
+
158
/**
 * Parses a Markdown pipe table into a grid node.
 *
 * `lines[startLine]` is the header row and `lines[startLine + 1]` the
 * separator row (skipped without being inspected). Body rows follow until a
 * line without '|' or a row with no cells is reached. Header cells are bold.
 *
 * @param lines All lines of the document.
 * @param startLine Index of the header row.
 * @returns The grid node and the index of the first line after the table, or
 *   `null` when there is no header/separator pair at `startLine` or the
 *   header has no cells.
 */
// NOTE(review): `node` stays `any` to keep the signature unchanged for callers;
// it holds whatever `buildGrid` returns.
function parseMdTable(lines: string[], startLine: number): { node: any; nextLine: number } | null {
  // Both the header and the separator line must exist.
  if (startLine < 0 || startLine + 1 >= lines.length) return null;

  // Splits a row on '|' and drops the empty cells produced by the optional
  // leading and trailing pipes, so `| a | b |` and `a | b` both give ['a', 'b'].
  const splitRow = (line: string): string[] => {
    const cells = line.split('|').map((cell) => cell.trim());
    if (cells.length > 0 && cells[0] === '') cells.shift();
    if (cells.length > 0 && cells[cells.length - 1] === '') cells.pop();
    return cells;
  };

  const headers = splitRow(lines[startLine]);
  if (headers.length === 0) return null;

  let cur = startLine + 2; // first body row, right after the separator
  const rows: string[][] = [];
  while (cur < lines.length && lines[cur].includes('|')) {
    const cells = splitRow(lines[cur]);
    if (cells.length === 0) break; // a bare "|" ends the table
    rows.push(cells);
    cur++;
  }

  const gridRows = [headers, ...rows].map((row, rowIndex) =>
    buildRow(
      row.map((cell) => buildCell([buildPara([buildSpan(cell, rowIndex === 0 ? { b: true } : {})])])),
    ),
  );

  return { node: buildGrid(gridRows), nextLine: cur };
}
179
+
180
// Module side effect: loading this file registers an MdDecoder instance with the shared registry.
const mdDecoder = new MdDecoder();
registry.registerDecoder(mdDecoder);