@jackuait/blok 0.10.0-beta.13 → 0.10.0-beta.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,382 @@
1
+ import { createIdGenerator } from './id-generator';
2
+ import { mapToNearestPresetName } from '../../../components/utils/color-mapping';
3
+ import type { OutputBlockData } from './types';
4
+
5
/**
 * Convert the direct child nodes of `wrapper` into a flat list of Blok JSON
 * blocks, in document order.
 *
 * A single id generator is shared by the whole conversion, so ids are unique
 * per block type within the returned list.
 *
 * @param wrapper - Element whose child nodes are converted.
 * @returns The blocks produced from all child nodes.
 */
export function buildBlocks(wrapper: HTMLElement): OutputBlockData[] {
  const idFor = createIdGenerator();
  const result: OutputBlockData[] = [];

  // Snapshot the child list first so iteration does not depend on a live NodeList.
  Array.from(wrapper.childNodes).forEach((child) => {
    convertNode(child, result, idFor);
  });

  return result;
}
19
+
20
+ // ---------------------------------------------------------------------------
21
+ // Converters
22
+ // ---------------------------------------------------------------------------
23
+
24
/**
 * Convert one top-level DOM node into zero or more blocks appended to `blocks`.
 *
 * - Text nodes with non-whitespace content become paragraphs.
 * - Nodes that are neither text nor elements (e.g. comments) are ignored.
 * - P, H1–H6, BLOCKQUOTE, PRE, HR, IMG, DETAILS, UL/OL, TABLE and ASIDE map to
 *   their dedicated block types.
 * - Any other element falls back to a paragraph holding its innerHTML.
 *
 * @param node - DOM node to convert.
 * @param blocks - Output list; mutated in place.
 * @param nextId - Returns a fresh id for the given block-type prefix.
 */
function convertNode(
  node: Node,
  blocks: OutputBlockData[],
  nextId: (prefix: string) => string
): void {
  if (node.nodeType === Node.TEXT_NODE) {
    const text = node.textContent?.trim() ?? '';

    if (text) {
      // textContent is entity-decoded, while every other branch stores
      // serialized HTML (innerHTML). Re-escape so literal "<", ">" and "&"
      // stay text instead of turning into markup downstream.
      const html = text
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/\u00A0/g, '&nbsp;');

      blocks.push({ id: nextId('paragraph'), type: 'paragraph', data: { text: html } });
    }

    return;
  }

  if (node.nodeType !== Node.ELEMENT_NODE) {
    return;
  }

  const el = node as HTMLElement;
  const tag = el.tagName;

  if (tag === 'P') {
    blocks.push({ id: nextId('paragraph'), type: 'paragraph', data: { text: el.innerHTML } });

    return;
  }

  if (/^H[1-6]$/.test(tag)) {
    // The second character of the tag name is the heading level digit.
    const level = Number(tag.charAt(1));

    blocks.push({ id: nextId('header'), type: 'header', data: { text: el.innerHTML, level } });

    return;
  }

  if (tag === 'BLOCKQUOTE') {
    blocks.push({
      id: nextId('quote'),
      type: 'quote',
      data: { text: el.innerHTML, size: 'default' },
    });

    return;
  }

  if (tag === 'PRE') {
    // Code keeps plain text only; inline markup inside <pre> is dropped.
    blocks.push({
      id: nextId('code'),
      type: 'code',
      data: { code: el.textContent ?? '', language: 'plain-text' },
    });

    return;
  }

  if (tag === 'HR') {
    blocks.push({ id: nextId('divider'), type: 'divider', data: {} });

    return;
  }

  if (tag === 'IMG') {
    const src = el.getAttribute('src') ?? '';
    const widthStyle = parseCssProperty(el, 'width');
    let width: number | null = null;

    if (widthStyle) {
      // parseInt reads the leading integer of the inline width value.
      const parsed = parseInt(widthStyle, 10);

      if (!isNaN(parsed)) {
        width = parsed;
      }
    }

    blocks.push({
      id: nextId('image'),
      type: 'image',
      data: { url: src },
      stretched: null,
      key: null,
      width,
    });

    return;
  }

  if (tag === 'DETAILS') {
    // Only the <summary> HTML is kept; without a summary the whole innerHTML is used.
    const summary = el.querySelector('summary');
    const text = summary ? summary.innerHTML : el.innerHTML;

    blocks.push({ id: nextId('toggle'), type: 'toggle', data: { text } });

    return;
  }

  if (tag === 'UL' || tag === 'OL') {
    flattenList(el, tag === 'OL' ? 'ordered' : 'unordered', 0, blocks, nextId);

    return;
  }

  if (tag === 'TABLE') {
    convertTable(el, blocks, nextId);

    return;
  }

  if (tag === 'ASIDE') {
    convertCallout(el, blocks, nextId);

    return;
  }

  // Unknown block element: extract innerHTML as paragraph
  blocks.push({
    id: nextId('paragraph'),
    type: 'paragraph',
    data: { text: el.innerHTML },
  });
}
145
+
146
+ // ---------------------------------------------------------------------------
147
+ // List flattening
148
+ // ---------------------------------------------------------------------------
149
+
150
/**
 * Flatten a (possibly nested) UL/OL into a sequence of `list` blocks, one per
 * LI, in document order. Nesting is expressed through `data.depth`.
 *
 * @param listEl - The UL or OL element to flatten.
 * @param style - List style recorded on every item of this list.
 * @param depth - Zero-based nesting depth of `listEl`.
 * @param blocks - Output list; mutated in place.
 * @param nextId - Returns a fresh id for the given block-type prefix.
 */
function flattenList(
  listEl: HTMLElement,
  style: 'ordered' | 'unordered',
  depth: number,
  blocks: OutputBlockData[],
  nextId: (prefix: string) => string
): void {
  // A missing or non-numeric `start` yields null rather than NaN.
  const startAttr = listEl.getAttribute('start');
  const parsedStart = startAttr ? Number(startAttr) : NaN;
  const startValue = Number.isFinite(parsedStart) ? parsedStart : null;
  let isFirstItem = true;

  for (const child of Array.from(listEl.children)) {
    if (child.tagName !== 'LI') {
      continue;
    }

    // Work on a clone so nested lists can be detached without mutating the DOM.
    const clone = child.cloneNode(true) as HTMLElement;
    const nestedLists: HTMLElement[] = [];

    for (const nested of Array.from(clone.querySelectorAll<HTMLElement>('ul, ol'))) {
      // The selector matches lists at every depth, in document order. Once an
      // outer list has been detached, the lists inside it are no longer
      // contained in the clone; skip them here because the recursion below
      // handles them. Collecting them again would emit duplicate blocks.
      if (!clone.contains(nested)) {
        continue;
      }

      nested.remove();
      nestedLists.push(nested);
    }

    const text = clone.innerHTML.trim();

    // Use aria-level if present and numeric (1-based → 0-based), otherwise nesting depth.
    const ariaLevel = child.getAttribute('aria-level');
    const parsedLevel = ariaLevel ? parseInt(ariaLevel, 10) : NaN;
    const itemDepth = Number.isNaN(parsedLevel) ? depth : Math.max(0, parsedLevel - 1);

    blocks.push({
      id: nextId('list'),
      type: 'list',
      data: {
        text,
        style,
        depth: itemDepth === 0 ? null : itemDepth,
        checked: null,
        // Only the first item of a list carries the list's start value.
        start: isFirstItem && startValue !== null ? startValue : null,
      },
    });

    isFirstItem = false;

    // Recursively process the lists nested directly inside this item.
    for (const nested of nestedLists) {
      const nestedStyle = nested.tagName === 'OL' ? 'ordered' : 'unordered';

      flattenList(nested, nestedStyle, depth + 1, blocks, nextId);
    }
  }
}
207
+
208
+ // ---------------------------------------------------------------------------
209
+ // Table conversion
210
+ // ---------------------------------------------------------------------------
211
+
212
/**
 * Convert a TABLE element into one `table` block followed by one child
 * `paragraph` block per non-empty cell.
 *
 * The table block is inserted before its child paragraphs. Its
 * `data.content` is a row-by-row grid whose entries reference the child
 * paragraph ids and carry the cell's mapped background and text colors.
 *
 * @param tableEl - The TABLE element to convert.
 * @param blocks - Output list; mutated in place.
 * @param nextId - Returns a fresh id for the given block-type prefix.
 */
function convertTable(
  tableEl: HTMLElement,
  blocks: OutputBlockData[],
  nextId: (prefix: string) => string
): void {
  const tableId = nextId('table');

  // Child paragraphs are appended from this index on; the table block is
  // spliced in here afterwards so it precedes them.
  const insertAt = blocks.length;

  // Keep only rows owned by this table. A plain descendant selector would
  // also pick up rows of tables nested inside a cell, whose markup is already
  // preserved through that cell's innerHTML.
  const rows = Array.from(tableEl.querySelectorAll('tr')).filter(
    (tr) => tr.closest('table') === tableEl
  );

  let withHeadings = false;
  const content: Record<string, unknown>[][] = [];
  let firstRowCells: HTMLElement[] = [];

  rows.forEach((row, rowIdx) => {
    // Direct TD/TH children only, again to stay out of nested tables.
    const cells = Array.from(row.children).filter(
      (c) => c.tagName === 'TD' || c.tagName === 'TH'
    ) as HTMLElement[];

    if (rowIdx === 0) {
      firstRowCells = cells;

      // A first row containing any TH marks the table as having headings.
      if (cells.some((c) => c.tagName === 'TH')) {
        withHeadings = true;
      }
    }

    const rowData: Record<string, unknown>[] = [];

    for (const cellEl of cells) {
      const cellText = cellEl.innerHTML.trim();

      if (!cellText) {
        // Empty cells keep their grid position but own no paragraph block.
        rowData.push({ blocks: [], color: null, textColor: null });
        continue;
      }

      const childId = nextId('paragraph');

      blocks.push({
        id: childId,
        type: 'paragraph',
        parent: tableId,
        data: { text: cellText },
      });

      // Parse cell colors
      const bgColor = parseCssProperty(cellEl, 'background-color');
      const textColor = parseCssProperty(cellEl, 'color');

      rowData.push({
        blocks: [childId],
        color: bgColor ? mapToNearestPresetName(bgColor, 'bg') : null,
        textColor: textColor ? mapToNearestPresetName(textColor, 'text') : null,
      });
    }

    content.push(rowData);
  });

  // Parse column widths from first row cells
  const colWidths = firstRowCells.map((cell) => {
    const width = parseCssProperty(cell, 'width');

    if (width) {
      const px = parseInt(width, 10);

      return isNaN(px) ? null : px;
    }

    return null;
  });
  const hasWidths = colWidths.some((w) => w !== null);

  const tableBlock: OutputBlockData = {
    id: tableId,
    type: 'table',
    data: {
      withHeadings,
      withHeadingColumn: false,
      content,
      // colWidths is emitted only when at least one column declares a width.
      ...(hasWidths ? { colWidths } : {}),
    },
  };

  // Insert table block before its child paragraph blocks (or at the end when
  // there are none, since insertAt then equals blocks.length).
  blocks.splice(insertAt, 0, tableBlock);
}
300
+
301
+ // ---------------------------------------------------------------------------
302
+ // Callout conversion
303
+ // ---------------------------------------------------------------------------
304
+
305
/**
 * Convert an ASIDE element into one `callout` block followed by one child
 * `paragraph` block per element child and per non-empty text node.
 *
 * The callout block is inserted before its children and lists their ids in
 * `content`. Its background color is the preset mapped from the inline
 * `background-color`, or 'gray' when none is available.
 *
 * @param asideEl - The ASIDE element to convert.
 * @param blocks - Output list; mutated in place.
 * @param nextId - Returns a fresh id for the given block-type prefix.
 */
function convertCallout(
  asideEl: HTMLElement,
  blocks: OutputBlockData[],
  nextId: (prefix: string) => string
): void {
  const calloutId = nextId('callout');
  const bgColor = parseCssProperty(asideEl, 'background-color');
  const backgroundColor = bgColor ? mapToNearestPresetName(bgColor, 'bg') : null;

  // Children are appended from this index on; the callout is spliced in here
  // afterwards so it precedes them.
  const insertAt = blocks.length;
  const childIds: string[] = [];

  const pushChild = (html: string): void => {
    const childId = nextId('paragraph');

    blocks.push({
      id: childId,
      type: 'paragraph',
      parent: calloutId,
      data: { text: html },
    });

    childIds.push(childId);
  };

  for (const child of Array.from(asideEl.childNodes)) {
    if (child.nodeType === Node.ELEMENT_NODE) {
      pushChild((child as HTMLElement).innerHTML);
      continue;
    }

    if (child.nodeType !== Node.TEXT_NODE) {
      continue;
    }

    const text = child.textContent?.trim() ?? '';

    if (text) {
      // textContent is entity-decoded, while element children are stored as
      // serialized HTML. Re-escape so literal "<", ">" and "&" stay text.
      pushChild(
        text
          .replace(/&/g, '&amp;')
          .replace(/</g, '&lt;')
          .replace(/>/g, '&gt;')
          .replace(/\u00A0/g, '&nbsp;')
      );
    }
  }

  const calloutBlock: OutputBlockData = {
    id: calloutId,
    type: 'callout',
    data: {
      emoji: '\u{1F4A1}',
      backgroundColor: backgroundColor ?? 'gray',
    },
    content: childIds,
  };

  // Insert callout block before its children (or at the end when there are
  // none, since insertAt then equals blocks.length).
  blocks.splice(insertAt, 0, calloutBlock);
}
366
+
367
+ // ---------------------------------------------------------------------------
368
+ // Helpers
369
+ // ---------------------------------------------------------------------------
370
+
371
/**
 * Read the value of one CSS property from an element's inline `style`
 * attribute.
 *
 * Matching is case-insensitive and anchored so that a shorter name does not
 * match inside a longer one (`color` does not match `background-color`). The
 * first matching declaration wins. A trailing `!important` is stripped.
 *
 * @param el - Element whose `style` attribute is inspected.
 * @param property - CSS property name, e.g. `width` or `background-color`.
 * @returns The trimmed value, or null when the attribute or property is
 *   absent or the value is empty.
 */
function parseCssProperty(el: HTMLElement, property: string): string | null {
  const style = el.getAttribute('style');

  if (!style) {
    return null;
  }

  // Escape regex metacharacters so the property name is matched literally.
  const escapedProperty = property.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // The lookbehind rejects matches that are the tail of a longer identifier.
  const regex = new RegExp(`(?<![\\w-])${escapedProperty}\\s*:\\s*([^;]+)`, 'i');
  const match = regex.exec(style);

  if (!match) {
    return null;
  }

  const value = match[1].replace(/\s*!important\s*$/i, '').trim();

  return value === '' ? null : value;
}
@@ -0,0 +1,11 @@
1
/**
 * Create an id generator with an independent counter per prefix.
 *
 * Each call to the returned function yields `<prefix>-<n>`, where `n` starts
 * at 1 and increases by one on every call made with that same prefix.
 *
 * @returns A function mapping a prefix to its next sequential id.
 */
export function createIdGenerator(): (prefix: string) => string {
  const lastIssued = new Map<string, number>();

  return function issue(prefix: string): string {
    const previous = lastIssued.get(prefix);
    const next = previous === undefined ? 1 : previous + 1;

    lastIssued.set(prefix, next);

    return prefix + '-' + String(next);
  };
}
@@ -0,0 +1,21 @@
1
+ import { preprocess } from './preprocessor';
2
+ import { sanitize } from './sanitizer';
3
+ import { buildBlocks } from './block-builder';
4
+ import type { OutputData } from './types';
5
+
6
/**
 * Convert an HTML string into serialized Blok JSON.
 *
 * Pipeline: parse → preprocess → sanitize → build blocks → JSON.stringify.
 * `preprocess` and `sanitize` are invoked on the parsed body and their return
 * values are ignored (presumably they mutate it in place — confirm against
 * their implementations).
 *
 * @param html - HTML source to convert.
 * @returns JSON string of `{ version, blocks }`.
 */
export function convertHtml(html: string): string {
  const { body } = new DOMParser().parseFromString(html, 'text/html');

  preprocess(body);
  sanitize(body);

  const output: OutputData = {
    version: '2.31.0',
    blocks: buildBlocks(body),
  };

  return JSON.stringify(output);
}