@jacobbubu/md-to-lark 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/LICENSE +15 -0
  2. package/README.md +171 -0
  3. package/dist/btt/build-tree.js +79 -0
  4. package/dist/btt/index.js +1 -0
  5. package/dist/btt/types.js +1 -0
  6. package/dist/cli/publish-md-to-lark.js +15 -0
  7. package/dist/commands/publish-md/args.js +224 -0
  8. package/dist/commands/publish-md/command.js +97 -0
  9. package/dist/commands/publish-md/index.js +1 -0
  10. package/dist/commands/publish-md/input-resolver.js +48 -0
  11. package/dist/commands/publish-md/mermaid-render.js +17 -0
  12. package/dist/commands/publish-md/pipeline-transform.js +4 -0
  13. package/dist/commands/publish-md/preset-loader.js +113 -0
  14. package/dist/commands/publish-md/presets/medium.js +7 -0
  15. package/dist/commands/publish-md/presets/zh-format.js +8 -0
  16. package/dist/commands/publish-md/title-policy.js +93 -0
  17. package/dist/index.js +1 -0
  18. package/dist/interop/btt-to-last.js +79 -0
  19. package/dist/interop/codec-btt-to-last.js +435 -0
  20. package/dist/interop/codec-last-to-btt.js +383 -0
  21. package/dist/interop/codec-shared.js +722 -0
  22. package/dist/interop/index.js +2 -0
  23. package/dist/interop/last-to-btt.js +17 -0
  24. package/dist/lark/block-types.js +42 -0
  25. package/dist/lark/client.js +36 -0
  26. package/dist/lark/docx/ops.js +596 -0
  27. package/dist/lark/docx/render-btt.js +156 -0
  28. package/dist/lark/docx/render-models.js +1 -0
  29. package/dist/lark/docx/render-payload.js +338 -0
  30. package/dist/lark/docx/render-post-process.js +98 -0
  31. package/dist/lark/docx/render-table.js +87 -0
  32. package/dist/lark/docx/render-types.js +7 -0
  33. package/dist/lark/index.js +2 -0
  34. package/dist/lark/types.js +1 -0
  35. package/dist/last/api.js +1687 -0
  36. package/dist/last/index.js +3 -0
  37. package/dist/last/preview-terminal.js +296 -0
  38. package/dist/last/textual-block-types.js +19 -0
  39. package/dist/last/to-markdown.js +303 -0
  40. package/dist/last/types.js +11 -0
  41. package/dist/pipeline/hast-to-last.js +946 -0
  42. package/dist/pipeline/index.js +3 -0
  43. package/dist/pipeline/markdown/md-to-hast.js +34 -0
  44. package/dist/pipeline/markdown/prepare-markdown.js +1049 -0
  45. package/dist/preview/index.js +1 -0
  46. package/dist/preview/markdown-terminal.js +350 -0
  47. package/dist/publish/asset-adapter.js +123 -0
  48. package/dist/publish/btt-patch.js +65 -0
  49. package/dist/publish/common.js +139 -0
  50. package/dist/publish/ids.js +9 -0
  51. package/dist/publish/index.js +7 -0
  52. package/dist/publish/last-normalize.js +327 -0
  53. package/dist/publish/process-file.js +228 -0
  54. package/dist/publish/runtime.js +133 -0
  55. package/dist/publish/stage-cache.js +56 -0
  56. package/dist/shared/rate-limiter.js +18 -0
  57. package/dist/shared/retry.js +141 -0
  58. package/package.json +78 -0
@@ -0,0 +1,946 @@
1
+ import { toString } from 'hast-util-to-string';
2
+ import { LAST_TEXTUAL_BLOCK_TYPE_SET } from '../last/textual-block-types.js';
3
// Tag names of wrapper elements whose children are converted in place of the wrapper itself.
const BLOCK_CONTAINER_TAGS = new Set(['article', 'section', 'main', 'div', 'aside', 'header', 'footer']);

// Alignment written into a textual payload's style when no override supplies one.
const DEFAULT_ALIGN = 'left';
13
/**
 * Builds a fresh conversion context.
 *
 * @returns {{blocks: object, blockCounter: number, inlineCounter: number}}
 *   An empty block registry plus block/inline ID counters, both starting at 1.
 */
function createContext() {
  const blocks = {};
  return { blocks, blockCounter: 1, inlineCounter: 1 };
}
20
/**
 * Allocates the next block ID (`b_<n>`) and advances the context's block counter.
 *
 * @param {{blockCounter: number}} ctx - Conversion context; its counter is incremented.
 * @returns {string} The ID built from the counter value before the increment.
 */
function nextBlockId(ctx) {
  const current = ctx.blockCounter;
  ctx.blockCounter = current + 1;
  return `b_${current}`;
}
25
/**
 * Allocates the next inline ID (`i_<n>`) and advances the context's inline counter.
 *
 * @param {{inlineCounter: number}} ctx - Conversion context; its counter is incremented.
 * @returns {string} The ID built from the counter value before the increment.
 */
function nextInlineId(ctx) {
  const current = ctx.inlineCounter;
  ctx.inlineCounter = current + 1;
  return `i_${current}`;
}
30
/**
 * Creates a marks record with every formatting flag off and every value slot empty.
 *
 * Keys are inserted in a fixed order because marks are compared through their
 * JSON serialization when adjacent text runs are merged.
 *
 * @returns {object} A new marks object.
 */
function createDefaultMarks() {
  const marks = {};
  for (const flag of ['bold', 'italic', 'strikethrough', 'underline', 'inlineCode']) {
    marks[flag] = false;
  }
  for (const slot of ['textColor', 'backgroundColor', 'link']) {
    marks[slot] = null;
  }
  return marks;
}
42
/**
 * Copies a marks record so that the copy can be modified independently.
 *
 * @param {object} marks - Marks to copy.
 * @returns {object} A shallow copy whose `link` is a fresh `{ url }` object,
 *   or `null` when the source has no link.
 */
function cloneMarks(marks) {
  const copy = { ...marks };
  copy.link = marks.link ? { url: marks.link.url } : null;
  return copy;
}
49
/**
 * Builds the payload of a textual block.
 *
 * @param {Array<object>} inlines - Inline nodes of the block.
 * @param {object} [overrides] - Style entries layered over the defaults
 *   (`align` = DEFAULT_ALIGN, `language` = null).
 * @returns {{style: object, inlines: Array<object>}} The payload.
 */
function createTextPayload(inlines, overrides) {
  const style = Object.assign({ align: DEFAULT_ALIGN, language: null }, overrides);
  return { style, inlines };
}
59
/**
 * Registers a block in the context's registry under the block's own ID.
 *
 * @param {{blocks: object}} ctx - Conversion context.
 * @param {{id: string}} block - Block to store.
 */
function addBlock(ctx, block) {
  const { id } = block;
  ctx.blocks[id] = block;
}
62
/**
 * Reports whether a node's `type` is `'element'`.
 *
 * @param {{type: string}} node - Node to inspect.
 * @returns {boolean}
 */
function isElement(node) {
  const { type } = node;
  return type === 'element';
}
65
/**
 * Reports whether a node's `type` is `'text'`.
 *
 * @param {{type: string}} node - Node to inspect.
 * @returns {boolean}
 */
function isText(node) {
  const { type } = node;
  return type === 'text';
}
68
/**
 * Returns a node's `children` array, or an empty array when it has none.
 *
 * @param {{children?: Array<object>}} node - Node to read.
 * @returns {Array<object>} The node's own array (not a copy) or a new empty array.
 */
function getChildren(node) {
  const { children } = node;
  if (!Array.isArray(children)) {
    return [];
  }
  return children;
}
71
/**
 * Returns the class names of an element as a list of strings.
 *
 * `className` is accepted either as an array (each entry stringified) or as a
 * single value. A single value is treated like an HTML `class` attribute and
 * split on whitespace, so `"language-js hljs"` yields two names rather than
 * one token that no membership or prefix check could match.
 *
 * @param {{properties?: {className?: unknown}}} element - Element to read.
 * @returns {string[]} The class names; empty when none are set.
 */
function getClassNames(element) {
  const raw = element.properties?.className;
  if (!raw) {
    return [];
  }
  if (Array.isArray(raw)) {
    return raw.map(String);
  }
  // Filtering drops the empty tokens produced by leading/trailing whitespace.
  return String(raw)
    .split(/\s+/)
    .filter((name) => name.length > 0);
}
80
/**
 * Reads an element property as a string.
 *
 * @param {{properties?: object}} element - Element to read.
 * @param {string} key - Property name.
 * @returns {string|null} The stringified value, or `null` when the property is
 *   `null`, `undefined`, or the element has no properties.
 */
function getStringProp(element, key) {
  const value = element.properties?.[key];
  return value === null || value === undefined ? null : String(value);
}
86
/**
 * Reads an element property as a boolean.
 *
 * @param {{properties?: object}} element - Element to read.
 * @param {string} key - Property name.
 * @returns {boolean|null} The value for real booleans and for the exact strings
 *   `'true'` / `'false'`; `null` for anything else.
 */
function getBooleanProp(element, key) {
  const value = element.properties?.[key];
  switch (value) {
    case true:
    case 'true':
      return true;
    case false:
    case 'false':
      return false;
    default:
      return null;
  }
}
98
/**
 * Normalizes an alignment keyword.
 *
 * @param {string|null|undefined} raw - Candidate value; surrounding whitespace and case are ignored.
 * @returns {'left'|'center'|'right'|undefined} The lower-cased keyword, or
 *   `undefined` for empty input or any other value.
 */
function parseAlignValue(raw) {
  if (!raw) {
    return undefined;
  }
  const normalized = raw.trim().toLowerCase();
  return ['left', 'center', 'right'].includes(normalized) ? normalized : undefined;
}
107
/**
 * Extracts a `text-align` keyword from an inline style string.
 *
 * Only a declaration whose value is exactly `left`, `center` or `right`
 * (followed by `;` or the end of the string) is recognized.
 *
 * @param {string|null} rawStyle - Contents of a `style` attribute.
 * @returns {'left'|'center'|'right'|undefined} The alignment, if declared.
 */
function parseAlignFromStyle(rawStyle) {
  if (!rawStyle) {
    return undefined;
  }
  const declaration = /(?:^|;)\s*text-align\s*:\s*(left|center|right)\s*(?:;|$)/i;
  const hit = declaration.exec(rawStyle);
  const keyword = hit?.[1] ?? null;
  return parseAlignValue(keyword);
}
113
/**
 * Determines the alignment declared on a table cell element.
 *
 * The `align` property is consulted first; the `style` property is used only
 * when `align` yields no recognized keyword.
 *
 * @param {object|undefined} cell - Cell element, or a falsy value for a missing cell.
 * @returns {'left'|'center'|'right'|undefined} The declared alignment, if any.
 */
function extractTableCellAlign(cell) {
  if (!cell) {
    return undefined;
  }
  const attrAlign = parseAlignValue(getStringProp(cell, 'align'));
  return attrAlign !== undefined ? attrAlign : parseAlignFromStyle(getStringProp(cell, 'style'));
}
121
/**
 * Appends a child block ID to a parent block's `children` list (in place).
 *
 * @param {{children: string[]}} parent - Block receiving the child.
 * @param {string} childId - ID to append.
 */
function appendChild(parent, childId) {
  const { children } = parent;
  children.push(childId);
}
124
/**
 * Creates and registers a textual block.
 *
 * @param {object} ctx - Conversion context.
 * @param {string} type - Block type to assign.
 * @param {string|null} parentId - ID of the parent block.
 * @param {Array<object>} inlines - Inline nodes for the payload.
 * @param {object} [styleOverrides] - Style entries layered over the payload defaults.
 * @returns {string} The new block's ID.
 */
function createTextualBlock(ctx, type, parentId, inlines, styleOverrides) {
  const id = nextBlockId(ctx);
  const payload = createTextPayload(inlines, styleOverrides);
  addBlock(ctx, { id, type, parentId, children: [], payload });
  return id;
}
136
/**
 * Creates and registers a `divider` block with an empty payload.
 *
 * @param {object} ctx - Conversion context.
 * @param {string|null} parentId - ID of the parent block.
 * @returns {string} The new block's ID.
 */
function createDividerBlock(ctx, parentId) {
  const id = nextBlockId(ctx);
  const divider = { id, type: 'divider', parentId, children: [], payload: {} };
  addBlock(ctx, divider);
  return id;
}
147
/**
 * Creates and registers an `image` block.
 *
 * The payload starts with zero dimensions, an empty token and left alignment.
 * When a source URL is given it is recorded under `selector.attrs.sourceUrl`;
 * otherwise the block has no `selector`.
 *
 * @param {object} ctx - Conversion context.
 * @param {string|null} parentId - ID of the parent block.
 * @param {string|null} [sourceUrl] - Original image URL, if known.
 * @returns {string} The new block's ID.
 */
function createImageBlock(ctx, parentId, sourceUrl) {
  const id = nextBlockId(ctx);
  const payload = { width: 0, height: 0, token: '', align: 'left' };
  const image = { id, type: 'image', parentId, children: [], payload };
  if (sourceUrl) {
    image.selector = { attrs: { sourceUrl } };
  }
  addBlock(ctx, image);
  return id;
}
167
/**
 * Creates and registers an `iframe` block.
 *
 * @param {object} ctx - Conversion context.
 * @param {string|null} parentId - ID of the parent block.
 * @param {string} url - URL stored in the component.
 * @param {string} iframeType - Provider type stored in the component.
 * @returns {string} The new block's ID.
 */
function createIframeBlock(ctx, parentId, url, iframeType) {
  const id = nextBlockId(ctx);
  const component = { url, iframeType };
  addBlock(ctx, { id, type: 'iframe', parentId, children: [], payload: { component } });
  return id;
}
183
/**
 * Creates and registers an empty `table` block (no cells, zero rows and columns).
 *
 * @param {object} ctx - Conversion context.
 * @param {string|null} parentId - ID of the parent block.
 * @returns {string} The new block's ID.
 */
function createTableBlock(ctx, parentId) {
  const id = nextBlockId(ctx);
  const payload = {
    cells: [],
    rowSize: 0,
    columnSize: 0,
    columnWidth: [],
    headerColumn: false,
    headerRow: false,
    mergeInfo: [],
  };
  addBlock(ctx, { id, type: 'table', parentId, children: [], payload });
  return id;
}
203
/**
 * Creates and registers a `table_cell` block with an empty payload.
 *
 * @param {object} ctx - Conversion context.
 * @param {string|null} parentId - ID of the owning table block.
 * @returns {string} The new block's ID.
 */
function createTableCellBlock(ctx, parentId) {
  const id = nextBlockId(ctx);
  const cell = { id, type: 'table_cell', parentId, children: [], payload: {} };
  addBlock(ctx, cell);
  return id;
}
214
/**
 * Collapses neighbouring `text_run` inlines that carry identical marks.
 *
 * Marks are compared through their JSON serialization, so two runs only merge
 * when their marks have the same keys in the same order with the same values.
 * A merged run keeps the ID and marks of the first run in the group.
 *
 * The input objects are never modified: when two runs merge, the accumulated
 * entry is replaced by a copy carrying the concatenated text. Inlines that do
 * not take part in a merge are returned as the same object references.
 *
 * @param {Array<object>} inlines - Inline nodes in document order.
 * @returns {Array<object>} A new array with adjacent compatible runs merged.
 */
function mergeAdjacentTextRuns(inlines) {
  const merged = [];
  for (const inline of inlines) {
    const prev = merged.at(-1);
    const canMerge =
      prev !== undefined &&
      inline.kind === 'text_run' &&
      prev.kind === 'text_run' &&
      JSON.stringify(prev.marks) === JSON.stringify(inline.marks);
    if (canMerge) {
      // Copy-on-merge: `prev` may be an object owned by the caller.
      merged[merged.length - 1] = { ...prev, text: (prev.text ?? '') + (inline.text ?? '') };
      continue;
    }
    merged.push(inline);
  }
  return merged;
}
229
/**
 * Reports whether an element carries the given class name.
 *
 * @param {object} element - Element to inspect.
 * @param {string} expected - Class name to look for.
 * @returns {boolean}
 */
function hasClassName(element, expected) {
  const names = getClassNames(element);
  return names.includes(expected);
}
232
/**
 * Reports whether an element is a `<code>` tagged with the `math-inline` class.
 *
 * @param {object} element - Element to inspect.
 * @returns {boolean}
 */
function isMathInlineCodeElement(element) {
  return element.tagName === 'code' && hasClassName(element, 'math-inline');
}
237
/**
 * Reports whether an element is a `<code>` tagged with the `math-display` class.
 *
 * @param {object} element - Element to inspect.
 * @returns {boolean}
 */
function isMathDisplayCodeElement(element) {
  return element.tagName === 'code' && hasClassName(element, 'math-display');
}
242
/**
 * Converts a list of HAST nodes into inline nodes, carrying formatting marks
 * down through nested elements.
 *
 * Handling per node:
 * - text: emitted as a `text_run` (empty strings are skipped);
 * - `<br>`: emitted as a `text_run` containing a newline;
 * - checkbox `<input>`: dropped;
 * - `<code class="math-inline">`: emitted as an `equation` when non-empty;
 * - other `<code>`: emitted as a `text_run` with `inlineCode` set;
 * - `<img>`: replaced by its `alt` text when that is non-empty;
 * - any other element: recursed into, with bold / italic / strikethrough /
 *   underline / link marks updated according to the tag.
 *
 * @param {object} ctx - Conversion context (supplies inline IDs).
 * @param {Array<object>} nodes - HAST nodes to convert.
 * @param {object} [marks] - Marks inherited from enclosing elements.
 * @returns {Array<object>} Inline nodes with adjacent compatible runs merged.
 */
function parseInlineNodes(ctx, nodes, marks = createDefaultMarks()) {
  const collected = [];
  const pushTextRun = (text, runMarks) => {
    collected.push({ id: nextInlineId(ctx), kind: 'text_run', marks: runMarks, text });
  };

  for (const node of nodes) {
    if (isText(node)) {
      if (node.value.length === 0) {
        continue;
      }
      pushTextRun(node.value, cloneMarks(marks));
      continue;
    }
    if (!isElement(node)) {
      continue;
    }

    const tag = node.tagName;
    if (tag === 'br') {
      pushTextRun('\n', cloneMarks(marks));
      continue;
    }
    if (tag === 'input' && getStringProp(node, 'type') === 'checkbox') {
      continue;
    }
    if (tag === 'code') {
      if (isMathInlineCodeElement(node)) {
        const latex = trimBoundaryNewlines(toString(node));
        if (latex.length > 0) {
          collected.push({ id: nextInlineId(ctx), kind: 'equation', marks: cloneMarks(marks), latex });
        }
        continue;
      }
      const codeMarks = cloneMarks(marks);
      codeMarks.inlineCode = true;
      pushTextRun(toString(node), codeMarks);
      continue;
    }
    if (tag === 'img') {
      const altText = getStringProp(node, 'alt') ?? '';
      if (altText.length > 0) {
        pushTextRun(altText, cloneMarks(marks));
      }
      continue;
    }

    // Generic inline container: adjust marks for the tag, then descend.
    const childMarks = cloneMarks(marks);
    switch (tag) {
      case 'strong':
      case 'b':
        childMarks.bold = true;
        break;
      case 'em':
      case 'i':
        childMarks.italic = true;
        break;
      case 'del':
      case 's':
      case 'strike':
        childMarks.strikethrough = true;
        break;
      case 'u':
        childMarks.underline = true;
        break;
      case 'a': {
        const href = getStringProp(node, 'href');
        childMarks.link = href ? { url: href } : null;
        break;
      }
      default:
        break;
    }
    collected.push(...parseInlineNodes(ctx, getChildren(node), childMarks));
  }
  return mergeAdjacentTextRuns(collected);
}
328
/**
 * Reports whether a node is a text node whose value is empty or whitespace only.
 *
 * @param {object} node - Node to inspect.
 * @returns {boolean}
 */
function isWhitespaceTextNode(node) {
  if (!isText(node)) {
    return false;
  }
  return node.value.trim().length === 0;
}
331
/**
 * Returns the given nodes without whitespace-only text nodes.
 *
 * @param {Array<object>} nodes - Nodes to filter.
 * @returns {Array<object>} A new array preserving the original order.
 */
function getMeaningfulChildren(nodes) {
  const kept = [];
  for (const node of nodes) {
    if (!isWhitespaceTextNode(node)) {
      kept.push(node);
    }
  }
  return kept;
}
334
/**
 * Returns the `src` of a paragraph that consists of exactly one `<img>`
 * (ignoring whitespace-only text).
 *
 * @param {object} paragraph - Paragraph element.
 * @returns {string|null} The image source, or `null` when the paragraph holds
 *   anything else or the image has no `src`.
 */
function findStandaloneImageSrcInParagraph(paragraph) {
  const candidates = getMeaningfulChildren(getChildren(paragraph));
  const sole = candidates.length === 1 ? candidates[0] : null;
  const isLoneImage = Boolean(sole) && isElement(sole) && sole.tagName === 'img';
  return isLoneImage ? getStringProp(sole, 'src') : null;
}
343
/**
 * Parses a string as an absolute HTTP(S) URL.
 *
 * @param {string} url - Candidate URL.
 * @returns {URL|undefined} The parsed URL, or `undefined` when the string does
 *   not parse or uses a protocol other than `http:` / `https:`.
 */
function parseHttpUrl(url) {
  let candidate;
  try {
    candidate = new URL(url);
  }
  catch {
    // Not an absolute URL; report "no result" rather than throwing.
    return undefined;
  }
  return ['http:', 'https:'].includes(candidate.protocol) ? candidate : undefined;
}
355
/**
 * Percent-decodes a string, falling back to the input when it is malformed.
 *
 * @param {string} input - Possibly percent-encoded text.
 * @returns {string} The decoded text, or `input` unchanged if decoding throws.
 */
function safeDecodeURIComponent(input) {
  let decoded = input;
  try {
    decoded = decodeURIComponent(input);
  }
  catch {
    // Malformed escape sequence: keep the raw input.
  }
  return decoded;
}
363
/**
 * Reports whether a host is the target domain itself or one of its subdomains.
 *
 * @param {string} host - Host name to test.
 * @param {string} target - Domain to match against.
 * @returns {boolean}
 */
function hostEqualsOrEndsWith(host, target) {
  if (host === target) {
    return true;
  }
  // The leading dot keeps look-alike hosts such as "notfigma.com" from matching.
  return host.endsWith(`.${target}`);
}
366
/**
 * Maps a URL to the iframe provider type associated with its host.
 *
 * The URL is trimmed first; if it starts with a percent-encoded `http(s)://`
 * prefix it is decoded before parsing. Only HTTP(S) URLs are considered, and a
 * host matches a provider when it equals one of the provider's domains or is a
 * subdomain of it.
 *
 * @param {string} rawUrl - URL as found in the document.
 * @returns {string|undefined} The provider type, or `undefined` when the URL is
 *   empty, unparsable, or its host is not recognized.
 */
function resolveIframeTypeByUrl(rawUrl) {
  const trimmed = rawUrl.trim();
  if (!trimmed) {
    return undefined;
  }
  const candidate = /^https?%3a%2f%2f/i.test(trimmed) ? safeDecodeURIComponent(trimmed) : trimmed;
  const parsed = parseHttpUrl(candidate);
  if (!parsed) {
    return undefined;
  }
  const host = parsed.hostname.toLowerCase();

  // Checked in order; the first provider with a matching domain wins.
  const providers = [
    ['bilibili', ['bilibili.com', 'b23.tv']],
    ['xigua', ['douyin.com']],
    ['youku', ['youku.com']],
    ['airtable', ['airtable.com']],
    ['baidu_map', ['map.baidu.com']],
    ['gaode_map', ['amap.com']],
    ['figma', ['figma.com']],
    ['modao', ['modao.cc']],
    ['canva', ['canva.cn', 'canva.com']],
    ['codepen', ['codepen.io']],
    ['feishu_wenjuan', ['wenjuan.feishu.cn']],
    ['jinshuju', ['jinshuju.net', 'jinshuju.com']],
  ];
  for (const [iframeType, domains] of providers) {
    if (domains.some((domain) => hostEqualsOrEndsWith(host, domain))) {
      return iframeType;
    }
  }
  return undefined;
}
413
/**
 * Detects a paragraph that consists of exactly one link to a recognized
 * iframe provider (ignoring whitespace-only text).
 *
 * @param {object} paragraph - Paragraph element.
 * @returns {{url: string, iframeType: string}|null} The normalized URL and the
 *   provider type, or `null` when the paragraph does not qualify.
 */
function findStandaloneIframePayloadInParagraph(paragraph) {
  const candidates = getMeaningfulChildren(getChildren(paragraph));
  const anchor = candidates.length === 1 ? candidates[0] : null;
  if (!anchor || !isElement(anchor) || anchor.tagName !== 'a') {
    return null;
  }
  const href = getStringProp(anchor, 'href');
  if (!href) {
    return null;
  }
  const iframeType = resolveIframeTypeByUrl(href);
  if (!iframeType) {
    return null;
  }
  // Re-parse the href (decoding a percent-encoded scheme prefix) to obtain the
  // serialized URL stored in the payload.
  const trimmedHref = href.trim();
  const decoded = /^https?%3a%2f%2f/i.test(trimmedHref) ? safeDecodeURIComponent(trimmedHref) : trimmedHref;
  const parsed = parseHttpUrl(decoded);
  return parsed ? { url: parsed.toString(), iframeType } : null;
}
435
/**
 * Detects a table cell whose only meaningful content is a single image or a
 * single link to a recognized iframe provider.
 *
 * One level of `<p>` / `<div>` wrapping around that single item is looked
 * through, provided the wrapper itself has exactly one meaningful child.
 *
 * @param {object} cell - Table cell element.
 * @returns {{kind: 'image', sourceUrl: string|null}
 *   | {kind: 'iframe', url: string, iframeType: string}
 *   | null} The rich item description, or `null` when the cell does not qualify.
 */
function findStandaloneRichItemInTableCell(cell) {
  const cellChildren = getMeaningfulChildren(getChildren(cell));
  if (cellChildren.length !== 1) {
    return null;
  }
  let item = cellChildren[0];
  const isWrapper = Boolean(item) && isElement(item) && (item.tagName === 'p' || item.tagName === 'div');
  if (isWrapper) {
    const wrapped = getMeaningfulChildren(getChildren(item));
    if (wrapped.length !== 1) {
      return null;
    }
    item = wrapped[0];
  }
  if (!item || !isElement(item)) {
    return null;
  }

  if (item.tagName === 'img') {
    return { kind: 'image', sourceUrl: getStringProp(item, 'src') };
  }
  if (item.tagName !== 'a') {
    return null;
  }

  const href = getStringProp(item, 'href');
  if (!href) {
    return null;
  }
  const iframeType = resolveIframeTypeByUrl(href);
  if (!iframeType) {
    return null;
  }
  const trimmedHref = href.trim();
  const decoded = /^https?%3a%2f%2f/i.test(trimmedHref) ? safeDecodeURIComponent(trimmedHref) : trimmedHref;
  const parsed = parseHttpUrl(decoded);
  return parsed ? { kind: 'iframe', url: parsed.toString(), iframeType } : null;
}
472
/**
 * Maps a heading tag name such as `h3` to a block type such as `heading3`.
 *
 * The level is read from everything after the first character. Levels that are
 * not integers or are below 1 map to `heading1`; levels above 9 map to `heading9`.
 *
 * @param {string} tagName - Heading tag name.
 * @returns {string} A block type from `heading1` to `heading9`.
 */
function parseHeadingType(tagName) {
  const level = Number(tagName.slice(1));
  const isUsable = Number.isInteger(level) && level >= 1;
  if (!isUsable) {
    return 'heading1';
  }
  return `heading${Math.min(level, 9)}`;
}
480
/**
 * Reports whether a list item carries the `task-list-item` class.
 *
 * @param {object} li - List item element.
 * @returns {boolean}
 */
function isTaskListItem(li) {
  const names = getClassNames(li);
  return names.includes('task-list-item');
}
483
/**
 * Reports whether a node is an `<input type="checkbox">` element.
 *
 * @param {object} node - Node to inspect.
 * @returns {boolean}
 */
function isCheckboxInput(node) {
  return isElement(node) && node.tagName === 'input' && getStringProp(node, 'type') === 'checkbox';
}

/**
 * Splits a list item's children into its own inline content, the nested blocks
 * that become child blocks, and the task checkbox state.
 *
 * - Whitespace-only text is ignored.
 * - A checkbox `<input>` sets `checked` and is not part of the content. It is
 *   recognized both as a direct child of the item and as the first meaningful
 *   child of the leading paragraph; without the second case a task item whose
 *   checkbox is wrapped in that paragraph would always be reported as
 *   unchecked.
 * - The first `<p>` / `<div>` encountered while no inline content has been
 *   collected yet is unwrapped into the item's content; later ones become
 *   nested blocks.
 * - Lists, tables, `<pre>`, blockquotes, rules and `h1`-`h6` become nested blocks.
 * - Everything else is inline content.
 *
 * @param {object} li - List item element.
 * @returns {{contentNodes: Array<object>, nestedBlocks: Array<object>, checked: boolean}}
 */
function splitListItemContent(li) {
  const nestedBlockTags = ['table', 'pre', 'blockquote', 'hr', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'];
  const contentNodes = [];
  const nestedBlocks = [];
  let checked = false;
  let consumedLeadParagraph = false;

  for (const child of getChildren(li)) {
    if (isWhitespaceTextNode(child)) {
      continue;
    }
    if (isCheckboxInput(child)) {
      checked = getBooleanProp(child, 'checked') ?? false;
      continue;
    }
    const childElement = isElement(child) ? child : null;
    const tagName = childElement?.tagName;

    if (tagName === 'ul' || tagName === 'ol') {
      nestedBlocks.push(childElement);
      continue;
    }
    if (tagName === 'p' || tagName === 'div') {
      if (!consumedLeadParagraph && contentNodes.length === 0) {
        const paragraphChildren = getChildren(childElement);
        const lead = paragraphChildren.find((node) => !isWhitespaceTextNode(node));
        if (lead && isCheckboxInput(lead)) {
          // Checkbox wrapped in the lead paragraph: read its state here and
          // drop it from the content.
          checked = getBooleanProp(lead, 'checked') ?? false;
          contentNodes.push(...paragraphChildren.filter((node) => node !== lead));
        }
        else {
          contentNodes.push(...paragraphChildren);
        }
        consumedLeadParagraph = true;
      }
      else {
        nestedBlocks.push(childElement);
      }
      continue;
    }
    if (childElement && nestedBlockTags.includes(tagName)) {
      nestedBlocks.push(childElement);
      continue;
    }
    contentNodes.push(child);
  }
  return { contentNodes, nestedBlocks, checked };
}
520
/**
 * Converts the `<li>` children of a list into blocks.
 *
 * Each item becomes one textual block of type `kind`, or `todo` (with a `done`
 * style entry) for task-list items. Nested blocks found inside an item are
 * converted with the item's block as parent and appended to its children.
 *
 * @param {object} ctx - Conversion context.
 * @param {object} list - `<ul>` / `<ol>` element.
 * @param {string|null} parentId - ID of the block that will own the items.
 * @param {string} kind - Block type for regular items.
 * @returns {string[]} IDs of the item blocks, in document order.
 */
function convertList(ctx, list, parentId, kind) {
  const itemIds = [];
  const items = getChildren(list).filter((child) => isElement(child) && child.tagName === 'li');
  for (const item of items) {
    const isTask = isTaskListItem(item);
    const { contentNodes, nestedBlocks, checked } = splitListItemContent(item);
    const inlines = parseInlineNodes(ctx, contentNodes);
    const styleOverrides = isTask ? { done: checked } : undefined;
    const itemId = createTextualBlock(ctx, isTask ? 'todo' : kind, parentId, inlines, styleOverrides);
    itemIds.push(itemId);

    for (const nested of nestedBlocks) {
      const nestedIds = convertBlock(ctx, nested, itemId);
      const itemBlock = ctx.blocks[itemId];
      if (!itemBlock) {
        continue;
      }
      nestedIds.forEach((nestedId) => appendChild(itemBlock, nestedId));
    }
  }
  return itemIds;
}
543
/**
 * Extracts the language from the first `language-*` class of a code element.
 *
 * @param {object} codeElement - `<code>` element.
 * @returns {string|null} The text after `language-`, or `null` when there is no
 *   such class or the first one has nothing after the prefix.
 */
function findLanguageFromCodeClass(codeElement) {
  const prefix = 'language-';
  const tagged = getClassNames(codeElement).find((name) => name.startsWith(prefix));
  if (tagged === undefined) {
    return null;
  }
  return tagged.slice(prefix.length) || null;
}
551
/**
 * Converts a `<pre>` element.
 *
 * A `<pre>` whose `<code>` child is tagged `math-display` becomes a `text`
 * block holding a single equation (or no inlines when the formula is empty).
 * Any other `<pre>` becomes a `code` block whose text is the code content minus
 * one trailing newline, with the language taken from the `<code>` class and
 * `wrap` disabled.
 *
 * @param {object} ctx - Conversion context.
 * @param {object} pre - `<pre>` element.
 * @param {string|null} parentId - ID of the parent block.
 * @returns {string[]} A one-element list with the new block's ID.
 */
function convertPre(ctx, pre, parentId) {
  const codeElement = getChildren(pre).find((node) => isElement(node) && node.tagName === 'code');
  // Wraps a non-empty value into a single inline; empty content yields no inlines.
  const singleInline = (kind, field, value) => {
    if (!value.length) {
      return [];
    }
    return [{ id: nextInlineId(ctx), kind, marks: createDefaultMarks(), [field]: value }];
  };

  if (codeElement && isMathDisplayCodeElement(codeElement)) {
    const formula = trimBoundaryNewlines(toString(codeElement));
    return [createTextualBlock(ctx, 'text', parentId, singleInline('equation', 'latex', formula))];
  }

  const rawSource = toString(codeElement ? codeElement : pre);
  const source = trimSingleTrailingNewline(rawSource);
  const language = codeElement ? findLanguageFromCodeClass(codeElement) : null;
  const inlines = singleInline('text_run', 'text', source);
  return [createTextualBlock(ctx, 'code', parentId, inlines, { language, wrap: false })];
}
586
/**
 * Removes at most one trailing line break (`\n` or `\r\n`) from a string.
 *
 * @param {string} value - Text to trim.
 * @returns {string} The text without its final line break.
 */
function trimSingleTrailingNewline(value) {
  const trailingNewline = /\r?\n$/;
  return value.replace(trailingNewline, '');
}
589
/**
 * Collects the `<tr>` elements of a table in document order.
 *
 * Rows are taken from the table's direct children and from the direct children
 * of its `<thead>`, `<tbody>` and `<tfoot>` sections.
 *
 * @param {object} table - `<table>` element.
 * @returns {Array<object>} The row elements.
 */
function extractTableRows(table) {
  const sectionTags = ['thead', 'tbody', 'tfoot'];
  const isRow = (node) => isElement(node) && node.tagName === 'tr';
  return getChildren(table).flatMap((child) => {
    if (!isElement(child)) {
      return [];
    }
    if (child.tagName === 'tr') {
      return [child];
    }
    return sectionTags.includes(child.tagName) ? getChildren(child).filter((row) => isRow(row)) : [];
  });
}
608
/**
 * Collects the `<th>` / `<td>` children of a table row in document order.
 *
 * @param {object} row - `<tr>` element.
 * @returns {Array<object>} The cell elements.
 */
function extractRowCells(row) {
  return getChildren(row).filter(
    (child) => isElement(child) && (child.tagName === 'th' || child.tagName === 'td'),
  );
}
617
/**
 * Converts a `<table>` element into a table block with one cell block per grid
 * position.
 *
 * The grid is `rows x widest row`; rows with fewer cells are padded with empty
 * cells. A column's alignment is the first one declared by any cell in that
 * column, scanning rows from the top. A cell holding a single image or a single
 * recognized iframe link gets that block as its only child; every other cell
 * gets a `text` block with the cell's inline content.
 *
 * @param {object} ctx - Conversion context.
 * @param {object} table - `<table>` element.
 * @param {string|null} parentId - ID of the parent block.
 * @returns {string[]} A one-element list with the table block's ID.
 */
function convertTable(ctx, table, parentId) {
  const tableId = createTableBlock(ctx, parentId);
  const rowCellMatrix = extractTableRows(table).map((row) => extractRowCells(row));
  const rowSize = rowCellMatrix.length;
  const columnSize = rowCellMatrix.reduce((widest, rowCells) => Math.max(widest, rowCells.length), 0);

  const columnAlign = Array.from({ length: columnSize }, (_, column) => {
    for (const rowCells of rowCellMatrix) {
      const align = extractTableCellAlign(rowCells[column]);
      if (align !== undefined) {
        return align;
      }
    }
    return undefined;
  });
  const hasExplicitColumnAlign = columnAlign.some((value) => value !== undefined);

  const cells = [];
  for (const rowCells of rowCellMatrix) {
    for (let column = 0; column < columnSize; column += 1) {
      const cell = rowCells[column];
      const cellId = createTableCellBlock(ctx, tableId);
      const cellBlock = ctx.blocks[cellId];
      const isCellBlock = cellBlock?.type === 'table_cell';
      const richItem = cell ? findStandaloneRichItemInTableCell(cell) : null;

      let contentId;
      if (isCellBlock && richItem?.kind === 'image') {
        contentId = createImageBlock(ctx, cellId, richItem.sourceUrl);
      }
      else if (isCellBlock && richItem?.kind === 'iframe') {
        contentId = createIframeBlock(ctx, cellId, richItem.url, richItem.iframeType);
      }
      else {
        const inlines = cell ? parseInlineNodes(ctx, getChildren(cell)) : [];
        const declaredAlign = columnAlign[column];
        const styleOverrides = declaredAlign ? { align: declaredAlign } : undefined;
        contentId = createTextualBlock(ctx, 'text', cellId, inlines, styleOverrides);
      }
      if (isCellBlock) {
        cellBlock.children = [contentId];
      }
      cells.push(cellId);
    }
  }

  const tableBlock = ctx.blocks[tableId];
  if (tableBlock?.type === 'table') {
    const hasHead = getChildren(table).some((node) => isElement(node) && node.tagName === 'thead');
    tableBlock.children = [...cells];
    tableBlock.payload = {
      cells,
      rowSize,
      columnSize,
      columnWidth: Array.from({ length: columnSize }, () => 240),
      ...(hasExplicitColumnAlign ? { columnAlign } : {}),
      headerColumn: false,
      headerRow: hasHead,
      mergeInfo: [],
    };
  }
  return [tableId];
}
679
/**
 * Converts a `<blockquote>` into a single `quote` block.
 *
 * The quote's content is flattened to plain text (leading and trailing line
 * breaks removed) and stored as one unformatted text run; an empty quote has
 * no inlines.
 *
 * @param {object} ctx - Conversion context.
 * @param {object} blockquote - `<blockquote>` element.
 * @param {string|null} parentId - ID of the parent block.
 * @returns {string[]} A one-element list with the quote block's ID.
 */
function convertBlockquote(ctx, blockquote, parentId) {
  const text = trimBoundaryNewlines(toString(blockquote));
  const inlines = [];
  if (text.length) {
    inlines.push({ id: nextInlineId(ctx), kind: 'text_run', marks: createDefaultMarks(), text });
  }
  return [createTextualBlock(ctx, 'quote', parentId, inlines)];
}
693
/**
 * Strips every leading and trailing line break (`\n` or `\r\n`) from a string.
 * Other whitespace is left alone.
 *
 * @param {string} value - Text to trim.
 * @returns {string} The text without boundary line breaks.
 */
function trimBoundaryNewlines(value) {
  const withoutLeading = value.replace(/^(?:\r?\n)+/, '');
  return withoutLeading.replace(/(?:\r?\n)+$/, '');
}
696
/**
 * Converts an element that has no dedicated handler.
 *
 * Block containers (see BLOCK_CONTAINER_TAGS) are transparent: their children
 * are converted directly under `parentId`. Any other element is flattened to
 * plain text and emitted as one `text` block, or dropped when that text is
 * blank.
 *
 * @param {object} ctx - Conversion context.
 * @param {object} element - Element to convert.
 * @param {string|null} parentId - ID of the parent block.
 * @returns {string[]} IDs of the blocks created.
 */
function convertUnknownElement(ctx, element, parentId) {
  if (BLOCK_CONTAINER_TAGS.has(element.tagName)) {
    return getChildren(element).flatMap((child) => convertBlock(ctx, child, parentId));
  }
  const text = trimBoundaryNewlines(toString(element));
  if (text.trim().length === 0) {
    return [];
  }
  const run = { id: nextInlineId(ctx), kind: 'text_run', marks: createDefaultMarks(), text };
  return [createTextualBlock(ctx, 'text', parentId, [run])];
}
719
/**
 * Converts one HAST node into zero or more blocks under `parentId`.
 *
 * Whitespace-only text and non-element, non-text nodes produce nothing. Bare
 * text and `<br>` become `text` blocks. A paragraph becomes an image block or
 * an iframe block when it consists solely of an image or a recognized provider
 * link, and a `text` block otherwise. `h1`-`h9`, lists, `<pre>`, blockquotes,
 * rules, tables and images map to their dedicated converters; every other
 * element goes through `convertUnknownElement`.
 *
 * @param {object} ctx - Conversion context.
 * @param {object} node - HAST node to convert.
 * @param {string|null} parentId - ID of the parent block.
 * @returns {string[]} IDs of the blocks created. The caller is responsible for
 *   attaching them to the parent's children.
 */
function convertBlock(ctx, node, parentId) {
  if (isWhitespaceTextNode(node)) {
    return [];
  }
  const plainRun = (text) => ({ id: nextInlineId(ctx), kind: 'text_run', marks: createDefaultMarks(), text });
  if (isText(node)) {
    return [createTextualBlock(ctx, 'text', parentId, [plainRun(node.value)])];
  }
  if (!isElement(node)) {
    return [];
  }

  const tag = node.tagName;
  if (tag === 'p') {
    const imageSrc = findStandaloneImageSrcInParagraph(node);
    if (imageSrc) {
      return [createImageBlock(ctx, parentId, imageSrc)];
    }
    const iframe = findStandaloneIframePayloadInParagraph(node);
    if (iframe) {
      return [createIframeBlock(ctx, parentId, iframe.url, iframe.iframeType)];
    }
    return [createTextualBlock(ctx, 'text', parentId, parseInlineNodes(ctx, getChildren(node)))];
  }
  if (['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'h7', 'h8', 'h9'].includes(tag)) {
    const inlines = parseInlineNodes(ctx, getChildren(node));
    return [createTextualBlock(ctx, parseHeadingType(tag), parentId, inlines)];
  }
  if (tag === 'ul') {
    return convertList(ctx, node, parentId, 'bullet');
  }
  if (tag === 'ol') {
    return convertList(ctx, node, parentId, 'ordered');
  }
  if (tag === 'pre') {
    return convertPre(ctx, node, parentId);
  }
  if (tag === 'blockquote') {
    return convertBlockquote(ctx, node, parentId);
  }
  if (tag === 'hr') {
    return [createDividerBlock(ctx, parentId)];
  }
  if (tag === 'table') {
    return convertTable(ctx, node, parentId);
  }
  if (tag === 'img') {
    return [createImageBlock(ctx, parentId, getStringProp(node, 'src'))];
  }
  if (tag === 'br') {
    return [createTextualBlock(ctx, 'text', parentId, [plainRun('\n')])];
  }
  return convertUnknownElement(ctx, node, parentId);
}
791
/**
 * Reports whether a block's type is listed in LAST_TEXTUAL_BLOCK_TYPE_SET.
 *
 * @param {{type: string}} block - Block to inspect.
 * @returns {boolean}
 */
function isTextualBlockNode(block) {
  const { type } = block;
  return LAST_TEXTUAL_BLOCK_TYPE_SET.has(type);
}
794
/**
 * Projects an inline node onto the text it contributes to a text scope.
 *
 * Only `text_run` inlines are marked editable. Mentions, equations and link
 * previews contribute their user ID / title / LaTeX / title-or-URL as
 * read-only text; every other kind contributes nothing.
 *
 * @param {object} inline - Inline node.
 * @returns {{text: string, editable: boolean}} The projection.
 */
function toSearchText(inline) {
  const readOnly = (text) => ({ text, editable: false });
  const { kind } = inline;
  if (kind === 'text_run') {
    return { text: inline.text ?? '', editable: true };
  }
  if (kind === 'mention_user') {
    return readOnly(inline.userId ?? '');
  }
  if (kind === 'equation') {
    return readOnly(inline.latex ?? '');
  }
  if (kind === 'mention_doc') {
    return readOnly(inline.title ?? '');
  }
  if (kind === 'link_preview') {
    return readOnly(inline.title ?? inline.url ?? '');
  }
  // reminder, inline_block, inline_file and unknown kinds carry no searchable text.
  return readOnly('');
}
816
/**
 * Builds the text scope of a textual block: the concatenated searchable text
 * plus, for each contributing inline, the `[from, to)` range it occupies.
 *
 * Inlines that contribute no text are left out of the segment list.
 *
 * @param {string} scopeId - ID to assign to the scope.
 * @param {object} block - Textual block whose `payload.inlines` are scanned.
 * @returns {{id: string, blockId: string, blockType: string, normalizedText: string, segments: Array<object>}}
 */
function buildScopeForTopLevelTextBlock(scopeId, block) {
  const pieces = [];
  const segments = [];
  let cursor = 0;
  for (const inline of block.payload.inlines) {
    const { text, editable } = toSearchText(inline);
    if (text.length === 0) {
      continue;
    }
    const from = cursor;
    cursor += text.length;
    pieces.push(text);
    segments.push({ inlineId: inline.id, inlineKind: inline.kind, from, to: cursor, editable });
  }
  return {
    id: scopeId,
    blockId: block.id,
    blockType: block.type,
    normalizedText: pieces.join(''),
    segments,
  };
}
843
/**
 * Returns a copy of the IDs of a document's or fragment's top-level blocks.
 *
 * An object with a `topLevel` property is read from that list; otherwise the
 * children of the block named by `rootId` are used.
 *
 * @param {object} doc - Document or fragment.
 * @returns {string[]} A new array of block IDs; empty when the root block is missing.
 */
function getTopLevelBlockIds(doc) {
  if (!('topLevel' in doc)) {
    const rootBlock = doc.blocks[doc.rootId];
    return rootBlock ? [...rootBlock.children] : [];
  }
  return [...doc.topLevel];
}
850
/**
 * Builds the lookup indexes of a document or fragment.
 *
 * - `byType` lists the IDs of all blocks per block type.
 * - `textScopes` holds one scope (`scope_1`, `scope_2`, ...) per top-level
 *   textual block other than `page`.
 * - `textScopeByBlockId` maps each such block's ID to its scope ID.
 *
 * @param {object} doc - Document or fragment whose `blocks` are indexed.
 * @returns {{byType: object, textScopes: object, textScopeByBlockId: object}}
 */
function buildIndexes(doc) {
  const byType = {};
  for (const { type, id } of Object.values(doc.blocks)) {
    (byType[type] ??= []).push(id);
  }

  const textScopes = {};
  const textScopeByBlockId = {};
  let nextScopeNumber = 1;
  for (const blockId of getTopLevelBlockIds(doc)) {
    const block = doc.blocks[blockId];
    const isScoped = Boolean(block) && isTextualBlockNode(block) && block.type !== 'page';
    if (!isScoped) {
      continue;
    }
    const scopeId = `scope_${nextScopeNumber}`;
    nextScopeNumber += 1;
    textScopes[scopeId] = buildScopeForTopLevelTextBlock(scopeId, block);
    textScopeByBlockId[block.id] = scopeId;
  }
  return { byType, textScopes, textScopeByBlockId };
}
877
/**
 * Normalizes a caller-supplied document ID.
 *
 * @param {string|null|undefined} value - Requested ID.
 * @returns {string} `doc_1` for missing or blank input; otherwise the trimmed
 *   value, prefixed with `doc_` unless it already starts with it.
 */
function normalizeDocumentId(value) {
  const trimmed = value ? value.trim() : '';
  if (trimmed.length === 0) {
    return 'doc_1';
  }
  return trimmed.startsWith('doc_') ? trimmed : `doc_${trimmed}`;
}
884
/**
 * Deep-copies a block through a JSON round trip.
 *
 * As with any JSON round trip, properties whose value is `undefined` are
 * dropped from the copy.
 *
 * @param {object} value - Block to copy.
 * @returns {object} An independent copy.
 */
function deepCloneBlock(value) {
  const serialized = JSON.stringify(value);
  return JSON.parse(serialized);
}
887
/**
 * Converts a HAST tree into a LAST document or fragment.
 *
 * A `page` root block is always created and the tree's children are converted
 * beneath it. In `document` mode the result keeps that root (`rootId`) and
 * shares the conversion context's block registry. In any other mode (the
 * default is `fragment`) the root is dropped, the remaining blocks are
 * deep-copied, the former root children are listed in `topLevel`, and their
 * `parentId` is reset to `null`.
 *
 * @param {object} hast - HAST root (or any node with `children`).
 * @param {{mode?: string, documentId?: string}} [options] - Output mode and
 *   the ID to normalize into the result's `id`.
 * @returns {object} The LAST document or fragment, including its indexes.
 * @throws {Error} If the root page block cannot be found after creation.
 */
export function hastToLAST(hast, options) {
  const ctx = createContext();
  const mode = options?.mode ?? 'fragment';
  const rootId = createTextualBlock(ctx, 'page', null, []);
  const root = ctx.blocks[rootId];
  if (!root || root.type !== 'page') {
    throw new Error('Failed to initialize LAST root page block.');
  }

  for (const child of getChildren(hast)) {
    convertBlock(ctx, child, rootId).forEach((childId) => appendChild(root, childId));
  }

  const header = {
    schema: 'LAST',
    version: '1.0.0',
    id: normalizeDocumentId(options?.documentId),
  };

  if (mode === 'document') {
    const doc = { ...header, rootId, blocks: ctx.blocks };
    doc.indexes = buildIndexes(doc);
    return doc;
  }

  const topLevel = [...root.children];
  // Every block except the synthetic root, copied so the fragment is
  // independent of the conversion context.
  const fragmentBlocks = Object.fromEntries(
    Object.entries(ctx.blocks)
      .filter(([id]) => id !== rootId)
      .map(([id, block]) => [id, deepCloneBlock(block)]),
  );
  for (const blockId of topLevel) {
    const block = fragmentBlocks[blockId];
    if (block) {
      // Top-level blocks lose their link to the dropped root.
      block.parentId = null;
    }
  }

  const fragment = { ...header, mode: 'fragment', topLevel, blocks: fragmentBlocks };
  fragment.indexes = buildIndexes(fragment);
  return fragment;
}