@moxn/kb-migrate 0.4.2 → 0.4.6
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
|
@@ -23,7 +23,7 @@ export async function blocksToSections(blocks, client, pagePathMap, options) {
|
|
|
23
23
|
if (block.type === 'heading_2') {
|
|
24
24
|
// Flush current section if it has content
|
|
25
25
|
if (currentBlocks.length > 0) {
|
|
26
|
-
sections.push({ name: currentSectionName, content: currentBlocks });
|
|
26
|
+
sections.push({ name: currentSectionName, content: mergeConsecutiveListBlocks(currentBlocks) });
|
|
27
27
|
}
|
|
28
28
|
const h2 = block;
|
|
29
29
|
currentSectionName = richTextToPlain(h2.heading_2.rich_text) || 'Untitled';
|
|
@@ -36,7 +36,7 @@ export async function blocksToSections(blocks, client, pagePathMap, options) {
|
|
|
36
36
|
}
|
|
37
37
|
// Flush last section
|
|
38
38
|
if (currentBlocks.length > 0) {
|
|
39
|
-
sections.push({ name: currentSectionName, content: currentBlocks });
|
|
39
|
+
sections.push({ name: currentSectionName, content: mergeConsecutiveListBlocks(currentBlocks) });
|
|
40
40
|
}
|
|
41
41
|
// If no sections were created at all (empty page), return empty array
|
|
42
42
|
return sections;
|
|
@@ -74,10 +74,10 @@ async function convertBlock(block, client, pagePathMap, visitedSyncedBlocks) {
|
|
|
74
74
|
results.push(...convertToDo(block));
|
|
75
75
|
break;
|
|
76
76
|
case 'quote':
|
|
77
|
-
results.push(...convertQuote(block));
|
|
77
|
+
results.push(...(await convertQuote(block, client, pagePathMap, visitedSyncedBlocks)));
|
|
78
78
|
break;
|
|
79
79
|
case 'callout':
|
|
80
|
-
results.push(...convertCallout(block));
|
|
80
|
+
results.push(...(await convertCallout(block, client, pagePathMap, visitedSyncedBlocks)));
|
|
81
81
|
break;
|
|
82
82
|
case 'divider':
|
|
83
83
|
results.push(textBlock('---'));
|
|
@@ -140,25 +140,15 @@ async function convertBlock(block, client, pagePathMap, visitedSyncedBlocks) {
|
|
|
140
140
|
console.warn(` Skipping unsupported Notion block type: ${block.type}`);
|
|
141
141
|
break;
|
|
142
142
|
}
|
|
143
|
-
// If block has children (except
|
|
143
|
+
// If block has children (except types that handle their own)
|
|
144
144
|
if (block.has_children &&
|
|
145
|
-
!['table', 'toggle', 'synced_block', 'column_list', 'column'].includes(block.type)) {
|
|
145
|
+
!['table', 'toggle', 'synced_block', 'column_list', 'column', 'quote', 'callout'].includes(block.type)) {
|
|
146
|
+
const indent = ['bulleted_list_item', 'numbered_list_item', 'to_do'].includes(block.type)
|
|
147
|
+
? ' '
|
|
148
|
+
: undefined;
|
|
146
149
|
const children = await client.getBlockChildren(block.id);
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
// Indent child content for list items
|
|
150
|
-
if (['bulleted_list_item', 'numbered_list_item'].includes(block.type)) {
|
|
151
|
-
for (const cb of childBlocks) {
|
|
152
|
-
if (cb.blockType === 'text' && cb.text) {
|
|
153
|
-
cb.text = cb.text
|
|
154
|
-
.split('\n')
|
|
155
|
-
.map((line) => ' ' + line)
|
|
156
|
-
.join('\n');
|
|
157
|
-
}
|
|
158
|
-
}
|
|
159
|
-
}
|
|
160
|
-
results.push(...childBlocks);
|
|
161
|
-
}
|
|
150
|
+
const childBlocks = await convertAndMergeChildren(children, client, pagePathMap, visitedSyncedBlocks, indent);
|
|
151
|
+
results.push(...childBlocks);
|
|
162
152
|
}
|
|
163
153
|
return results;
|
|
164
154
|
}
|
|
@@ -203,18 +193,26 @@ function convertToDo(block) {
|
|
|
203
193
|
const text = richTextToMarkdown(td.to_do.rich_text);
|
|
204
194
|
return [textBlock(`${checkbox} ${text}`)];
|
|
205
195
|
}
|
|
206
|
-
function convertQuote(block) {
|
|
196
|
+
async function convertQuote(block, client, pagePathMap, visitedSyncedBlocks) {
|
|
207
197
|
const q = block;
|
|
208
198
|
const text = richTextToMarkdown(q.quote.rich_text);
|
|
209
|
-
if (!text)
|
|
199
|
+
if (!text && !block.has_children)
|
|
210
200
|
return [];
|
|
211
201
|
const quoted = text
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
202
|
+
? text
|
|
203
|
+
.split('\n')
|
|
204
|
+
.map((line) => '> ' + line)
|
|
205
|
+
.join('\n')
|
|
206
|
+
: '';
|
|
207
|
+
const results = quoted ? [textBlock(quoted)] : [];
|
|
208
|
+
if (block.has_children) {
|
|
209
|
+
const children = await client.getBlockChildren(block.id);
|
|
210
|
+
const childBlocks = await convertAndMergeChildren(children, client, pagePathMap, visitedSyncedBlocks, '> ');
|
|
211
|
+
results.push(...childBlocks);
|
|
212
|
+
}
|
|
213
|
+
return results;
|
|
216
214
|
}
|
|
217
|
-
function convertCallout(block) {
|
|
215
|
+
async function convertCallout(block, client, pagePathMap, visitedSyncedBlocks) {
|
|
218
216
|
const c = block;
|
|
219
217
|
const text = richTextToMarkdown(c.callout.rich_text);
|
|
220
218
|
const emoji = c.callout.icon?.emoji ?? '';
|
|
@@ -223,7 +221,13 @@ function convertCallout(block) {
|
|
|
223
221
|
.split('\n')
|
|
224
222
|
.map((line) => '> ' + line)
|
|
225
223
|
.join('\n');
|
|
226
|
-
|
|
224
|
+
const results = [textBlock(quoted)];
|
|
225
|
+
if (block.has_children) {
|
|
226
|
+
const children = await client.getBlockChildren(block.id);
|
|
227
|
+
const childBlocks = await convertAndMergeChildren(children, client, pagePathMap, visitedSyncedBlocks, '> ');
|
|
228
|
+
results.push(...childBlocks);
|
|
229
|
+
}
|
|
230
|
+
return results;
|
|
227
231
|
}
|
|
228
232
|
async function convertTable(block, client) {
|
|
229
233
|
const tableBlock = block;
|
|
@@ -255,19 +259,8 @@ async function convertToggle(block, client, pagePathMap, visitedSyncedBlocks) {
|
|
|
255
259
|
const results = [textBlock(`**${header}**`)];
|
|
256
260
|
if (block.has_children) {
|
|
257
261
|
const children = await client.getBlockChildren(block.id);
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
// Indent toggle content
|
|
261
|
-
for (const cb of converted) {
|
|
262
|
-
if (cb.blockType === 'text' && cb.text) {
|
|
263
|
-
cb.text = cb.text
|
|
264
|
-
.split('\n')
|
|
265
|
-
.map((line) => '> ' + line)
|
|
266
|
-
.join('\n');
|
|
267
|
-
}
|
|
268
|
-
}
|
|
269
|
-
results.push(...converted);
|
|
270
|
-
}
|
|
262
|
+
const childBlocks = await convertAndMergeChildren(children, client, pagePathMap, visitedSyncedBlocks, '> ');
|
|
263
|
+
results.push(...childBlocks);
|
|
271
264
|
}
|
|
272
265
|
return results;
|
|
273
266
|
}
|
|
@@ -372,12 +365,7 @@ async function convertSyncedBlock(block, client, pagePathMap, visitedSyncedBlock
|
|
|
372
365
|
visitedSyncedBlocks.add(sourceId);
|
|
373
366
|
try {
|
|
374
367
|
const children = await client.getBlockChildren(sourceId);
|
|
375
|
-
|
|
376
|
-
for (const child of children) {
|
|
377
|
-
const converted = await convertBlock(child, client, pagePathMap, visitedSyncedBlocks);
|
|
378
|
-
results.push(...converted);
|
|
379
|
-
}
|
|
380
|
-
return results;
|
|
368
|
+
return await convertAndMergeChildren(children, client, pagePathMap, visitedSyncedBlocks);
|
|
381
369
|
}
|
|
382
370
|
finally {
|
|
383
371
|
visitedSyncedBlocks.delete(sourceId);
|
|
@@ -389,10 +377,8 @@ async function convertColumnList(block, client, pagePathMap, visitedSyncedBlocks
|
|
|
389
377
|
for (const column of children) {
|
|
390
378
|
if (column.type === 'column' && column.has_children) {
|
|
391
379
|
const columnChildren = await client.getBlockChildren(column.id);
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
results.push(...converted);
|
|
395
|
-
}
|
|
380
|
+
const converted = await convertAndMergeChildren(columnChildren, client, pagePathMap, visitedSyncedBlocks);
|
|
381
|
+
results.push(...converted);
|
|
396
382
|
}
|
|
397
383
|
}
|
|
398
384
|
return results;
|
|
@@ -429,12 +415,9 @@ export function richTextToMarkdown(richText) {
|
|
|
429
415
|
text = '*' + text + '*';
|
|
430
416
|
if (rt.annotations.strikethrough)
|
|
431
417
|
text = '~~' + text + '~~';
|
|
432
|
-
//
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
}
|
|
436
|
-
else if (rt.type === 'mention' && rt.mention) {
|
|
437
|
-
// Handle mention types
|
|
418
|
+
// Handle mentions BEFORE href — Notion sets href on mentions too
|
|
419
|
+
// (as a web URL), but we want notion:// protocol for resolution
|
|
420
|
+
if (rt.type === 'mention' && rt.mention) {
|
|
438
421
|
if (rt.mention.type === 'user' && rt.mention.user?.name) {
|
|
439
422
|
text = `@${rt.mention.user.name}`;
|
|
440
423
|
}
|
|
@@ -450,6 +433,9 @@ export function richTextToMarkdown(richText) {
|
|
|
450
433
|
text += ` → ${rt.mention.date.end}`;
|
|
451
434
|
}
|
|
452
435
|
}
|
|
436
|
+
else if (rt.href) {
|
|
437
|
+
text = `[${text}](${rt.href})`;
|
|
438
|
+
}
|
|
453
439
|
else if (rt.type === 'equation' && rt.equation) {
|
|
454
440
|
text = `$${rt.equation.expression}$`;
|
|
455
441
|
}
|
|
@@ -469,6 +455,96 @@ export function richTextToPlain(richText) {
|
|
|
469
455
|
function textBlock(text) {
|
|
470
456
|
return { blockType: 'text', text };
|
|
471
457
|
}
|
|
458
|
+
function detectListType(text) {
|
|
459
|
+
const firstLine = text.split('\n')[0];
|
|
460
|
+
if (/^\d+\.\s/.test(firstLine))
|
|
461
|
+
return 'ordered';
|
|
462
|
+
if (/^- \[[ x]\]\s/.test(firstLine))
|
|
463
|
+
return 'todo';
|
|
464
|
+
if (/^- /.test(firstLine))
|
|
465
|
+
return 'bullet';
|
|
466
|
+
return null;
|
|
467
|
+
}
|
|
468
|
+
/**
|
|
469
|
+
* Merge consecutive list-item text blocks into single text blocks.
|
|
470
|
+
* This ensures the markdown→TipTap parser sees them as one list
|
|
471
|
+
* instead of creating multiple single-item lists.
|
|
472
|
+
*/
|
|
473
|
+
function mergeConsecutiveListBlocks(blocks) {
|
|
474
|
+
const result = [];
|
|
475
|
+
let accumulator = [];
|
|
476
|
+
let currentListType = null;
|
|
477
|
+
function flush() {
|
|
478
|
+
if (accumulator.length === 0)
|
|
479
|
+
return;
|
|
480
|
+
let merged = accumulator.join('\n');
|
|
481
|
+
if (currentListType === 'ordered') {
|
|
482
|
+
// Fix numbering: replace all leading `1.` with sequential numbers
|
|
483
|
+
let counter = 0;
|
|
484
|
+
merged = merged
|
|
485
|
+
.split('\n')
|
|
486
|
+
.map((line) => {
|
|
487
|
+
if (/^\d+\.\s/.test(line)) {
|
|
488
|
+
counter++;
|
|
489
|
+
return line.replace(/^\d+\./, `${counter}.`);
|
|
490
|
+
}
|
|
491
|
+
return line;
|
|
492
|
+
})
|
|
493
|
+
.join('\n');
|
|
494
|
+
}
|
|
495
|
+
result.push(textBlock(merged));
|
|
496
|
+
accumulator = [];
|
|
497
|
+
currentListType = null;
|
|
498
|
+
}
|
|
499
|
+
for (const block of blocks) {
|
|
500
|
+
if (block.blockType !== 'text' || !block.text) {
|
|
501
|
+
flush();
|
|
502
|
+
result.push(block);
|
|
503
|
+
continue;
|
|
504
|
+
}
|
|
505
|
+
const type = detectListType(block.text);
|
|
506
|
+
// Indented child content (starts with spaces) continues current list group
|
|
507
|
+
if (currentListType &&
|
|
508
|
+
block.text.split('\n').every((line) => line.startsWith(' '))) {
|
|
509
|
+
accumulator.push(block.text);
|
|
510
|
+
continue;
|
|
511
|
+
}
|
|
512
|
+
if (type === null) {
|
|
513
|
+
flush();
|
|
514
|
+
result.push(block);
|
|
515
|
+
continue;
|
|
516
|
+
}
|
|
517
|
+
if (type !== currentListType) {
|
|
518
|
+
flush();
|
|
519
|
+
currentListType = type;
|
|
520
|
+
}
|
|
521
|
+
accumulator.push(block.text);
|
|
522
|
+
}
|
|
523
|
+
flush();
|
|
524
|
+
return result;
|
|
525
|
+
}
|
|
526
|
+
/**
|
|
527
|
+
* Convert children blocks, merge consecutive list items, and optionally indent.
|
|
528
|
+
*/
|
|
529
|
+
async function convertAndMergeChildren(children, client, pagePathMap, visitedSyncedBlocks, indent) {
|
|
530
|
+
const blocks = [];
|
|
531
|
+
for (const child of children) {
|
|
532
|
+
const converted = await convertBlock(child, client, pagePathMap, visitedSyncedBlocks);
|
|
533
|
+
blocks.push(...converted);
|
|
534
|
+
}
|
|
535
|
+
const merged = mergeConsecutiveListBlocks(blocks);
|
|
536
|
+
if (indent) {
|
|
537
|
+
for (const block of merged) {
|
|
538
|
+
if (block.blockType === 'text' && block.text) {
|
|
539
|
+
block.text = block.text
|
|
540
|
+
.split('\n')
|
|
541
|
+
.map((line) => indent + line)
|
|
542
|
+
.join('\n');
|
|
543
|
+
}
|
|
544
|
+
}
|
|
545
|
+
}
|
|
546
|
+
return merged;
|
|
547
|
+
}
|
|
472
548
|
function guessImageMediaType(url) {
|
|
473
549
|
const lower = url.toLowerCase();
|
|
474
550
|
if (lower.includes('.jpg') || lower.includes('.jpeg'))
|
|
@@ -45,8 +45,8 @@ describe('richTextToMarkdown', () => {
|
|
|
45
45
|
const result = richTextToMarkdown([rt]);
|
|
46
46
|
expect(result).toBe('[My Database](notion://abc123de-f456-abc1-23de-f456abc123de)');
|
|
47
47
|
});
|
|
48
|
-
it('renders database mention with href using
|
|
49
|
-
//
|
|
48
|
+
it('renders database mention with href using notion:// (mention takes precedence over href)', () => {
|
|
49
|
+
// Mentions should always use notion:// protocol, even when href is set
|
|
50
50
|
const rt = {
|
|
51
51
|
type: 'mention',
|
|
52
52
|
plain_text: 'My Database',
|
|
@@ -65,8 +65,7 @@ describe('richTextToMarkdown', () => {
|
|
|
65
65
|
},
|
|
66
66
|
};
|
|
67
67
|
const result = richTextToMarkdown([rt]);
|
|
68
|
-
|
|
69
|
-
expect(result).toBe('[My Database](https://www.notion.so/workspace/abc123de-f456-abc1-23de-f456abc123de)');
|
|
68
|
+
expect(result).toBe('[My Database](notion://abc123de-f456-abc1-23de-f456abc123de)');
|
|
70
69
|
});
|
|
71
70
|
});
|
|
72
71
|
describe('page mentions', () => {
|
|
@@ -91,7 +90,7 @@ describe('richTextToMarkdown', () => {
|
|
|
91
90
|
const result = richTextToMarkdown([rt]);
|
|
92
91
|
expect(result).toBe('[My Page](notion://abc123de-f456-abc1-23de-f456abc123de)');
|
|
93
92
|
});
|
|
94
|
-
it('renders page mention with href using
|
|
93
|
+
it('renders page mention with href using notion:// (mention takes precedence over href)', () => {
|
|
95
94
|
const rt = {
|
|
96
95
|
type: 'mention',
|
|
97
96
|
plain_text: 'My Page',
|
|
@@ -110,7 +109,7 @@ describe('richTextToMarkdown', () => {
|
|
|
110
109
|
},
|
|
111
110
|
};
|
|
112
111
|
const result = richTextToMarkdown([rt]);
|
|
113
|
-
expect(result).toBe('[My Page](
|
|
112
|
+
expect(result).toBe('[My Page](notion://abc123de-f456-abc1-23de-f456abc123de)');
|
|
114
113
|
});
|
|
115
114
|
});
|
|
116
115
|
describe('other mention types preserved', () => {
|
|
@@ -18,6 +18,9 @@ const NOTION_PROTOCOL_RE = /\[([^\]]*)\]\(notion:\/\/([a-f0-9-]{32,36})\)/g;
|
|
|
18
18
|
const NOTION_WEB_URL_RE = /\[([^\]]*)\]\(https?:\/\/(?:www\.)?notion\.so\/(?:[a-zA-Z0-9_-]+\/)*([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})(?:[?#][^)]*)?(?:\))/g;
|
|
19
19
|
// Fallback for IDs without dashes embedded in slug URLs (e.g., ...Page-Title-abc123def456...)
|
|
20
20
|
const NOTION_WEB_URL_SLUG_RE = /\[([^\]]*)\]\(https?:\/\/(?:www\.)?notion\.so\/(?:[a-zA-Z0-9_-]+\/)*[a-zA-Z0-9-]+-([a-f0-9]{32})(?:[?#][^)]*)?\)/g;
|
|
21
|
+
// Bare 32-char hex on notion.so (e.g., https://www.notion.so/3004bf42cfeb818ebe8fe2f4befdad52)
|
|
22
|
+
// Catches mention hrefs that leak through as web URLs
|
|
23
|
+
const NOTION_WEB_URL_BARE_RE = /\[([^\]]*)\]\(https?:\/\/(?:www\.)?notion\.so\/(?:[a-zA-Z0-9_-]+\/)*([a-f0-9]{32})(?:[?#][^)]*)?\)/g;
|
|
21
24
|
// 3. Unresolved placeholders from link_to_page conversion
|
|
22
25
|
const LINK_PLACEHOLDER_RE = /\*\(Link to Notion page: ([a-f0-9-]{32,36})\)\*/g;
|
|
23
26
|
// 4. Relation property markers
|
|
@@ -87,6 +90,21 @@ export function resolveNotionReferences(sections, mapping) {
|
|
|
87
90
|
}
|
|
88
91
|
return `[${displayText}](notion://${nid})`;
|
|
89
92
|
});
|
|
93
|
+
// Pass 3b: Bare 32-char hex Notion URLs (safety net for mention hrefs)
|
|
94
|
+
text = text.replace(NOTION_WEB_URL_BARE_RE, (_match, displayText, rawId) => {
|
|
95
|
+
const nid = normalizeId(rawId);
|
|
96
|
+
const kbPath = mapping.notionIdToKbPath.get(nid);
|
|
97
|
+
if (kbPath) {
|
|
98
|
+
references.push({
|
|
99
|
+
sectionIndex,
|
|
100
|
+
targetNotionId: nid,
|
|
101
|
+
targetKbPath: kbPath,
|
|
102
|
+
displayText: displayText || kbPath,
|
|
103
|
+
});
|
|
104
|
+
return `[${displayText || kbPath}](${kbPath})`;
|
|
105
|
+
}
|
|
106
|
+
return `[${displayText}](notion://${nid})`;
|
|
107
|
+
});
|
|
90
108
|
// Pass 4: Unresolved link_to_page placeholders
|
|
91
109
|
text = text.replace(LINK_PLACEHOLDER_RE, (_match, rawId) => {
|
|
92
110
|
const nid = normalizeId(rawId);
|
|
@@ -120,6 +120,24 @@ describe('resolveNotionReferences', () => {
|
|
|
120
120
|
const { sections: resolved } = resolveNotionReferences(sections, m);
|
|
121
121
|
expect(getText(resolved[0])).toBe('See [link](/docs/my-page)');
|
|
122
122
|
});
|
|
123
|
+
it('resolves bare 32-char hex Notion URLs (no slug prefix)', () => {
|
|
124
|
+
const sections = [
|
|
125
|
+
textSection('Intro', 'See [Dev Setup](https://www.notion.so/3004bf42cfeb818ebe8fe2f4befdad52)'),
|
|
126
|
+
];
|
|
127
|
+
const m = mapping({ '3004bf42cfeb818ebe8fe2f4befdad52': '/docs/dev-setup' });
|
|
128
|
+
const { sections: resolved, references } = resolveNotionReferences(sections, m);
|
|
129
|
+
expect(getText(resolved[0])).toBe('See [Dev Setup](/docs/dev-setup)');
|
|
130
|
+
expect(references).toHaveLength(1);
|
|
131
|
+
expect(references[0].targetNotionId).toBe('3004bf42cfeb818ebe8fe2f4befdad52');
|
|
132
|
+
});
|
|
133
|
+
it('normalizes unresolved bare hex Notion URLs to notion://', () => {
|
|
134
|
+
const sections = [
|
|
135
|
+
textSection('Intro', 'See [page](https://www.notion.so/aabbccdd11223344aabbccdd11223344)'),
|
|
136
|
+
];
|
|
137
|
+
const m = mapping({});
|
|
138
|
+
const { sections: resolved } = resolveNotionReferences(sections, m);
|
|
139
|
+
expect(getText(resolved[0])).toBe('See [page](notion://aabbccdd11223344aabbccdd11223344)');
|
|
140
|
+
});
|
|
123
141
|
});
|
|
124
142
|
describe('link_to_page placeholders', () => {
|
|
125
143
|
it('resolves placeholders to KB path links', () => {
|
package/package.json
CHANGED