@moxn/kb-migrate 0.4.2 → 0.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,7 +23,7 @@ export async function blocksToSections(blocks, client, pagePathMap, options) {
23
23
  if (block.type === 'heading_2') {
24
24
  // Flush current section if it has content
25
25
  if (currentBlocks.length > 0) {
26
- sections.push({ name: currentSectionName, content: currentBlocks });
26
+ sections.push({ name: currentSectionName, content: mergeConsecutiveListBlocks(currentBlocks) });
27
27
  }
28
28
  const h2 = block;
29
29
  currentSectionName = richTextToPlain(h2.heading_2.rich_text) || 'Untitled';
@@ -36,7 +36,7 @@ export async function blocksToSections(blocks, client, pagePathMap, options) {
36
36
  }
37
37
  // Flush last section
38
38
  if (currentBlocks.length > 0) {
39
- sections.push({ name: currentSectionName, content: currentBlocks });
39
+ sections.push({ name: currentSectionName, content: mergeConsecutiveListBlocks(currentBlocks) });
40
40
  }
41
41
  // If no sections were created at all (empty page), return empty array
42
42
  return sections;
@@ -74,10 +74,10 @@ async function convertBlock(block, client, pagePathMap, visitedSyncedBlocks) {
74
74
  results.push(...convertToDo(block));
75
75
  break;
76
76
  case 'quote':
77
- results.push(...convertQuote(block));
77
+ results.push(...(await convertQuote(block, client, pagePathMap, visitedSyncedBlocks)));
78
78
  break;
79
79
  case 'callout':
80
- results.push(...convertCallout(block));
80
+ results.push(...(await convertCallout(block, client, pagePathMap, visitedSyncedBlocks)));
81
81
  break;
82
82
  case 'divider':
83
83
  results.push(textBlock('---'));
@@ -140,25 +140,15 @@ async function convertBlock(block, client, pagePathMap, visitedSyncedBlocks) {
140
140
  console.warn(` Skipping unsupported Notion block type: ${block.type}`);
141
141
  break;
142
142
  }
143
- // If block has children (except table, toggle, synced_block, column_list which handle their own)
143
+ // If block has children (except types that handle their own)
144
144
  if (block.has_children &&
145
- !['table', 'toggle', 'synced_block', 'column_list', 'column'].includes(block.type)) {
145
+ !['table', 'toggle', 'synced_block', 'column_list', 'column', 'quote', 'callout'].includes(block.type)) {
146
+ const indent = ['bulleted_list_item', 'numbered_list_item', 'to_do'].includes(block.type)
147
+ ? ' '
148
+ : undefined;
146
149
  const children = await client.getBlockChildren(block.id);
147
- for (const child of children) {
148
- const childBlocks = await convertBlock(child, client, pagePathMap, visitedSyncedBlocks);
149
- // Indent child content for list items
150
- if (['bulleted_list_item', 'numbered_list_item'].includes(block.type)) {
151
- for (const cb of childBlocks) {
152
- if (cb.blockType === 'text' && cb.text) {
153
- cb.text = cb.text
154
- .split('\n')
155
- .map((line) => ' ' + line)
156
- .join('\n');
157
- }
158
- }
159
- }
160
- results.push(...childBlocks);
161
- }
150
+ const childBlocks = await convertAndMergeChildren(children, client, pagePathMap, visitedSyncedBlocks, indent);
151
+ results.push(...childBlocks);
162
152
  }
163
153
  return results;
164
154
  }
@@ -203,18 +193,26 @@ function convertToDo(block) {
203
193
  const text = richTextToMarkdown(td.to_do.rich_text);
204
194
  return [textBlock(`${checkbox} ${text}`)];
205
195
  }
206
- function convertQuote(block) {
196
+ async function convertQuote(block, client, pagePathMap, visitedSyncedBlocks) {
207
197
  const q = block;
208
198
  const text = richTextToMarkdown(q.quote.rich_text);
209
- if (!text)
199
+ if (!text && !block.has_children)
210
200
  return [];
211
201
  const quoted = text
212
- .split('\n')
213
- .map((line) => '> ' + line)
214
- .join('\n');
215
- return [textBlock(quoted)];
202
+ ? text
203
+ .split('\n')
204
+ .map((line) => '> ' + line)
205
+ .join('\n')
206
+ : '';
207
+ const results = quoted ? [textBlock(quoted)] : [];
208
+ if (block.has_children) {
209
+ const children = await client.getBlockChildren(block.id);
210
+ const childBlocks = await convertAndMergeChildren(children, client, pagePathMap, visitedSyncedBlocks, '> ');
211
+ results.push(...childBlocks);
212
+ }
213
+ return results;
216
214
  }
217
- function convertCallout(block) {
215
+ async function convertCallout(block, client, pagePathMap, visitedSyncedBlocks) {
218
216
  const c = block;
219
217
  const text = richTextToMarkdown(c.callout.rich_text);
220
218
  const emoji = c.callout.icon?.emoji ?? '';
@@ -223,7 +221,13 @@ function convertCallout(block) {
223
221
  .split('\n')
224
222
  .map((line) => '> ' + line)
225
223
  .join('\n');
226
- return [textBlock(quoted)];
224
+ const results = [textBlock(quoted)];
225
+ if (block.has_children) {
226
+ const children = await client.getBlockChildren(block.id);
227
+ const childBlocks = await convertAndMergeChildren(children, client, pagePathMap, visitedSyncedBlocks, '> ');
228
+ results.push(...childBlocks);
229
+ }
230
+ return results;
227
231
  }
228
232
  async function convertTable(block, client) {
229
233
  const tableBlock = block;
@@ -255,19 +259,8 @@ async function convertToggle(block, client, pagePathMap, visitedSyncedBlocks) {
255
259
  const results = [textBlock(`**${header}**`)];
256
260
  if (block.has_children) {
257
261
  const children = await client.getBlockChildren(block.id);
258
- for (const child of children) {
259
- const converted = await convertBlock(child, client, pagePathMap, visitedSyncedBlocks);
260
- // Indent toggle content
261
- for (const cb of converted) {
262
- if (cb.blockType === 'text' && cb.text) {
263
- cb.text = cb.text
264
- .split('\n')
265
- .map((line) => '> ' + line)
266
- .join('\n');
267
- }
268
- }
269
- results.push(...converted);
270
- }
262
+ const childBlocks = await convertAndMergeChildren(children, client, pagePathMap, visitedSyncedBlocks, '> ');
263
+ results.push(...childBlocks);
271
264
  }
272
265
  return results;
273
266
  }
@@ -372,12 +365,7 @@ async function convertSyncedBlock(block, client, pagePathMap, visitedSyncedBlock
372
365
  visitedSyncedBlocks.add(sourceId);
373
366
  try {
374
367
  const children = await client.getBlockChildren(sourceId);
375
- const results = [];
376
- for (const child of children) {
377
- const converted = await convertBlock(child, client, pagePathMap, visitedSyncedBlocks);
378
- results.push(...converted);
379
- }
380
- return results;
368
+ return await convertAndMergeChildren(children, client, pagePathMap, visitedSyncedBlocks);
381
369
  }
382
370
  finally {
383
371
  visitedSyncedBlocks.delete(sourceId);
@@ -389,10 +377,8 @@ async function convertColumnList(block, client, pagePathMap, visitedSyncedBlocks
389
377
  for (const column of children) {
390
378
  if (column.type === 'column' && column.has_children) {
391
379
  const columnChildren = await client.getBlockChildren(column.id);
392
- for (const child of columnChildren) {
393
- const converted = await convertBlock(child, client, pagePathMap, visitedSyncedBlocks);
394
- results.push(...converted);
395
- }
380
+ const converted = await convertAndMergeChildren(columnChildren, client, pagePathMap, visitedSyncedBlocks);
381
+ results.push(...converted);
396
382
  }
397
383
  }
398
384
  return results;
@@ -429,12 +415,9 @@ export function richTextToMarkdown(richText) {
429
415
  text = '*' + text + '*';
430
416
  if (rt.annotations.strikethrough)
431
417
  text = '~~' + text + '~~';
432
- // Apply link
433
- if (rt.href) {
434
- text = `[${text}](${rt.href})`;
435
- }
436
- else if (rt.type === 'mention' && rt.mention) {
437
- // Handle mention types
418
+ // Handle mentions BEFORE href — Notion sets href on mentions too
419
+ // (as a web URL), but we want notion:// protocol for resolution
420
+ if (rt.type === 'mention' && rt.mention) {
438
421
  if (rt.mention.type === 'user' && rt.mention.user?.name) {
439
422
  text = `@${rt.mention.user.name}`;
440
423
  }
@@ -450,6 +433,9 @@ export function richTextToMarkdown(richText) {
450
433
  text += ` → ${rt.mention.date.end}`;
451
434
  }
452
435
  }
436
+ else if (rt.href) {
437
+ text = `[${text}](${rt.href})`;
438
+ }
453
439
  else if (rt.type === 'equation' && rt.equation) {
454
440
  text = `$${rt.equation.expression}$`;
455
441
  }
@@ -469,6 +455,96 @@ export function richTextToPlain(richText) {
469
455
  function textBlock(text) {
470
456
  return { blockType: 'text', text };
471
457
  }
458
+ function detectListType(text) {
459
+ const firstLine = text.split('\n')[0];
460
+ if (/^\d+\.\s/.test(firstLine))
461
+ return 'ordered';
462
+ if (/^- \[[ x]\]\s/.test(firstLine))
463
+ return 'todo';
464
+ if (/^- /.test(firstLine))
465
+ return 'bullet';
466
+ return null;
467
+ }
468
+ /**
469
+ * Merge consecutive list-item text blocks into single text blocks.
470
+ * This ensures the markdown→TipTap parser sees them as one list
471
+ * instead of creating multiple single-item lists.
472
+ */
473
+ function mergeConsecutiveListBlocks(blocks) {
474
+ const result = [];
475
+ let accumulator = [];
476
+ let currentListType = null;
477
+ function flush() {
478
+ if (accumulator.length === 0)
479
+ return;
480
+ let merged = accumulator.join('\n');
481
+ if (currentListType === 'ordered') {
482
+ // Fix numbering: replace all leading `1.` with sequential numbers
483
+ let counter = 0;
484
+ merged = merged
485
+ .split('\n')
486
+ .map((line) => {
487
+ if (/^\d+\.\s/.test(line)) {
488
+ counter++;
489
+ return line.replace(/^\d+\./, `${counter}.`);
490
+ }
491
+ return line;
492
+ })
493
+ .join('\n');
494
+ }
495
+ result.push(textBlock(merged));
496
+ accumulator = [];
497
+ currentListType = null;
498
+ }
499
+ for (const block of blocks) {
500
+ if (block.blockType !== 'text' || !block.text) {
501
+ flush();
502
+ result.push(block);
503
+ continue;
504
+ }
505
+ const type = detectListType(block.text);
506
+ // Indented child content (starts with spaces) continues current list group
507
+ if (currentListType &&
508
+ block.text.split('\n').every((line) => line.startsWith(' '))) {
509
+ accumulator.push(block.text);
510
+ continue;
511
+ }
512
+ if (type === null) {
513
+ flush();
514
+ result.push(block);
515
+ continue;
516
+ }
517
+ if (type !== currentListType) {
518
+ flush();
519
+ currentListType = type;
520
+ }
521
+ accumulator.push(block.text);
522
+ }
523
+ flush();
524
+ return result;
525
+ }
526
+ /**
527
+ * Convert children blocks, merge consecutive list items, and optionally indent.
528
+ */
529
+ async function convertAndMergeChildren(children, client, pagePathMap, visitedSyncedBlocks, indent) {
530
+ const blocks = [];
531
+ for (const child of children) {
532
+ const converted = await convertBlock(child, client, pagePathMap, visitedSyncedBlocks);
533
+ blocks.push(...converted);
534
+ }
535
+ const merged = mergeConsecutiveListBlocks(blocks);
536
+ if (indent) {
537
+ for (const block of merged) {
538
+ if (block.blockType === 'text' && block.text) {
539
+ block.text = block.text
540
+ .split('\n')
541
+ .map((line) => indent + line)
542
+ .join('\n');
543
+ }
544
+ }
545
+ }
546
+ return merged;
547
+ }
472
548
  function guessImageMediaType(url) {
473
549
  const lower = url.toLowerCase();
474
550
  if (lower.includes('.jpg') || lower.includes('.jpeg'))
@@ -45,8 +45,8 @@ describe('richTextToMarkdown', () => {
45
45
  const result = richTextToMarkdown([rt]);
46
46
  expect(result).toBe('[My Database](notion://abc123de-f456-abc1-23de-f456abc123de)');
47
47
  });
48
- it('renders database mention with href using the href (href takes precedence)', () => {
49
- // When href is set on the rich text, line 641 handles it BEFORE the mention check
48
+ it('renders database mention with href using notion:// (mention takes precedence over href)', () => {
49
+ // Mentions should always use notion:// protocol, even when href is set
50
50
  const rt = {
51
51
  type: 'mention',
52
52
  plain_text: 'My Database',
@@ -65,8 +65,7 @@ describe('richTextToMarkdown', () => {
65
65
  },
66
66
  };
67
67
  const result = richTextToMarkdown([rt]);
68
- // href takes precedence — produces a notion.so link (resolved in post-processing)
69
- expect(result).toBe('[My Database](https://www.notion.so/workspace/abc123de-f456-abc1-23de-f456abc123de)');
68
+ expect(result).toBe('[My Database](notion://abc123de-f456-abc1-23de-f456abc123de)');
70
69
  });
71
70
  });
72
71
  describe('page mentions', () => {
@@ -91,7 +90,7 @@ describe('richTextToMarkdown', () => {
91
90
  const result = richTextToMarkdown([rt]);
92
91
  expect(result).toBe('[My Page](notion://abc123de-f456-abc1-23de-f456abc123de)');
93
92
  });
94
- it('renders page mention with href using the href', () => {
93
+ it('renders page mention with href using notion:// (mention takes precedence over href)', () => {
95
94
  const rt = {
96
95
  type: 'mention',
97
96
  plain_text: 'My Page',
@@ -110,7 +109,7 @@ describe('richTextToMarkdown', () => {
110
109
  },
111
110
  };
112
111
  const result = richTextToMarkdown([rt]);
113
- expect(result).toBe('[My Page](https://www.notion.so/workspace/abc123de-f456-abc1-23de-f456abc123de)');
112
+ expect(result).toBe('[My Page](notion://abc123de-f456-abc1-23de-f456abc123de)');
114
113
  });
115
114
  });
116
115
  describe('other mention types preserved', () => {
@@ -18,6 +18,9 @@ const NOTION_PROTOCOL_RE = /\[([^\]]*)\]\(notion:\/\/([a-f0-9-]{32,36})\)/g;
18
18
  const NOTION_WEB_URL_RE = /\[([^\]]*)\]\(https?:\/\/(?:www\.)?notion\.so\/(?:[a-zA-Z0-9_-]+\/)*([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})(?:[?#][^)]*)?(?:\))/g;
19
19
  // Fallback for IDs without dashes embedded in slug URLs (e.g., ...Page-Title-abc123def456...)
20
20
  const NOTION_WEB_URL_SLUG_RE = /\[([^\]]*)\]\(https?:\/\/(?:www\.)?notion\.so\/(?:[a-zA-Z0-9_-]+\/)*[a-zA-Z0-9-]+-([a-f0-9]{32})(?:[?#][^)]*)?\)/g;
21
+ // Bare 32-char hex on notion.so (e.g., https://www.notion.so/3004bf42cfeb818ebe8fe2f4befdad52)
22
+ // Catches mention hrefs that leak through as web URLs
23
+ const NOTION_WEB_URL_BARE_RE = /\[([^\]]*)\]\(https?:\/\/(?:www\.)?notion\.so\/(?:[a-zA-Z0-9_-]+\/)*([a-f0-9]{32})(?:[?#][^)]*)?\)/g;
21
24
  // 3. Unresolved placeholders from link_to_page conversion
22
25
  const LINK_PLACEHOLDER_RE = /\*\(Link to Notion page: ([a-f0-9-]{32,36})\)\*/g;
23
26
  // 4. Relation property markers
@@ -87,6 +90,21 @@ export function resolveNotionReferences(sections, mapping) {
87
90
  }
88
91
  return `[${displayText}](notion://${nid})`;
89
92
  });
93
+ // Pass 3b: Bare 32-char hex Notion URLs (safety net for mention hrefs)
94
+ text = text.replace(NOTION_WEB_URL_BARE_RE, (_match, displayText, rawId) => {
95
+ const nid = normalizeId(rawId);
96
+ const kbPath = mapping.notionIdToKbPath.get(nid);
97
+ if (kbPath) {
98
+ references.push({
99
+ sectionIndex,
100
+ targetNotionId: nid,
101
+ targetKbPath: kbPath,
102
+ displayText: displayText || kbPath,
103
+ });
104
+ return `[${displayText || kbPath}](${kbPath})`;
105
+ }
106
+ return `[${displayText}](notion://${nid})`;
107
+ });
90
108
  // Pass 4: Unresolved link_to_page placeholders
91
109
  text = text.replace(LINK_PLACEHOLDER_RE, (_match, rawId) => {
92
110
  const nid = normalizeId(rawId);
@@ -120,6 +120,24 @@ describe('resolveNotionReferences', () => {
120
120
  const { sections: resolved } = resolveNotionReferences(sections, m);
121
121
  expect(getText(resolved[0])).toBe('See [link](/docs/my-page)');
122
122
  });
123
+ it('resolves bare 32-char hex Notion URLs (no slug prefix)', () => {
124
+ const sections = [
125
+ textSection('Intro', 'See [Dev Setup](https://www.notion.so/3004bf42cfeb818ebe8fe2f4befdad52)'),
126
+ ];
127
+ const m = mapping({ '3004bf42cfeb818ebe8fe2f4befdad52': '/docs/dev-setup' });
128
+ const { sections: resolved, references } = resolveNotionReferences(sections, m);
129
+ expect(getText(resolved[0])).toBe('See [Dev Setup](/docs/dev-setup)');
130
+ expect(references).toHaveLength(1);
131
+ expect(references[0].targetNotionId).toBe('3004bf42cfeb818ebe8fe2f4befdad52');
132
+ });
133
+ it('normalizes unresolved bare hex Notion URLs to notion://', () => {
134
+ const sections = [
135
+ textSection('Intro', 'See [page](https://www.notion.so/aabbccdd11223344aabbccdd11223344)'),
136
+ ];
137
+ const m = mapping({});
138
+ const { sections: resolved } = resolveNotionReferences(sections, m);
139
+ expect(getText(resolved[0])).toBe('See [page](notion://aabbccdd11223344aabbccdd11223344)');
140
+ });
123
141
  });
124
142
  describe('link_to_page placeholders', () => {
125
143
  it('resolves placeholders to KB path links', () => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@moxn/kb-migrate",
3
- "version": "0.4.2",
3
+ "version": "0.4.6",
4
4
  "description": "Migration tool for importing documents into Moxn Knowledge Base from local files, Notion, Google Docs, and more",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",