@moxn/kb-migrate 0.4.12 → 0.4.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -62,14 +62,68 @@ function stripInvalidLinks(text) {
62
62
  return displayText;
63
63
  });
64
64
  }
65
- function sectionsToMarkdown(sections, options) {
66
- const parts = [];
65
+ function notionImageBlock(url, alt) {
66
+ return {
67
+ object: 'block',
68
+ type: 'image',
69
+ image: {
70
+ type: 'external',
71
+ external: { url },
72
+ ...(alt ? { caption: [{ type: 'text', text: { content: alt } }] } : {}),
73
+ },
74
+ };
75
+ }
76
+ function notionPdfBlock(url, caption) {
77
+ return {
78
+ object: 'block',
79
+ type: 'pdf',
80
+ pdf: {
81
+ type: 'external',
82
+ external: { url },
83
+ ...(caption ? { caption: [{ type: 'text', text: { content: caption } }] } : {}),
84
+ },
85
+ };
86
+ }
87
+ function notionFileBlock(url, caption) {
88
+ return {
89
+ object: 'block',
90
+ type: 'file',
91
+ file: {
92
+ type: 'external',
93
+ external: { url },
94
+ caption: [{ type: 'text', text: { content: caption || 'file' } }],
95
+ },
96
+ };
97
+ }
98
+ /**
99
+ * Convert KB document sections to Notion blocks.
100
+ *
101
+ * Uses a block-by-block approach: text blocks go through martian for
102
+ * rich formatting, while media blocks (images, PDFs, files, CSVs) are
103
+ * converted directly to native Notion block types. This avoids the
104
+ * fragility of placeholder-based post-processing.
105
+ *
106
+ * Section names become H2 headings (mirrors the import convention).
107
+ */
108
+ function sectionsToNotionBlocks(sections, options) {
109
+ const allBlocks = [];
67
110
  const allReferences = [];
68
111
  const databaseIds = [];
69
- const media = [];
70
112
  const extractRefs = options?.extractReferences ?? false;
113
+ // Accumulate contiguous text blocks into markdown, then flush through martian
114
+ let pendingMarkdown = [];
115
+ function flushText() {
116
+ if (pendingMarkdown.length === 0)
117
+ return;
118
+ const md = pendingMarkdown.join('\n').trim();
119
+ if (md) {
120
+ allBlocks.push(...markdownToBlocks(md));
121
+ }
122
+ pendingMarkdown = [];
123
+ }
71
124
  for (const section of sections) {
72
- parts.push(`## ${section.name}\n`);
125
+ // Section heading — accumulate as markdown so martian handles it
126
+ pendingMarkdown.push(`## ${section.name}\n`);
73
127
  for (const block of section.content) {
74
128
  if (block.blockType === 'text' && block.text) {
75
129
  let text = stripCommentTags(block.text);
@@ -78,93 +132,40 @@ function sectionsToMarkdown(sections, options) {
78
132
  text = cleanedText;
79
133
  allReferences.push(...references);
80
134
  }
81
- // Strip relative/internal links that aren't valid URLs
82
- // (Notion rejects links without a protocol)
83
135
  text = stripInvalidLinks(text);
84
- parts.push(text);
85
- parts.push('');
136
+ pendingMarkdown.push(text);
137
+ pendingMarkdown.push('');
86
138
  }
87
139
  else if (block.blockType === 'image' && block.url) {
88
- const token = `MOXNMEDIA${media.length}PLACEHOLDER`;
89
- media.push({ token, type: 'image', url: block.url, alt: block.alt });
90
- parts.push(token);
91
- parts.push('');
140
+ // Flush any pending text, then add image directly
141
+ flushText();
142
+ allBlocks.push(notionImageBlock(block.url, block.alt));
92
143
  }
93
144
  else if (block.blockType === 'document' && block.url) {
94
- const token = `MOXNMEDIA${media.length}PLACEHOLDER`;
95
- media.push({ token, type: 'file', url: block.url, filename: block.filename });
96
- parts.push(token);
97
- parts.push('');
145
+ flushText();
146
+ // Use native pdf block for PDFs (inline viewer), file block for others
147
+ if (block.mimeType === 'application/pdf') {
148
+ allBlocks.push(notionPdfBlock(block.url, block.filename));
149
+ }
150
+ else {
151
+ allBlocks.push(notionFileBlock(block.url, block.filename));
152
+ }
98
153
  }
99
154
  else if (block.blockType === 'csv' && block.url) {
100
- const token = `MOXNMEDIA${media.length}PLACEHOLDER`;
101
- media.push({ token, type: 'embed', url: block.url, filename: block.filename || 'data.csv' });
102
- parts.push(token);
103
- parts.push('');
155
+ flushText();
156
+ allBlocks.push(notionFileBlock(block.url, block.filename || 'data.csv'));
104
157
  }
105
158
  else if (block.blockType === 'database_embed' && block.databaseId) {
106
- // Collect database ID for Pass 1.5 export
107
159
  databaseIds.push(block.databaseId);
108
- // Add a placeholder in the markdown
109
- parts.push(`> **[Database]** *(exported as inline database)*`);
110
- parts.push('');
160
+ // Keep database placeholder as text (handled in Pass 1.5)
161
+ pendingMarkdown.push(`> **[Database]** *(exported as inline database)*`);
162
+ pendingMarkdown.push('');
111
163
  }
112
164
  }
113
165
  }
114
- return { markdown: parts.join('\n').trim(), references: allReferences, databaseIds, media };
115
- }
116
- /**
117
- * Replace media placeholder paragraphs in Notion blocks with proper
118
- * image/file/embed blocks. Martian doesn't support images, so we
119
- * post-process the converted blocks.
120
- */
121
- function injectMediaBlocks(blocks, media) {
122
- if (media.length === 0)
123
- return blocks;
124
- // Build a lookup from token to media info
125
- const tokenMap = new Map(media.map((m) => [m.token, m]));
126
- return blocks.map((block) => {
127
- // Check if this is a paragraph containing a media placeholder
128
- const b = block;
129
- if (b.type !== 'paragraph' || !b.paragraph?.rich_text)
130
- return block;
131
- const text = b.paragraph.rich_text.map((rt) => rt.text?.content ?? '').join('').trim();
132
- const mediaInfo = tokenMap.get(text);
133
- if (!mediaInfo)
134
- return block;
135
- // Replace with proper Notion block
136
- if (mediaInfo.type === 'image') {
137
- return {
138
- object: 'block',
139
- type: 'image',
140
- image: {
141
- type: 'external',
142
- external: { url: mediaInfo.url },
143
- ...(mediaInfo.alt ? { caption: [{ type: 'text', text: { content: mediaInfo.alt } }] } : {}),
144
- },
145
- };
146
- }
147
- if (mediaInfo.type === 'file') {
148
- return {
149
- object: 'block',
150
- type: 'file',
151
- file: {
152
- type: 'external',
153
- external: { url: mediaInfo.url },
154
- caption: [{ type: 'text', text: { content: mediaInfo.filename || 'document' } }],
155
- },
156
- };
157
- }
158
- // For CSV/embeds, use a bookmark block (Notion doesn't have native CSV embed)
159
- return {
160
- object: 'block',
161
- type: 'bookmark',
162
- bookmark: {
163
- url: mediaInfo.url,
164
- caption: [{ type: 'text', text: { content: mediaInfo.filename || 'file' } }],
165
- },
166
- };
167
- });
166
+ // Flush remaining text
167
+ flushText();
168
+ return { blocks: allBlocks, references: allReferences, databaseIds };
168
169
  }
169
170
  // Max 100 blocks per API call
170
171
  const MAX_BLOCKS_PER_APPEND = 100;
@@ -243,7 +244,7 @@ export class NotionExportTarget extends ExportTarget {
243
244
  */
244
245
  async resolveAndAppendReferences(doc, notionPageId) {
245
246
  // Extract references from section content
246
- const { references } = sectionsToMarkdown(doc.sections, { extractReferences: true });
247
+ const { references } = sectionsToNotionBlocks(doc.sections, { extractReferences: true });
247
248
  if (references.length === 0) {
248
249
  return { resolved: 0, unresolved: 0 };
249
250
  }
@@ -413,8 +414,7 @@ export class NotionExportTarget extends ExportTarget {
413
414
  // Notion page creation / update
414
415
  // ============================================
415
416
  async createNotionPage(doc) {
416
- const { markdown, media } = sectionsToMarkdown(doc.sections, { extractReferences: true });
417
- const blocks = injectMediaBlocks(markdownToBlocks(markdown), media);
417
+ const { blocks } = sectionsToNotionBlocks(doc.sections, { extractReferences: true });
418
418
  // First batch: up to 100 blocks as children of the new page
419
419
  const firstBatch = blocks.slice(0, MAX_BLOCKS_PER_APPEND);
420
420
  const remainingBlocks = blocks.slice(MAX_BLOCKS_PER_APPEND);
@@ -446,8 +446,7 @@ export class NotionExportTarget extends ExportTarget {
446
446
  },
447
447
  });
448
448
  await this.clearPageContent(notionPageId);
449
- const { markdown, media } = sectionsToMarkdown(doc.sections, { extractReferences: true });
450
- const blocks = injectMediaBlocks(markdownToBlocks(markdown), media);
449
+ const { blocks } = sectionsToNotionBlocks(doc.sections, { extractReferences: true });
451
450
  await this.appendRemainingBlocks(notionPageId, blocks);
452
451
  }
453
452
  async clearPageContent(pageId) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@moxn/kb-migrate",
3
- "version": "0.4.12",
3
+ "version": "0.4.13",
4
4
  "description": "Migration tool for importing documents into Moxn Knowledge Base from local files, Notion, Google Docs, and more",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",