@moxn/kb-migrate 0.4.12 → 0.4.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/client.js CHANGED
@@ -227,6 +227,19 @@ export class MoxnClient {
227
227
  rowCount: block.rowCount,
228
228
  };
229
229
  }
230
+ // Handle generic files - upload to storage
231
+ if (block.blockType === 'file' && block.type === 'file' && block.path) {
232
+ const data = await fs.readFile(block.path);
233
+ const filename = block.filename || block.path.split('/').pop();
234
+ const { key } = await this.uploadFile(data, block.mediaType || 'application/octet-stream', filename);
235
+ return {
236
+ blockType: block.blockType,
237
+ type: 'storage',
238
+ key,
239
+ mediaType: block.mediaType || 'application/octet-stream',
240
+ filename: block.filename,
241
+ };
242
+ }
230
243
  return block;
231
244
  }));
232
245
  }
@@ -108,6 +108,9 @@ async function convertBlock(block, client, pagePathMap, visitedSyncedBlocks, dat
108
108
  case 'video':
109
109
  results.push(...convertVideo(block));
110
110
  break;
111
+ case 'audio':
112
+ results.push(...convertAudio(block));
113
+ break;
111
114
  case 'link_to_page':
112
115
  results.push(...convertLinkToPage(block, pagePathMap));
113
116
  break;
@@ -346,7 +349,7 @@ function convertFile(block) {
346
349
  },
347
350
  ];
348
351
  }
349
- // CSV or other file — render as link
352
+ // CSV file
350
353
  if (filename.toLowerCase().endsWith('.csv')) {
351
354
  return [
352
355
  {
@@ -358,8 +361,16 @@ function convertFile(block) {
358
361
  },
359
362
  ];
360
363
  }
361
- // Generic file — render as markdown link
362
- return [textBlock(`[${filename}](${url})`)];
364
+ // Generic file — emit as file block
365
+ return [
366
+ {
367
+ blockType: 'file',
368
+ type: 'url',
369
+ url,
370
+ mediaType: guessFileMimeType(filename),
371
+ filename,
372
+ },
373
+ ];
363
374
  }
364
375
  function convertVideo(block) {
365
376
  const v = block;
@@ -367,7 +378,16 @@ function convertVideo(block) {
367
378
  if (!url)
368
379
  return [];
369
380
  const caption = richTextToPlain(v.video.caption ?? []);
370
- return [textBlock(`[${caption || 'Video'}](${url})`)];
381
+ const filename = caption || extractFilename(url) || 'video';
382
+ return [
383
+ {
384
+ blockType: 'file',
385
+ type: 'url',
386
+ url,
387
+ mediaType: guessVideoMimeType(url),
388
+ filename,
389
+ },
390
+ ];
371
391
  }
372
392
  function convertLinkToPage(block, pagePathMap) {
373
393
  const ltp = block;
@@ -573,6 +593,74 @@ async function convertAndMergeChildren(children, client, pagePathMap, visitedSyn
573
593
  }
574
594
  return merged;
575
595
  }
596
/**
 * Convert a Notion `audio` block into a single URL-backed `file` content block.
 *
 * The URL is read from `audio.external` for external audio and from
 * `audio.file` otherwise. When no URL is available nothing is emitted.
 * The filename is the plain-text caption if there is one, else whatever
 * `extractFilename` derives from the URL, else the literal 'audio'.
 *
 * @param block Notion block whose `audio` payload is read.
 * @returns An array holding one file block, or an empty array without a URL.
 */
function convertAudio(block) {
    const { audio } = block;
    const isExternal = audio.type === 'external';
    const url = isExternal ? audio.external?.url : audio.file?.url;
    if (!url) {
        return [];
    }
    const captionText = richTextToPlain(audio.caption ?? []);
    const filename = captionText || extractFilename(url) || 'audio';
    const mediaType = guessAudioMimeType(url);
    return [{ blockType: 'file', type: 'url', url, mediaType, filename }];
}
613
/**
 * Guess a MIME type from the extension of a filename.
 *
 * Matching is case-insensitive and uses only the text after the last dot, so
 * `archive.tar.gz` is treated as `gz`. Names without a dot, and extensions
 * that are not in the table, yield `application/octet-stream`.
 *
 * @param filename File name (not a URL) to inspect.
 * @returns The mapped MIME type, or `application/octet-stream` as fallback.
 */
function guessFileMimeType(filename) {
    const fallback = 'application/octet-stream';
    const lower = filename.toLowerCase();
    const dot = lower.lastIndexOf('.');
    // Without a dot there is no extension; do not treat a bare name such as
    // "json" or "zip" as if it were one.
    if (dot === -1) {
        return fallback;
    }
    const ext = lower.slice(dot + 1);
    const map = {
        docx: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        xlsx: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        pptx: 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
        doc: 'application/msword',
        xls: 'application/vnd.ms-excel',
        ppt: 'application/vnd.ms-powerpoint',
        odt: 'application/vnd.oasis.opendocument.text',
        ods: 'application/vnd.oasis.opendocument.spreadsheet',
        rtf: 'application/rtf',
        txt: 'text/plain',
        md: 'text/markdown',
        json: 'application/json',
        xml: 'application/xml',
        yaml: 'text/yaml',
        yml: 'text/yaml',
        html: 'text/html',
        htm: 'text/html',
        zip: 'application/zip',
        gz: 'application/gzip',
        tar: 'application/x-tar',
        svg: 'image/svg+xml',
        epub: 'application/epub+zip',
    };
    // Own-property check: a plain `map[ext]` also finds inherited members
    // (e.g. "constructor", "__proto__") and would return a non-string value.
    return Object.prototype.hasOwnProperty.call(map, ext) ? map[ext] : fallback;
}
642
/**
 * Guess a video MIME type from a URL.
 *
 * The extension of the URL's final path segment (query string and fragment
 * removed) is checked first. If that is not a known video extension, the
 * function falls back to looking for `.mp4` / `.webm` / `.mov` anywhere in
 * the URL, and finally defaults to `video/mp4`.
 *
 * @param url URL of the video resource.
 * @returns `video/mp4`, `video/webm` or `video/quicktime`.
 */
function guessVideoMimeType(url) {
    const lower = url.toLowerCase();
    // Inspect the real file extension first so that text in the host name or
    // query string (e.g. "clip.webm?source=original.mp4") cannot win.
    const path = lower.split(/[?#]/, 1)[0];
    const lastSegment = path.slice(path.lastIndexOf('/') + 1);
    const dot = lastSegment.lastIndexOf('.');
    const ext = dot === -1 ? '' : lastSegment.slice(dot + 1);
    switch (ext) {
        case 'mp4':
            return 'video/mp4';
        case 'webm':
            return 'video/webm';
        case 'mov':
            return 'video/quicktime';
        default:
            break;
    }
    // Fallback heuristic for URLs whose extension is not at the end of the
    // path (e.g. ".../clip.webm/download").
    if (lower.includes('.mp4')) {
        return 'video/mp4';
    }
    if (lower.includes('.webm')) {
        return 'video/webm';
    }
    if (lower.includes('.mov')) {
        return 'video/quicktime';
    }
    return 'video/mp4';
}
652
/**
 * Guess an audio MIME type from a URL.
 *
 * The extension of the URL's final path segment (query string and fragment
 * removed) is checked first. If that is not a known audio extension, the
 * function falls back to looking for `.mp3` / `.wav` / `.ogg` / `.m4a`
 * anywhere in the URL, and finally defaults to `audio/mpeg`.
 *
 * @param url URL of the audio resource.
 * @returns `audio/mpeg`, `audio/wav`, `audio/ogg` or `audio/mp4`.
 */
function guessAudioMimeType(url) {
    const lower = url.toLowerCase();
    // Inspect the real file extension first so that text in the host name or
    // query string (e.g. "take.wav?from=demo.mp3") cannot win.
    const path = lower.split(/[?#]/, 1)[0];
    const lastSegment = path.slice(path.lastIndexOf('/') + 1);
    const dot = lastSegment.lastIndexOf('.');
    const ext = dot === -1 ? '' : lastSegment.slice(dot + 1);
    switch (ext) {
        case 'mp3':
            return 'audio/mpeg';
        case 'wav':
            return 'audio/wav';
        case 'ogg':
            return 'audio/ogg';
        case 'm4a':
            return 'audio/mp4';
        default:
            break;
    }
    // Fallback heuristic for URLs whose extension is not at the end of the path.
    if (lower.includes('.mp3')) {
        return 'audio/mpeg';
    }
    if (lower.includes('.wav')) {
        return 'audio/wav';
    }
    if (lower.includes('.ogg')) {
        return 'audio/ogg';
    }
    if (lower.includes('.m4a')) {
        return 'audio/mp4';
    }
    return 'audio/mpeg';
}
576
664
  function guessImageMediaType(url) {
577
665
  const lower = url.toLowerCase();
578
666
  if (lower.includes('.jpg') || lower.includes('.jpeg'))
@@ -56,7 +56,7 @@ export class NotionMediaDownloader {
56
56
  filename: urlBlock.filename,
57
57
  });
58
58
  }
59
- else {
59
+ else if (urlBlock.blockType === 'csv') {
60
60
  results.push({
61
61
  blockType: 'csv',
62
62
  type: 'file',
@@ -67,6 +67,16 @@ export class NotionMediaDownloader {
67
67
  rowCount: urlBlock.rowCount,
68
68
  });
69
69
  }
70
+ else {
71
+ // file block
72
+ results.push({
73
+ blockType: 'file',
74
+ type: 'file',
75
+ path: localPath,
76
+ mediaType: urlBlock.mediaType,
77
+ filename: urlBlock.filename,
78
+ });
79
+ }
70
80
  }
71
81
  catch (error) {
72
82
  console.warn(` Warning: Failed to download ${block.blockType}: ${error instanceof Error ? error.message : error}`);
@@ -153,6 +163,23 @@ function mimeToExtension(mime) {
153
163
  'image/webp': '.webp',
154
164
  'application/pdf': '.pdf',
155
165
  'text/csv': '.csv',
166
+ 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': '.docx',
167
+ 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': '.xlsx',
168
+ 'application/vnd.openxmlformats-officedocument.presentationml.presentation': '.pptx',
169
+ 'application/msword': '.doc',
170
+ 'application/vnd.ms-excel': '.xls',
171
+ 'application/vnd.ms-powerpoint': '.ppt',
172
+ 'text/plain': '.txt',
173
+ 'text/markdown': '.md',
174
+ 'application/json': '.json',
175
+ 'application/xml': '.xml',
176
+ 'text/yaml': '.yaml',
177
+ 'text/html': '.html',
178
+ 'application/zip': '.zip',
179
+ 'application/gzip': '.gz',
180
+ 'application/x-tar': '.tar',
181
+ 'image/svg+xml': '.svg',
182
+ 'application/epub+zip': '.epub',
156
183
  };
157
184
  return map[mime] ?? null;
158
185
  }
@@ -62,14 +62,157 @@ function stripInvalidLinks(text) {
62
62
  return displayText;
63
63
  });
64
64
  }
65
- function sectionsToMarkdown(sections, options) {
66
- const parts = [];
65
/**
 * Build a Notion `image` block that references an externally hosted URL.
 *
 * @param url Address of the image.
 * @param alt Optional alt text; when truthy it is attached as the caption.
 * @returns The block object.
 */
function notionImageBlock(url, alt) {
    const captionPart = alt ? { caption: [{ type: 'text', text: { content: alt } }] } : {};
    const image = Object.assign({ type: 'external', external: { url } }, captionPart);
    return { object: 'block', type: 'image', image };
}
76
/**
 * Build a Notion `pdf` block that references an externally hosted URL.
 *
 * @param url Address of the PDF.
 * @param caption Optional caption text; omitted from the block when falsy.
 * @returns The block object.
 */
function notionPdfBlock(url, caption) {
    const pdf = caption
        ? { type: 'external', external: { url }, caption: [{ type: 'text', text: { content: caption } }] }
        : { type: 'external', external: { url } };
    return { object: 'block', type: 'pdf', pdf };
}
87
/**
 * Build a Notion `file` block that references an externally hosted URL.
 *
 * Unlike the image and PDF builders, a caption is always present: a falsy
 * `caption` is replaced by the literal text 'file'.
 *
 * @param url Address of the file.
 * @param caption Caption text, typically the filename.
 * @returns The block object.
 */
function notionFileBlock(url, caption) {
    const label = caption || 'file';
    const file = {
        type: 'external',
        external: { url },
        caption: [{ type: 'text', text: { content: label } }],
    };
    return { object: 'block', type: 'file', file };
}
98
+ // ============================================
99
+ // Notion file_upload block helpers
100
+ // ============================================
101
/**
 * Build a Notion `image` block backed by a previously created file upload.
 *
 * @param fileUploadId Identifier of the Notion file upload.
 * @param alt Optional alt text; when truthy it is attached as the caption.
 * @returns The block object.
 */
function notionImageUploadBlock(fileUploadId, alt) {
    const source = { type: 'file_upload', file_upload: { id: fileUploadId } };
    const image = alt
        ? { ...source, caption: [{ type: 'text', text: { content: alt } }] }
        : source;
    return { object: 'block', type: 'image', image };
}
112
/**
 * Build a Notion `pdf` block backed by a previously created file upload.
 *
 * @param fileUploadId Identifier of the Notion file upload.
 * @param caption Optional caption text; omitted from the block when falsy.
 * @returns The block object.
 */
function notionPdfUploadBlock(fileUploadId, caption) {
    const captionPart = caption ? { caption: [{ type: 'text', text: { content: caption } }] } : {};
    const pdf = Object.assign({ type: 'file_upload', file_upload: { id: fileUploadId } }, captionPart);
    return { object: 'block', type: 'pdf', pdf };
}
123
/**
 * Build a Notion `file` block backed by a previously created file upload.
 *
 * A caption is always present: a falsy `caption` is replaced by the literal
 * text 'file'.
 *
 * @param fileUploadId Identifier of the Notion file upload.
 * @param caption Caption text, typically the filename.
 * @returns The block object.
 */
function notionFileUploadBlock(fileUploadId, caption) {
    const label = caption || 'file';
    const file = {
        type: 'file_upload',
        file_upload: { id: fileUploadId },
        caption: [{ type: 'text', text: { content: label } }],
    };
    return { object: 'block', type: 'file', file };
}
134
/**
 * Pick a filename for a stored object.
 *
 * The last `/`-separated segment of the storage key is used as-is when it
 * contains a dot. Otherwise the name is `file` followed by the extension
 * registered for `mimeType` (images, PDF and CSV only), or plain `file`
 * when the MIME type is not in the table.
 */
function deriveFilename(storageKey, mimeType) {
    const lastSegment = storageKey.slice(storageKey.lastIndexOf('/') + 1);
    if (lastSegment.indexOf('.') !== -1) {
        return lastSegment;
    }
    const extensionByMime = {
        'image/png': '.png',
        'image/jpeg': '.jpg',
        'image/gif': '.gif',
        'image/webp': '.webp',
        'application/pdf': '.pdf',
        'text/csv': '.csv',
    };
    const suffix = extensionByMime[mimeType] || '';
    return `file${suffix}`;
}
152
/**
 * Upload a storage-backed file to Notion via the File Upload API.
 *
 * Downloads the bytes from `url`, creates a `single_part` file upload, then
 * sends the bytes to it. Each Notion call is preceded by a
 * `sleep(RATE_LIMIT_MS)` pause.
 *
 * This is best-effort: a non-OK download response or any thrown error is
 * logged with `console.warn` and reported as `null`, so callers can fall
 * back to another representation instead of aborting.
 *
 * @param client Notion client exposing `fileUploads.create` and `fileUploads.send`.
 * @param url URL the file is downloaded from.
 * @param filename Name given to the upload and used in log messages.
 * @param contentType MIME type declared for the upload.
 * @returns The file upload ID on success, or `null` on failure.
 */
async function uploadFileToNotion(client, url, filename, contentType) {
    try {
        // Download file from signed URL
        const response = await fetch(url);
        if (!response.ok) {
            console.warn(` [upload] Download failed (${response.status}) for ${filename}`);
            return null;
        }
        const buffer = Buffer.from(await response.arrayBuffer());
        // Create file upload
        await sleep(RATE_LIMIT_MS);
        const upload = await client.fileUploads.create({
            mode: 'single_part',
            filename,
            content_type: contentType,
        });
        // Send file data. The Blob carries the same content type that was
        // declared in create(); a Blob built without options has an empty type.
        await sleep(RATE_LIMIT_MS);
        await client.fileUploads.send({
            file_upload_id: upload.id,
            file: { data: new Blob([buffer], { type: contentType }), filename },
        });
        return upload.id;
    }
    catch (error) {
        console.warn(` [upload] Failed for ${filename}: ${error instanceof Error ? error.message : error}`);
        return null;
    }
}
187
+ /**
188
+ * Convert KB document sections to Notion blocks.
189
+ *
190
+ * Uses a block-by-block approach: text blocks go through martian for
191
+ * rich formatting, while media blocks (images, PDFs, files, CSVs) are
192
+ * converted directly to native Notion block types. This avoids the
193
+ * fragility of placeholder-based post-processing.
194
+ *
195
+ * Section names become H2 headings (mirrors the import convention).
196
+ */
197
+ async function sectionsToNotionBlocks(sections, options) {
198
+ const allBlocks = [];
67
199
  const allReferences = [];
68
200
  const databaseIds = [];
69
- const media = [];
70
201
  const extractRefs = options?.extractReferences ?? false;
202
+ // Accumulate contiguous text blocks into markdown, then flush through martian
203
+ let pendingMarkdown = [];
204
+ function flushText() {
205
+ if (pendingMarkdown.length === 0)
206
+ return;
207
+ const md = pendingMarkdown.join('\n').trim();
208
+ if (md) {
209
+ allBlocks.push(...markdownToBlocks(md));
210
+ }
211
+ pendingMarkdown = [];
212
+ }
71
213
  for (const section of sections) {
72
- parts.push(`## ${section.name}\n`);
214
+ // Section heading — accumulate as markdown so martian handles it
215
+ pendingMarkdown.push(`## ${section.name}\n`);
73
216
  for (const block of section.content) {
74
217
  if (block.blockType === 'text' && block.text) {
75
218
  let text = stripCommentTags(block.text);
@@ -78,93 +221,65 @@ function sectionsToMarkdown(sections, options) {
78
221
  text = cleanedText;
79
222
  allReferences.push(...references);
80
223
  }
81
- // Strip relative/internal links that aren't valid URLs
82
- // (Notion rejects links without a protocol)
83
224
  text = stripInvalidLinks(text);
84
- parts.push(text);
85
- parts.push('');
225
+ pendingMarkdown.push(text);
226
+ pendingMarkdown.push('');
86
227
  }
87
228
  else if (block.blockType === 'image' && block.url) {
88
- const token = `MOXNMEDIA${media.length}PLACEHOLDER`;
89
- media.push({ token, type: 'image', url: block.url, alt: block.alt });
90
- parts.push(token);
91
- parts.push('');
229
+ flushText();
230
+ if (block.storageKey && options?.notionClient) {
231
+ const fname = block.filename || deriveFilename(block.storageKey, block.mimeType || 'image/png');
232
+ const uploadId = await uploadFileToNotion(options.notionClient, block.url, fname, block.mimeType || 'image/png');
233
+ allBlocks.push(uploadId ? notionImageUploadBlock(uploadId, block.alt) : notionImageBlock(block.url, block.alt));
234
+ }
235
+ else {
236
+ allBlocks.push(notionImageBlock(block.url, block.alt));
237
+ }
92
238
  }
93
239
  else if (block.blockType === 'document' && block.url) {
94
- const token = `MOXNMEDIA${media.length}PLACEHOLDER`;
95
- media.push({ token, type: 'file', url: block.url, filename: block.filename });
96
- parts.push(token);
97
- parts.push('');
240
+ flushText();
241
+ if (block.storageKey && options?.notionClient) {
242
+ const fname = block.filename || deriveFilename(block.storageKey, block.mimeType || 'application/octet-stream');
243
+ const uploadId = await uploadFileToNotion(options.notionClient, block.url, fname, block.mimeType || 'application/octet-stream');
244
+ if (uploadId) {
245
+ allBlocks.push(block.mimeType === 'application/pdf' ? notionPdfUploadBlock(uploadId, block.filename) : notionFileUploadBlock(uploadId, block.filename));
246
+ }
247
+ else if (block.mimeType === 'application/pdf') {
248
+ allBlocks.push(notionPdfBlock(block.url, block.filename));
249
+ }
250
+ else {
251
+ allBlocks.push(notionFileBlock(block.url, block.filename));
252
+ }
253
+ }
254
+ else if (block.mimeType === 'application/pdf') {
255
+ allBlocks.push(notionPdfBlock(block.url, block.filename));
256
+ }
257
+ else {
258
+ allBlocks.push(notionFileBlock(block.url, block.filename));
259
+ }
98
260
  }
99
261
  else if (block.blockType === 'csv' && block.url) {
100
- const token = `MOXNMEDIA${media.length}PLACEHOLDER`;
101
- media.push({ token, type: 'embed', url: block.url, filename: block.filename || 'data.csv' });
102
- parts.push(token);
103
- parts.push('');
262
+ flushText();
263
+ if (block.storageKey && options?.notionClient) {
264
+ const fname = block.filename || deriveFilename(block.storageKey, 'text/csv');
265
+ const uploadId = await uploadFileToNotion(options.notionClient, block.url, fname, 'text/csv');
266
+ allBlocks.push(uploadId ? notionFileUploadBlock(uploadId, block.filename || 'data.csv') : notionFileBlock(block.url, block.filename || 'data.csv'));
267
+ }
268
+ else {
269
+ allBlocks.push(notionFileBlock(block.url, block.filename || 'data.csv'));
270
+ }
104
271
  }
105
272
  else if (block.blockType === 'database_embed' && block.databaseId) {
106
- // Collect database ID for Pass 1.5 export
107
273
  databaseIds.push(block.databaseId);
108
- // Add a placeholder in the markdown
109
- parts.push(`> **[Database]** *(exported as inline database)*`);
110
- parts.push('');
274
+ // Keep database placeholder as text (handled in Pass 1.5)
275
+ pendingMarkdown.push(`> **[Database]** *(exported as inline database)*`);
276
+ pendingMarkdown.push('');
111
277
  }
112
278
  }
113
279
  }
114
- return { markdown: parts.join('\n').trim(), references: allReferences, databaseIds, media };
115
- }
116
- /**
117
- * Replace media placeholder paragraphs in Notion blocks with proper
118
- * image/file/embed blocks. Martian doesn't support images, so we
119
- * post-process the converted blocks.
120
- */
121
- function injectMediaBlocks(blocks, media) {
122
- if (media.length === 0)
123
- return blocks;
124
- // Build a lookup from token to media info
125
- const tokenMap = new Map(media.map((m) => [m.token, m]));
126
- return blocks.map((block) => {
127
- // Check if this is a paragraph containing a media placeholder
128
- const b = block;
129
- if (b.type !== 'paragraph' || !b.paragraph?.rich_text)
130
- return block;
131
- const text = b.paragraph.rich_text.map((rt) => rt.text?.content ?? '').join('').trim();
132
- const mediaInfo = tokenMap.get(text);
133
- if (!mediaInfo)
134
- return block;
135
- // Replace with proper Notion block
136
- if (mediaInfo.type === 'image') {
137
- return {
138
- object: 'block',
139
- type: 'image',
140
- image: {
141
- type: 'external',
142
- external: { url: mediaInfo.url },
143
- ...(mediaInfo.alt ? { caption: [{ type: 'text', text: { content: mediaInfo.alt } }] } : {}),
144
- },
145
- };
146
- }
147
- if (mediaInfo.type === 'file') {
148
- return {
149
- object: 'block',
150
- type: 'file',
151
- file: {
152
- type: 'external',
153
- external: { url: mediaInfo.url },
154
- caption: [{ type: 'text', text: { content: mediaInfo.filename || 'document' } }],
155
- },
156
- };
157
- }
158
- // For CSV/embeds, use a bookmark block (Notion doesn't have native CSV embed)
159
- return {
160
- object: 'block',
161
- type: 'bookmark',
162
- bookmark: {
163
- url: mediaInfo.url,
164
- caption: [{ type: 'text', text: { content: mediaInfo.filename || 'file' } }],
165
- },
166
- };
167
- });
280
+ // Flush remaining text
281
+ flushText();
282
+ return { blocks: allBlocks, references: allReferences, databaseIds };
168
283
  }
169
284
  // Max 100 blocks per API call
170
285
  const MAX_BLOCKS_PER_APPEND = 100;
@@ -242,8 +357,8 @@ export class NotionExportTarget extends ExportTarget {
242
357
  * Returns the number of references resolved.
243
358
  */
244
359
  async resolveAndAppendReferences(doc, notionPageId) {
245
- // Extract references from section content
246
- const { references } = sectionsToMarkdown(doc.sections, { extractReferences: true });
360
+ // Extract references from section content (no upload needed for reference pass)
361
+ const { references } = await sectionsToNotionBlocks(doc.sections, { extractReferences: true });
247
362
  if (references.length === 0) {
248
363
  return { resolved: 0, unresolved: 0 };
249
364
  }
@@ -413,8 +528,7 @@ export class NotionExportTarget extends ExportTarget {
413
528
  // Notion page creation / update
414
529
  // ============================================
415
530
  async createNotionPage(doc) {
416
- const { markdown, media } = sectionsToMarkdown(doc.sections, { extractReferences: true });
417
- const blocks = injectMediaBlocks(markdownToBlocks(markdown), media);
531
+ const { blocks } = await sectionsToNotionBlocks(doc.sections, { extractReferences: true, notionClient: this.client });
418
532
  // First batch: up to 100 blocks as children of the new page
419
533
  const firstBatch = blocks.slice(0, MAX_BLOCKS_PER_APPEND);
420
534
  const remainingBlocks = blocks.slice(MAX_BLOCKS_PER_APPEND);
@@ -446,8 +560,7 @@ export class NotionExportTarget extends ExportTarget {
446
560
  },
447
561
  });
448
562
  await this.clearPageContent(notionPageId);
449
- const { markdown, media } = sectionsToMarkdown(doc.sections, { extractReferences: true });
450
- const blocks = injectMediaBlocks(markdownToBlocks(markdown), media);
563
+ const { blocks } = await sectionsToNotionBlocks(doc.sections, { extractReferences: true, notionClient: this.client });
451
564
  await this.appendRemainingBlocks(notionPageId, blocks);
452
565
  }
453
566
  async clearPageContent(pageId) {
package/dist/types.d.ts CHANGED
@@ -8,7 +8,7 @@
8
8
  * to kb-migrate (local filesystem paths) — MoxnClient converts them to
9
9
  * `type: 'storage'` before sending to the KB API.
10
10
  */
11
- export type ContentBlock = TextBlock | ImageRemoteBlock | ImageFileBlock | DocumentRemoteBlock | DocumentFileBlock | CsvRemoteBlock | CsvFileBlock | DatabaseEmbedBlock;
11
+ export type ContentBlock = TextBlock | ImageRemoteBlock | ImageFileBlock | DocumentRemoteBlock | DocumentFileBlock | CsvRemoteBlock | CsvFileBlock | FileRemoteBlock | FileFileBlock | DatabaseEmbedBlock;
12
12
  export interface TextBlock {
13
13
  blockType: 'text';
14
14
  text: string;
@@ -65,6 +65,22 @@ export interface CsvFileBlock {
65
65
  headers?: string[];
66
66
  rowCount?: number;
67
67
  }
68
/**
 * A generic file attachment whose content is not a local filesystem path.
 *
 * `type` names the form the content takes; the matching optional field
 * (`base64`, `url` or `key`) is presumably the one that is populated —
 * the declaration itself does not enforce this.
 */
export interface FileRemoteBlock {
    blockType: 'file';
    type: 'base64' | 'url' | 'storage';
    /** MIME type of the file. */
    mediaType: string;
    /** Display name of the file, when known. */
    filename?: string;
    /** Inline content, for `type: 'base64'`. */
    base64?: string;
    /** Remote address, for `type: 'url'`. */
    url?: string;
    /** Storage key, for `type: 'storage'`. */
    key?: string;
}
77
/**
 * A generic file attachment that lives on the local filesystem.
 *
 * This is the `type: 'file'` counterpart of the remote file block: `path`
 * is required and there is no `url`, `key` or `base64` field.
 */
export interface FileFileBlock {
    blockType: 'file';
    type: 'file';
    /** MIME type of the file. */
    mediaType: string;
    /** Local filesystem path of the file. */
    path: string;
    /** Display name of the file, when known. */
    filename?: string;
}
68
84
  export interface DatabaseEmbedBlock {
69
85
  blockType: 'database_embed';
70
86
  databaseId: string;
@@ -187,7 +203,7 @@ export interface ConflictError {
187
203
  * A content block as returned by the KB API
188
204
  */
189
205
  export interface ExportContentBlock {
190
- blockType: 'text' | 'image' | 'document' | 'csv' | 'database_embed';
206
+ blockType: 'text' | 'image' | 'document' | 'csv' | 'file' | 'database_embed';
191
207
  text?: string;
192
208
  url?: string;
193
209
  mimeType?: string;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@moxn/kb-migrate",
3
- "version": "0.4.12",
3
+ "version": "0.4.14",
4
4
  "description": "Migration tool for importing documents into Moxn Knowledge Base from local files, Notion, Google Docs, and more",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",