@moxn/kb-migrate 0.4.13 → 0.4.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/client.js CHANGED
@@ -227,6 +227,19 @@ export class MoxnClient {
227
227
  rowCount: block.rowCount,
228
228
  };
229
229
  }
230
+ // Handle generic files - upload to storage
231
+ if (block.blockType === 'file' && block.type === 'file' && block.path) {
232
+ const data = await fs.readFile(block.path);
233
+ const filename = block.filename || block.path.split('/').pop();
234
+ const { key } = await this.uploadFile(data, block.mediaType || 'application/octet-stream', filename);
235
+ return {
236
+ blockType: block.blockType,
237
+ type: 'storage',
238
+ key,
239
+ mediaType: block.mediaType || 'application/octet-stream',
240
+ filename: block.filename,
241
+ };
242
+ }
230
243
  return block;
231
244
  }));
232
245
  }
@@ -108,6 +108,9 @@ async function convertBlock(block, client, pagePathMap, visitedSyncedBlocks, dat
108
108
  case 'video':
109
109
  results.push(...convertVideo(block));
110
110
  break;
111
+ case 'audio':
112
+ results.push(...convertAudio(block));
113
+ break;
111
114
  case 'link_to_page':
112
115
  results.push(...convertLinkToPage(block, pagePathMap));
113
116
  break;
@@ -346,7 +349,7 @@ function convertFile(block) {
346
349
  },
347
350
  ];
348
351
  }
349
- // CSV or other file — render as link
352
+ // CSV file
350
353
  if (filename.toLowerCase().endsWith('.csv')) {
351
354
  return [
352
355
  {
@@ -358,8 +361,16 @@ function convertFile(block) {
358
361
  },
359
362
  ];
360
363
  }
361
- // Generic file — render as markdown link
362
- return [textBlock(`[${filename}](${url})`)];
364
+ // Generic file — emit as file block
365
+ return [
366
+ {
367
+ blockType: 'file',
368
+ type: 'url',
369
+ url,
370
+ mediaType: guessFileMimeType(filename),
371
+ filename,
372
+ },
373
+ ];
363
374
  }
364
375
  function convertVideo(block) {
365
376
  const v = block;
@@ -367,7 +378,16 @@ function convertVideo(block) {
367
378
  if (!url)
368
379
  return [];
369
380
  const caption = richTextToPlain(v.video.caption ?? []);
370
- return [textBlock(`[${caption || 'Video'}](${url})`)];
381
+ const filename = caption || extractFilename(url) || 'video';
382
+ return [
383
+ {
384
+ blockType: 'file',
385
+ type: 'url',
386
+ url,
387
+ mediaType: guessVideoMimeType(url),
388
+ filename,
389
+ },
390
+ ];
371
391
  }
372
392
  function convertLinkToPage(block, pagePathMap) {
373
393
  const ltp = block;
@@ -573,6 +593,74 @@ async function convertAndMergeChildren(children, client, pagePathMap, visitedSyn
573
593
  }
574
594
  return merged;
575
595
  }
596
/**
 * Convert a Notion audio block into a single URL-backed `file` content block.
 *
 * The URL is read from `audio.external` when the audio type is 'external',
 * otherwise from `audio.file`. When no URL is available an empty array is
 * returned so the caller emits nothing for this block.
 *
 * The filename is chosen in this order: plain-text caption, then whatever
 * `extractFilename(url)` yields, then the literal 'audio'.
 *
 * @param block Notion block carrying an `audio` payload.
 * @returns An array with zero or one `file` blocks.
 */
function convertAudio(block) {
    const { audio } = block;
    const source = audio.type === 'external' ? audio.external : audio.file;
    const url = source?.url;
    if (!url) {
        return [];
    }
    const captionText = richTextToPlain(audio.caption ?? []);
    const filename = captionText || extractFilename(url) || 'audio';
    const fileBlock = {
        blockType: 'file',
        type: 'url',
        url,
        mediaType: guessAudioMimeType(url),
        filename,
    };
    return [fileBlock];
}
613
/**
 * Guess a MIME type from a filename's extension.
 *
 * Only the text after the last '.' is considered, compared case-insensitively.
 * Names without a '.' (e.g. "README", or a file literally called "zip") and
 * unknown extensions fall back to 'application/octet-stream'.
 *
 * The lookup table is a Map rather than a plain object so that extensions
 * which collide with Object.prototype members ("constructor", "toString",
 * "__proto__", ...) cannot leak a non-string value out as the media type.
 *
 * @param filename File name (not a full URL) whose extension is inspected.
 * @returns A MIME type string; never a non-string value.
 */
function guessFileMimeType(filename) {
    const fallback = 'application/octet-stream';
    const dotIndex = filename.lastIndexOf('.');
    if (dotIndex === -1) {
        // No extension at all: do not treat the whole name as one.
        return fallback;
    }
    const ext = filename.slice(dotIndex + 1).toLowerCase();
    const mimeByExtension = new Map([
        ['docx', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
        ['xlsx', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'],
        ['pptx', 'application/vnd.openxmlformats-officedocument.presentationml.presentation'],
        ['doc', 'application/msword'],
        ['xls', 'application/vnd.ms-excel'],
        ['ppt', 'application/vnd.ms-powerpoint'],
        ['odt', 'application/vnd.oasis.opendocument.text'],
        ['ods', 'application/vnd.oasis.opendocument.spreadsheet'],
        ['rtf', 'application/rtf'],
        ['txt', 'text/plain'],
        ['md', 'text/markdown'],
        ['json', 'application/json'],
        ['xml', 'application/xml'],
        ['yaml', 'text/yaml'],
        ['yml', 'text/yaml'],
        ['html', 'text/html'],
        ['htm', 'text/html'],
        ['zip', 'application/zip'],
        ['gz', 'application/gzip'],
        ['tar', 'application/x-tar'],
        ['svg', 'image/svg+xml'],
        ['epub', 'application/epub+zip'],
    ]);
    return mimeByExtension.get(ext) ?? fallback;
}
642
/**
 * Guess a video MIME type from a URL.
 *
 * Only the URL's path is inspected, so extensions that merely appear in the
 * host name or in the query string / fragment (common on signed URLs) cannot
 * override the real file extension. The path is matched case-insensitively:
 * first as a true suffix, then, for paths such as ".../clip.webm/download",
 * as a substring of the path.
 *
 * @param url Absolute or relative URL of the video.
 * @returns 'video/mp4', 'video/webm' or 'video/quicktime'; defaults to
 *          'video/mp4' when nothing matches.
 */
function guessVideoMimeType(url) {
    let path;
    try {
        path = new URL(url).pathname;
    }
    catch {
        // Relative or otherwise unparseable URL: drop query and fragment by hand.
        path = url.split(/[?#]/, 1)[0];
    }
    path = path.toLowerCase();
    const known = [
        ['.mp4', 'video/mp4'],
        ['.webm', 'video/webm'],
        ['.mov', 'video/quicktime'],
    ];
    const exact = known.find(([suffix]) => path.endsWith(suffix));
    if (exact) {
        return exact[1];
    }
    const loose = known.find(([suffix]) => path.includes(suffix));
    return loose ? loose[1] : 'video/mp4';
}
652
/**
 * Guess an audio MIME type from a URL.
 *
 * Only the URL's path is inspected, so extensions that merely appear in the
 * host name or in the query string / fragment cannot override the real file
 * extension. The path is matched case-insensitively: first as a true suffix,
 * then as a substring of the path for URLs with trailing segments.
 *
 * @param url Absolute or relative URL of the audio file.
 * @returns 'audio/mpeg', 'audio/wav', 'audio/ogg' or 'audio/mp4'; defaults to
 *          'audio/mpeg' when nothing matches.
 */
function guessAudioMimeType(url) {
    let path;
    try {
        path = new URL(url).pathname;
    }
    catch {
        // Relative or otherwise unparseable URL: drop query and fragment by hand.
        path = url.split(/[?#]/, 1)[0];
    }
    path = path.toLowerCase();
    const known = [
        ['.mp3', 'audio/mpeg'],
        ['.wav', 'audio/wav'],
        ['.ogg', 'audio/ogg'],
        ['.m4a', 'audio/mp4'],
    ];
    const exact = known.find(([suffix]) => path.endsWith(suffix));
    if (exact) {
        return exact[1];
    }
    const loose = known.find(([suffix]) => path.includes(suffix));
    return loose ? loose[1] : 'audio/mpeg';
}
576
664
  function guessImageMediaType(url) {
577
665
  const lower = url.toLowerCase();
578
666
  if (lower.includes('.jpg') || lower.includes('.jpeg'))
@@ -56,7 +56,7 @@ export class NotionMediaDownloader {
56
56
  filename: urlBlock.filename,
57
57
  });
58
58
  }
59
- else {
59
+ else if (urlBlock.blockType === 'csv') {
60
60
  results.push({
61
61
  blockType: 'csv',
62
62
  type: 'file',
@@ -67,6 +67,16 @@ export class NotionMediaDownloader {
67
67
  rowCount: urlBlock.rowCount,
68
68
  });
69
69
  }
70
+ else {
71
+ // file block
72
+ results.push({
73
+ blockType: 'file',
74
+ type: 'file',
75
+ path: localPath,
76
+ mediaType: urlBlock.mediaType,
77
+ filename: urlBlock.filename,
78
+ });
79
+ }
70
80
  }
71
81
  catch (error) {
72
82
  console.warn(` Warning: Failed to download ${block.blockType}: ${error instanceof Error ? error.message : error}`);
@@ -153,6 +163,23 @@ function mimeToExtension(mime) {
153
163
  'image/webp': '.webp',
154
164
  'application/pdf': '.pdf',
155
165
  'text/csv': '.csv',
166
+ 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': '.docx',
167
+ 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': '.xlsx',
168
+ 'application/vnd.openxmlformats-officedocument.presentationml.presentation': '.pptx',
169
+ 'application/msword': '.doc',
170
+ 'application/vnd.ms-excel': '.xls',
171
+ 'application/vnd.ms-powerpoint': '.ppt',
172
+ 'text/plain': '.txt',
173
+ 'text/markdown': '.md',
174
+ 'application/json': '.json',
175
+ 'application/xml': '.xml',
176
+ 'text/yaml': '.yaml',
177
+ 'text/html': '.html',
178
+ 'application/zip': '.zip',
179
+ 'application/gzip': '.gz',
180
+ 'application/x-tar': '.tar',
181
+ 'image/svg+xml': '.svg',
182
+ 'application/epub+zip': '.epub',
156
183
  };
157
184
  return map[mime] ?? null;
158
185
  }
@@ -95,6 +95,95 @@ function notionFileBlock(url, caption) {
95
95
  },
96
96
  };
97
97
  }
98
+ // ============================================
99
+ // Notion file_upload block helpers
100
+ // ============================================
101
/**
 * Build a Notion `image` block that points at a previously created file upload.
 *
 * @param fileUploadId ID of the Notion file upload to attach.
 * @param alt Optional alt text; when truthy it becomes a single-item text caption,
 *            otherwise no `caption` key is emitted.
 * @returns A Notion block object of type 'image'.
 */
function notionImageUploadBlock(fileUploadId, alt) {
    const captionPart = alt
        ? { caption: [{ type: 'text', text: { content: alt } }] }
        : {};
    const image = Object.assign({ type: 'file_upload', file_upload: { id: fileUploadId } }, captionPart);
    return { object: 'block', type: 'image', image };
}
112
/**
 * Build a Notion `pdf` block that points at a previously created file upload.
 *
 * @param fileUploadId ID of the Notion file upload to attach.
 * @param caption Optional caption; when truthy it becomes a single-item text
 *                caption, otherwise no `caption` key is emitted.
 * @returns A Notion block object of type 'pdf'.
 */
function notionPdfUploadBlock(fileUploadId, caption) {
    const captionPart = caption
        ? { caption: [{ type: 'text', text: { content: caption } }] }
        : {};
    const pdf = Object.assign({ type: 'file_upload', file_upload: { id: fileUploadId } }, captionPart);
    return { object: 'block', type: 'pdf', pdf };
}
123
/**
 * Build a Notion `file` block that points at a previously created file upload.
 *
 * Unlike the image and pdf variants, a caption is always emitted: a falsy
 * `caption` is replaced with the literal 'file'.
 *
 * @param fileUploadId ID of the Notion file upload to attach.
 * @param caption Caption text; falls back to 'file' when falsy.
 * @returns A Notion block object of type 'file'.
 */
function notionFileUploadBlock(fileUploadId, caption) {
    const captionText = caption || 'file';
    const file = {
        type: 'file_upload',
        file_upload: { id: fileUploadId },
        caption: [{ type: 'text', text: { content: captionText } }],
    };
    return { object: 'block', type: 'file', file };
}
134
/**
 * Derive a filename from a storage key and mime type.
 *
 * If the last '/'-separated segment of the key contains a '.', it is returned
 * unchanged. Otherwise the result is "file" plus an extension looked up from
 * the mime type, or plain "file" when the mime type is unknown or missing.
 *
 * The mime type is normalised before lookup (parameters such as
 * "; charset=utf-8" are dropped, case is ignored). The table covers the
 * image/pdf/csv types plus the office, text and archive types this package
 * maps elsewhere, so generic file uploads keep a usable extension. A Map is
 * used so that odd mime strings cannot resolve to Object.prototype members.
 *
 * @param storageKey Storage key, possibly containing '/' separators.
 * @param mimeType MIME type of the stored object; may be undefined.
 * @returns A filename suitable for passing to the upload call.
 */
function deriveFilename(storageKey, mimeType) {
    const basename = storageKey.split('/').pop();
    if (basename && basename.includes('.'))
        return basename;
    const extensionByMime = new Map([
        ['image/png', '.png'],
        ['image/jpeg', '.jpg'],
        ['image/gif', '.gif'],
        ['image/webp', '.webp'],
        ['image/svg+xml', '.svg'],
        ['application/pdf', '.pdf'],
        ['text/csv', '.csv'],
        ['application/vnd.openxmlformats-officedocument.wordprocessingml.document', '.docx'],
        ['application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', '.xlsx'],
        ['application/vnd.openxmlformats-officedocument.presentationml.presentation', '.pptx'],
        ['application/msword', '.doc'],
        ['application/vnd.ms-excel', '.xls'],
        ['application/vnd.ms-powerpoint', '.ppt'],
        ['text/plain', '.txt'],
        ['text/markdown', '.md'],
        ['application/json', '.json'],
        ['application/xml', '.xml'],
        ['text/yaml', '.yaml'],
        ['text/html', '.html'],
        ['application/zip', '.zip'],
        ['application/gzip', '.gz'],
        ['application/x-tar', '.tar'],
        ['application/epub+zip', '.epub'],
    ]);
    // Compare on the bare "type/subtype" so parameters and casing do not defeat the lookup.
    const essence = String(mimeType ?? '').split(';', 1)[0].trim().toLowerCase();
    return `file${extensionByMime.get(essence) ?? ''}`;
}
152
/**
 * Copy a file from a URL into Notion through the file-upload endpoints.
 *
 * Steps, in order: fetch the URL, buffer the whole body in memory, create a
 * single-part file upload, then send the bytes to it. Each Notion call is
 * preceded by a `sleep(RATE_LIMIT_MS)` pause.
 *
 * Failures never propagate: a non-OK download or any thrown error is logged
 * with `console.warn` and reported as `null` so callers can fall back.
 *
 * @param client Notion client exposing `fileUploads.create` and `fileUploads.send`.
 * @param url URL to download the file from.
 * @param filename Name given to the upload and used in log messages.
 * @param contentType MIME type passed to Notion and used for the Blob.
 * @returns The file upload ID, or `null` on any failure.
 */
async function uploadFileToNotion(client, url, filename, contentType) {
    try {
        const download = await fetch(url);
        if (download.ok) {
            const bytes = Buffer.from(await download.arrayBuffer());
            // Step 1: register the upload with Notion.
            await sleep(RATE_LIMIT_MS);
            const created = await client.fileUploads.create({
                mode: 'single_part',
                filename,
                content_type: contentType,
            });
            // Step 2: push the bytes to the upload that was just created.
            await sleep(RATE_LIMIT_MS);
            const payload = new Blob([bytes], { type: contentType });
            await client.fileUploads.send({
                file_upload_id: created.id,
                file: { data: payload, filename },
            });
            return created.id;
        }
        console.warn(` [upload] Download failed (${download.status}) for ${filename}`);
        return null;
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : error;
        console.warn(` [upload] Failed for ${filename}: ${reason}`);
        return null;
    }
}
98
187
  /**
99
188
  * Convert KB document sections to Notion blocks.
100
189
  *
@@ -105,7 +194,7 @@ function notionFileBlock(url, caption) {
105
194
  *
106
195
  * Section names become H2 headings (mirrors the import convention).
107
196
  */
108
- function sectionsToNotionBlocks(sections, options) {
197
+ async function sectionsToNotionBlocks(sections, options) {
109
198
  const allBlocks = [];
110
199
  const allReferences = [];
111
200
  const databaseIds = [];
@@ -137,14 +226,32 @@ function sectionsToNotionBlocks(sections, options) {
137
226
  pendingMarkdown.push('');
138
227
  }
139
228
  else if (block.blockType === 'image' && block.url) {
140
- // Flush any pending text, then add image directly
141
229
  flushText();
142
- allBlocks.push(notionImageBlock(block.url, block.alt));
230
+ if (block.storageKey && options?.notionClient) {
231
+ const fname = block.filename || deriveFilename(block.storageKey, block.mimeType || 'image/png');
232
+ const uploadId = await uploadFileToNotion(options.notionClient, block.url, fname, block.mimeType || 'image/png');
233
+ allBlocks.push(uploadId ? notionImageUploadBlock(uploadId, block.alt) : notionImageBlock(block.url, block.alt));
234
+ }
235
+ else {
236
+ allBlocks.push(notionImageBlock(block.url, block.alt));
237
+ }
143
238
  }
144
239
  else if (block.blockType === 'document' && block.url) {
145
240
  flushText();
146
- // Use native pdf block for PDFs (inline viewer), file block for others
147
- if (block.mimeType === 'application/pdf') {
241
+ if (block.storageKey && options?.notionClient) {
242
+ const fname = block.filename || deriveFilename(block.storageKey, block.mimeType || 'application/octet-stream');
243
+ const uploadId = await uploadFileToNotion(options.notionClient, block.url, fname, block.mimeType || 'application/octet-stream');
244
+ if (uploadId) {
245
+ allBlocks.push(block.mimeType === 'application/pdf' ? notionPdfUploadBlock(uploadId, block.filename) : notionFileUploadBlock(uploadId, block.filename));
246
+ }
247
+ else if (block.mimeType === 'application/pdf') {
248
+ allBlocks.push(notionPdfBlock(block.url, block.filename));
249
+ }
250
+ else {
251
+ allBlocks.push(notionFileBlock(block.url, block.filename));
252
+ }
253
+ }
254
+ else if (block.mimeType === 'application/pdf') {
148
255
  allBlocks.push(notionPdfBlock(block.url, block.filename));
149
256
  }
150
257
  else {
@@ -153,7 +260,14 @@ function sectionsToNotionBlocks(sections, options) {
153
260
  }
154
261
  else if (block.blockType === 'csv' && block.url) {
155
262
  flushText();
156
- allBlocks.push(notionFileBlock(block.url, block.filename || 'data.csv'));
263
+ if (block.storageKey && options?.notionClient) {
264
+ const fname = block.filename || deriveFilename(block.storageKey, 'text/csv');
265
+ const uploadId = await uploadFileToNotion(options.notionClient, block.url, fname, 'text/csv');
266
+ allBlocks.push(uploadId ? notionFileUploadBlock(uploadId, block.filename || 'data.csv') : notionFileBlock(block.url, block.filename || 'data.csv'));
267
+ }
268
+ else {
269
+ allBlocks.push(notionFileBlock(block.url, block.filename || 'data.csv'));
270
+ }
157
271
  }
158
272
  else if (block.blockType === 'database_embed' && block.databaseId) {
159
273
  databaseIds.push(block.databaseId);
@@ -243,8 +357,8 @@ export class NotionExportTarget extends ExportTarget {
243
357
  * Returns the number of references resolved.
244
358
  */
245
359
  async resolveAndAppendReferences(doc, notionPageId) {
246
- // Extract references from section content
247
- const { references } = sectionsToNotionBlocks(doc.sections, { extractReferences: true });
360
+ // Extract references from section content (no upload needed for reference pass)
361
+ const { references } = await sectionsToNotionBlocks(doc.sections, { extractReferences: true });
248
362
  if (references.length === 0) {
249
363
  return { resolved: 0, unresolved: 0 };
250
364
  }
@@ -414,7 +528,7 @@ export class NotionExportTarget extends ExportTarget {
414
528
  // Notion page creation / update
415
529
  // ============================================
416
530
  async createNotionPage(doc) {
417
- const { blocks } = sectionsToNotionBlocks(doc.sections, { extractReferences: true });
531
+ const { blocks } = await sectionsToNotionBlocks(doc.sections, { extractReferences: true, notionClient: this.client });
418
532
  // First batch: up to 100 blocks as children of the new page
419
533
  const firstBatch = blocks.slice(0, MAX_BLOCKS_PER_APPEND);
420
534
  const remainingBlocks = blocks.slice(MAX_BLOCKS_PER_APPEND);
@@ -446,7 +560,7 @@ export class NotionExportTarget extends ExportTarget {
446
560
  },
447
561
  });
448
562
  await this.clearPageContent(notionPageId);
449
- const { blocks } = sectionsToNotionBlocks(doc.sections, { extractReferences: true });
563
+ const { blocks } = await sectionsToNotionBlocks(doc.sections, { extractReferences: true, notionClient: this.client });
450
564
  await this.appendRemainingBlocks(notionPageId, blocks);
451
565
  }
452
566
  async clearPageContent(pageId) {
package/dist/types.d.ts CHANGED
@@ -8,7 +8,7 @@
8
8
  * to kb-migrate (local filesystem paths) — MoxnClient converts them to
9
9
  * `type: 'storage'` before sending to the KB API.
10
10
  */
11
- export type ContentBlock = TextBlock | ImageRemoteBlock | ImageFileBlock | DocumentRemoteBlock | DocumentFileBlock | CsvRemoteBlock | CsvFileBlock | DatabaseEmbedBlock;
11
+ export type ContentBlock = TextBlock | ImageRemoteBlock | ImageFileBlock | DocumentRemoteBlock | DocumentFileBlock | CsvRemoteBlock | CsvFileBlock | FileRemoteBlock | FileFileBlock | DatabaseEmbedBlock;
12
12
  export interface TextBlock {
13
13
  blockType: 'text';
14
14
  text: string;
@@ -65,6 +65,22 @@ export interface CsvFileBlock {
65
65
  headers?: string[];
66
66
  rowCount?: number;
67
67
  }
68
/**
 * A generic file block whose content is referenced inline, by URL, or by
 * storage key rather than by a local filesystem path.
 *
 * `type` names the reference style; presumably the optional field with the
 * matching name (`base64`, `url`, or `key` for 'storage') is the one that is
 * populated, but the interface itself does not enforce that pairing.
 */
export interface FileRemoteBlock {
    /** Discriminator shared by all generic file blocks. */
    blockType: 'file';
    /** How the file content is referenced. */
    type: 'base64' | 'url' | 'storage';
    /** MIME type of the file. */
    mediaType: string;
    /** Optional display / download name. */
    filename?: string;
    /** Inline content, for `type: 'base64'`. */
    base64?: string;
    /** Remote location, for `type: 'url'`. */
    url?: string;
    /** Storage key, for `type: 'storage'`. */
    key?: string;
}
77
/**
 * A generic file block backed by a file on the local filesystem.
 *
 * This is the `type: 'file'` counterpart of the remote file block: it carries
 * a required `path` instead of a URL, storage key, or inline content.
 */
export interface FileFileBlock {
    /** Discriminator shared by all generic file blocks. */
    blockType: 'file';
    /** Marks the content as living at a local path. */
    type: 'file';
    /** Local filesystem path of the file. */
    path: string;
    /** MIME type of the file. */
    mediaType: string;
    /** Optional display / download name. */
    filename?: string;
}
68
84
  export interface DatabaseEmbedBlock {
69
85
  blockType: 'database_embed';
70
86
  databaseId: string;
@@ -187,7 +203,7 @@ export interface ConflictError {
187
203
  * A content block as returned by the KB API
188
204
  */
189
205
  export interface ExportContentBlock {
190
- blockType: 'text' | 'image' | 'document' | 'csv' | 'database_embed';
206
+ blockType: 'text' | 'image' | 'document' | 'csv' | 'file' | 'database_embed';
191
207
  text?: string;
192
208
  url?: string;
193
209
  mimeType?: string;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@moxn/kb-migrate",
3
- "version": "0.4.13",
3
+ "version": "0.4.15",
4
4
  "description": "Migration tool for importing documents into Moxn Knowledge Base from local files, Notion, Google Docs, and more",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -82,6 +82,7 @@
82
82
  "prepublishOnly": "npm run build"
83
83
  },
84
84
  "dependencies": {
85
+ "@moxn/kb-migrate": "^0.4.14",
85
86
  "@notionhq/client": "^5.9.0",
86
87
  "@tryfabric/martian": "^1.2.4",
87
88
  "commander": "^12.0.0",