@moxn/kb-migrate 0.4.6 → 0.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,486 @@
1
+ /**
2
+ * NotionExportTarget — exports KB documents to Notion pages.
3
+ *
4
+ * Converts KB section content (markdown) to Notion blocks using @tryfabric/martian,
5
+ * then creates or updates Notion pages via the Notion API.
6
+ *
7
+ * Supports a two-pass export algorithm:
8
+ * - Pass 1: Create all pages (without cross-references)
9
+ * - Pass 2: Append reference blocks using the KB doc ID -> Notion page ID mapping
10
+ *
11
+ * Conflict detection uses the MoxnClient's notion-mapping API endpoints
12
+ * (when available) to find existing Notion pages for a KB document.
13
+ */
14
+ import { Client } from '@notionhq/client';
15
+ import { markdownToBlocks } from '@tryfabric/martian';
16
+ import { ExportTarget, } from './base.js';
17
+ // ============================================
18
+ // Helpers
19
+ // ============================================
20
/** Pause, in milliseconds, awaited before most Notion API calls in this file. */
const RATE_LIMIT_MS = 350;

/**
 * Wait for a fixed amount of time.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Promise that settles once the timer fires.
 */
function sleep(ms) {
    return new Promise((done) => {
        setTimeout(done, ms);
    });
}
24
/**
 * Remove `<moxn:comment ...>` wrappers from a string while keeping whatever
 * text they enclose.
 *
 * @param {string} text - Text that may contain comment tags.
 * @returns {string} The text with every opening/closing tag pair removed.
 */
function stripCommentTags(text) {
    const commentTagPattern = /<moxn:comment[^>]*>([\s\S]*?)<\/moxn:comment>/g;
    // The replacer returns the captured inner text unchanged.
    return text.replace(commentTagPattern, (_whole, inner) => inner);
}
28
/**
 * Extract KB path references from markdown text.
 *
 * A KB reference is a markdown link whose URL starts with a single `/`
 * (e.g. `[Guide](/docs/guide)`). Each such link is replaced by its display
 * text and reported in `references`. Left untouched:
 *  - image syntax (`![alt](/path)`), which is an embedded asset rather than a
 *    document link;
 *  - protocol-relative URLs (`//host/path`);
 *  - links to any other URL.
 *
 * @param {string} text - Markdown source.
 * @returns {{ cleanedText: string, references: Array<{ targetKbPath: string, displayText: string }> }}
 *   The text with KB links reduced to plain text, plus the extracted
 *   references in order of appearance (duplicates included).
 */
function extractKBPathReferences(text) {
    const references = [];
    // [display text](/kb/path); the (?<!!) lookbehind skips image syntax.
    const KB_PATH_RE = /(?<!!)\[([^\]]+)\]\((\/[^)]+)\)/g;
    const cleanedText = text.replace(KB_PATH_RE, (match, displayText, path) => {
        // The pattern already guarantees a leading "/"; "//" means a
        // protocol-relative external URL, not a KB document path.
        if (path.startsWith('//')) {
            return match;
        }
        references.push({ targetKbPath: path, displayText });
        // Replace with plain text (the link text); references are added separately.
        return displayText;
    });
    return { cleanedText, references };
}
49
/**
 * Convert a KB document's sections into a single markdown string.
 * Section names become H2 headings (mirrors the import convention).
 *
 * Block handling:
 *  - `text`: comment tags are stripped; when `options.extractReferences` is
 *    true, KB path links are reduced to plain text and collected.
 *  - `image`: rendered as a markdown image.
 *  - `document` / `csv`: rendered as a markdown link to the block's URL.
 *  - `database_embed`: the database ID is collected and a placeholder
 *    blockquote is emitted.
 * Blocks of any other type, or lacking the field they need (text, url,
 * databaseId), are skipped. A missing `sections` list or `section.content`
 * is treated as empty.
 *
 * @param {Array<{ name: string, content?: Array<object> | null }> | null | undefined} sections
 * @param {{ extractReferences?: boolean }} [options]
 * @returns {{ markdown: string, references: Array<{ targetKbPath: string, displayText: string }>, databaseIds: string[] }}
 */
function sectionsToMarkdown(sections, options) {
    const extractRefs = options?.extractReferences ?? false;
    const parts = [];
    const allReferences = [];
    const databaseIds = [];
    // Backslash-escape characters that could end or corrupt a link label, so
    // a file name like "report [final.pdf" still yields a well-formed link.
    const escapeLabel = (label) => String(label).replace(/[\\\[\]]/g, '\\$&');
    for (const section of sections ?? []) {
        parts.push(`## ${section.name}\n`);
        for (const block of section.content ?? []) {
            if (block.blockType === 'text' && block.text) {
                let text = stripCommentTags(block.text);
                if (extractRefs) {
                    const { cleanedText, references } = extractKBPathReferences(text);
                    text = cleanedText;
                    allReferences.push(...references);
                }
                parts.push(text, '');
            }
            else if (block.blockType === 'image' && block.url) {
                parts.push(`![${escapeLabel(block.alt || '')}](${block.url})`, '');
            }
            else if (block.blockType === 'document' && block.url) {
                parts.push(`[${escapeLabel(block.filename || 'document')}](${block.url})`, '');
            }
            else if (block.blockType === 'csv' && block.url) {
                parts.push(`[${escapeLabel(block.filename || 'data.csv')}](${block.url})`, '');
            }
            else if (block.blockType === 'database_embed' && block.databaseId) {
                // Collect the database ID so the caller can export it separately,
                // and leave a placeholder in the markdown.
                databaseIds.push(block.databaseId);
                parts.push('> **[Database]** *(exported as inline database)*', '');
            }
        }
    }
    return { markdown: parts.join('\n').trim(), references: allReferences, databaseIds };
}
97
// Max 100 blocks per API call (batch size for page creation and appends)
const MAX_BLOCKS_PER_APPEND = 100;
// How long a single block deletion may take before it is treated as failed
const BLOCK_DELETE_TIMEOUT_MS = 10000;

/** Render an unknown thrown value as a message string. */
function describeError(error) {
    return error instanceof Error ? error.message : String(error);
}

/**
 * Delete one Notion block, rejecting with 'Block delete timeout' if the
 * request has not settled within BLOCK_DELETE_TIMEOUT_MS. The timer is always
 * cleared so it cannot keep the process alive after the delete has settled.
 */
async function deleteBlockWithTimeout(client, blockId) {
    let timer;
    const timeout = new Promise((_, reject) => {
        timer = setTimeout(() => reject(new Error('Block delete timeout')), BLOCK_DELETE_TIMEOUT_MS);
    });
    try {
        await Promise.race([client.blocks.delete({ block_id: blockId }), timeout]);
    }
    finally {
        clearTimeout(timer);
    }
}

// ============================================
// Target
// ============================================
/**
 * Export target that writes KB documents to Notion pages under a configured
 * parent page.
 *
 * Reads from `this.config`: `notionToken`, `parentPageId`, `conflictStrategy`,
 * `apiUrl` and `apiKey`. (`this.config` is presumably assigned by the
 * ExportTarget base constructor; confirm against base.js.)
 */
export class NotionExportTarget extends ExportTarget {
    /** Notion SDK client authenticated with `config.notionToken`. */
    client;
    /** Cache: kbDocumentId -> notionPageId (from mapping API) */
    mappingCache = new Map();
    /** Cache: notionPageId -> true (pages we've verified exist) */
    verifiedPages = new Set();
    /** Map from KB path prefix -> Notion page ID (for nested page creation) */
    pathToNotionId = new Map();
    /** KB path -> KB document ID mapping (built during two-pass export) */
    kbPathToDocId = new Map();
    /** KB doc ID -> Notion page ID mapping (built during pass 1) */
    docIdToNotionPageId = new Map();

    /**
     * @param {object} config - Target configuration; `config.notionToken` is
     *   used to authenticate the Notion client.
     */
    constructor(config) {
        super(config);
        this.client = new Client({ auth: config.notionToken });
    }

    /** Identifier of this target kind. */
    get targetType() {
        return 'notion';
    }

    /** Human-readable description of where documents are exported to. */
    get targetLocation() {
        return `Notion (parent: ${this.config.parentPageId})`;
    }

    // ============================================
    // Validation
    // ============================================
    /**
     * Check that the Notion token works and that the parent page can be
     * retrieved.
     *
     * @throws {Error} 'Notion token invalid: …' or 'Parent page not accessible: …';
     *   the original failure is attached as `cause`.
     */
    async validate() {
        console.log('Validating Notion API token...');
        try {
            await this.client.users.me({});
            console.log('  Token valid.');
        }
        catch (error) {
            throw new Error(`Notion token invalid: ${describeError(error)}`, { cause: error });
        }
        console.log('Verifying parent page...');
        try {
            await sleep(RATE_LIMIT_MS);
            await this.client.pages.retrieve({ page_id: this.config.parentPageId });
            console.log('  Parent page accessible.');
        }
        catch (error) {
            throw new Error(`Parent page not accessible: ${describeError(error)}`, { cause: error });
        }
    }

    // ============================================
    // Two-Pass Export Support
    // ============================================
    /**
     * Register a KB document in the path -> ID index.
     * Called before pass 1 so references can be resolved in pass 2.
     */
    registerDocument(doc) {
        this.kbPathToDocId.set(doc.path, doc.id);
    }

    /**
     * Record that a KB document now corresponds to a Notion page.
     * Also called internally by exportDocument after a skip, update or create.
     */
    registerExportedPage(kbDocumentId, notionPageId) {
        this.docIdToNotionPageId.set(kbDocumentId, notionPageId);
    }

    /**
     * Pass 2: Resolve references for a document and append link blocks to its Notion page.
     *
     * Finds KB path links in the document content, de-duplicates them by
     * target path, and appends a divider, a "References" heading and one
     * paragraph per reference: a page mention when the target resolves to a
     * Notion page, otherwise a gray italic plain-text fallback.
     *
     * @param {object} doc - KB document whose `sections` are scanned.
     * @param {string} notionPageId - Page that receives the reference blocks.
     * @returns {Promise<{ resolved: number, unresolved: number }>} Counts of
     *   unique references that were / were not resolved to a Notion page.
     */
    async resolveAndAppendReferences(doc, notionPageId) {
        const { references } = sectionsToMarkdown(doc.sections, { extractReferences: true });
        if (references.length === 0) {
            return { resolved: 0, unresolved: 0 };
        }
        // Deduplicate by target path, keeping the first occurrence.
        const uniqueRefs = new Map();
        for (const ref of references) {
            if (!uniqueRefs.has(ref.targetKbPath)) {
                uniqueRefs.set(ref.targetKbPath, ref);
            }
        }
        const referenceBlocks = [];
        let resolved = 0;
        let unresolved = 0;
        for (const ref of uniqueRefs.values()) {
            const targetNotionPageId = await this.resolveKBPathToNotionPage(ref.targetKbPath);
            if (targetNotionPageId) {
                // Mention block linking to the Notion page
                referenceBlocks.push({
                    object: 'block',
                    type: 'paragraph',
                    paragraph: {
                        rich_text: [
                            { type: 'text', text: { content: 'See also: ' } },
                            {
                                type: 'mention',
                                mention: {
                                    type: 'page',
                                    page: { id: targetNotionPageId },
                                },
                            },
                        ],
                    },
                });
                resolved++;
            }
            else {
                // Graceful degradation: plain text reference
                referenceBlocks.push({
                    object: 'block',
                    type: 'paragraph',
                    paragraph: {
                        rich_text: [
                            {
                                type: 'text',
                                text: { content: `See also: ${ref.displayText} (${ref.targetKbPath})` },
                                annotations: { italic: true, color: 'gray' },
                            },
                        ],
                    },
                });
                unresolved++;
            }
        }
        // referenceBlocks is never empty here: every unique reference yields one block.
        await this.appendRemainingBlocks(notionPageId, [
            { object: 'block', type: 'divider', divider: {} },
            {
                object: 'block',
                type: 'heading_3',
                heading_3: {
                    rich_text: [{ type: 'text', text: { content: 'References' } }],
                },
            },
            ...referenceBlocks,
        ]);
        return { resolved, unresolved };
    }

    /**
     * Resolve a KB path to a Notion page ID.
     *
     * The path must have been registered via registerDocument. Sources, in order:
     *  1. Current export batch (docIdToNotionPageId from pass 1)
     *  2. Local mapping cache
     *  3. Remote notion-mapping API
     *
     * @param {string} kbPath
     * @returns {Promise<string | null>} The Notion page ID, or null when unknown.
     */
    async resolveKBPathToNotionPage(kbPath) {
        const kbDocId = this.kbPathToDocId.get(kbPath);
        if (!kbDocId) {
            return null;
        }
        const batchNotionId = this.docIdToNotionPageId.get(kbDocId);
        if (batchNotionId) {
            return batchNotionId;
        }
        // findExistingNotionPage consults the cache before calling the API.
        const mappedNotionId = await this.findExistingNotionPage(kbDocId);
        return mappedNotionId || null;
    }

    // ============================================
    // Export (Pass 1)
    // ============================================
    /**
     * Export one document: update the mapped Notion page if one exists
     * (or skip it when `config.conflictStrategy === 'skip'`), otherwise create
     * a new page. Never throws; failures are logged and reported in the result.
     *
     * @param {object} doc - Document with `id`, `path`, `name`, `sections`.
     * @param {object} listItem - Unused here; kept for the ExportTarget interface.
     * @param {boolean} dryRun - When true, report what would happen without
     *   calling the Notion write APIs.
     * @returns {Promise<object>} Result with `documentId`, `documentPath`,
     *   `status` ('skipped' | 'updated' | 'created' | 'failed'), `duration`
     *   in ms, and `externalId` / `error` where applicable.
     */
    async exportDocument(doc, listItem, dryRun) {
        const startTime = Date.now();
        const buildResult = (status, extra = {}) => ({
            documentId: doc.id,
            documentPath: doc.path,
            status,
            ...extra,
            duration: Date.now() - startTime,
        });
        try {
            const existingNotionPageId = await this.findExistingNotionPage(doc.id);
            if (existingNotionPageId) {
                const existing = { externalId: existingNotionPageId };
                if (this.config.conflictStrategy === 'skip') {
                    // Still register the page so pass 2 can link to it.
                    this.registerExportedPage(doc.id, existingNotionPageId);
                    return buildResult('skipped', existing);
                }
                if (dryRun) {
                    return buildResult('updated', existing);
                }
                await this.updateNotionPage(existingNotionPageId, doc);
                await this.recordMapping(doc.id, existingNotionPageId);
                this.registerExportedPage(doc.id, existingNotionPageId);
                return buildResult('updated', existing);
            }
            if (dryRun) {
                return buildResult('created');
            }
            const notionPageId = await this.createNotionPage(doc);
            await this.recordMapping(doc.id, notionPageId);
            this.registerExportedPage(doc.id, notionPageId);
            return buildResult('created', { externalId: notionPageId });
        }
        catch (error) {
            const message = describeError(error);
            console.error(`Export failed for ${doc.path}: ${message}`);
            return buildResult('failed', { error: message });
        }
    }

    /** Nothing to release for this target. */
    async cleanup() {
        // No-op
    }

    // ============================================
    // Notion page creation / update
    // ============================================
    /**
     * Create a Notion page for the document under the configured parent page.
     * KB path links are stripped to plain text here; they are re-added as
     * reference blocks by resolveAndAppendReferences.
     *
     * @returns {Promise<string>} ID of the created page.
     */
    async createNotionPage(doc) {
        const { markdown } = sectionsToMarkdown(doc.sections, { extractReferences: true });
        const blocks = markdownToBlocks(markdown);
        // The create call carries the first batch; the rest is appended afterwards.
        const firstBatch = blocks.slice(0, MAX_BLOCKS_PER_APPEND);
        const remainingBlocks = blocks.slice(MAX_BLOCKS_PER_APPEND);
        await sleep(RATE_LIMIT_MS);
        const response = await this.client.pages.create({
            parent: { page_id: this.config.parentPageId },
            properties: {
                title: {
                    type: 'title',
                    title: [{ type: 'text', text: { content: doc.name } }],
                },
            },
            children: firstBatch,
        });
        const pageId = response.id;
        await this.appendRemainingBlocks(pageId, remainingBlocks);
        return pageId;
    }

    /**
     * Overwrite an existing Notion page: set its title, delete its current
     * child blocks, then append the document's content.
     */
    async updateNotionPage(notionPageId, doc) {
        await sleep(RATE_LIMIT_MS);
        await this.client.pages.update({
            page_id: notionPageId,
            properties: {
                title: {
                    type: 'title',
                    title: [{ type: 'text', text: { content: doc.name } }],
                },
            },
        });
        await this.clearPageContent(notionPageId);
        const { markdown } = sectionsToMarkdown(doc.sections, { extractReferences: true });
        const blocks = markdownToBlocks(markdown);
        await this.appendRemainingBlocks(notionPageId, blocks);
    }

    /**
     * Delete every child block of a page.
     *
     * All block IDs are listed first and deleted afterwards, so pagination
     * never uses a cursor issued before blocks were removed. A failure of the
     * first list request propagates to the caller. Failures of later list
     * requests or of individual deletions are logged and skipped
     * (best-effort), which can leave old content on the page.
     */
    async clearPageContent(pageId) {
        const blockIds = [];
        let cursor;
        do {
            await sleep(RATE_LIMIT_MS);
            let page;
            try {
                page = await this.client.blocks.children.list({
                    block_id: pageId,
                    page_size: 100,
                    ...(cursor ? { start_cursor: cursor } : {}),
                });
            }
            catch (error) {
                if (cursor === undefined) {
                    // Nothing has been listed yet: surface the error.
                    throw error;
                }
                console.warn(`Could not list remaining blocks of page ${pageId}: ${describeError(error)}`);
                break;
            }
            for (const block of page.results) {
                blockIds.push(block.id);
            }
            cursor = page.has_more && page.next_cursor ? page.next_cursor : null;
        } while (cursor);
        for (const blockId of blockIds) {
            try {
                await sleep(RATE_LIMIT_MS);
                await deleteBlockWithTimeout(this.client, blockId);
            }
            catch (error) {
                // Continue with other blocks even if one fails
                console.warn(`Could not delete block ${blockId}: ${describeError(error)}`);
            }
        }
    }

    /**
     * Append blocks to a page in batches of MAX_BLOCKS_PER_APPEND, pausing
     * RATE_LIMIT_MS before each request. Does nothing for an empty list.
     */
    async appendRemainingBlocks(pageId, blocks) {
        for (let offset = 0; offset < blocks.length; offset += MAX_BLOCKS_PER_APPEND) {
            const batch = blocks.slice(offset, offset + MAX_BLOCKS_PER_APPEND);
            await sleep(RATE_LIMIT_MS);
            await this.client.blocks.children.append({
                block_id: pageId,
                children: batch,
            });
        }
    }

    // ============================================
    // Mapping lookups
    // ============================================
    /**
     * Look up the Notion page mapped to a KB document, first in the local
     * cache and then via the notion-mapping API.
     *
     * @returns {Promise<string | null>} The page ID, or null when there is no
     *   mapping, the API answers with a non-OK status, or the request fails.
     */
    async findExistingNotionPage(kbDocumentId) {
        const cached = this.mappingCache.get(kbDocumentId);
        if (cached) {
            return cached;
        }
        try {
            // Encode the ID so it cannot alter the URL path.
            const url = `${this.config.apiUrl}/api/v1/kb/notion-mappings/by-document/${encodeURIComponent(kbDocumentId)}`;
            const response = await fetch(url, {
                headers: { 'x-api-key': this.config.apiKey },
            });
            if (response.ok) {
                const data = await response.json();
                const notionPageId = data.mapping?.notionPageId;
                if (notionPageId) {
                    this.mappingCache.set(kbDocumentId, notionPageId);
                    return notionPageId;
                }
            }
        }
        catch {
            // Mapping API might not be deployed yet - that's OK, treat as no mapping
        }
        return null;
    }

    /**
     * Remember a KB document -> Notion page mapping in the local cache and
     * try to persist it through the notion-mapping API. Persisting is
     * best-effort: request failures are ignored and the response is not inspected.
     */
    async recordMapping(kbDocumentId, notionPageId) {
        this.mappingCache.set(kbDocumentId, notionPageId);
        try {
            await fetch(`${this.config.apiUrl}/api/v1/kb/notion-mappings`, {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                    'x-api-key': this.config.apiKey,
                },
                body: JSON.stringify({
                    kbDocumentId,
                    notionPageId,
                    importSource: 'export',
                    syncDirection: 'kb_to_notion',
                }),
            });
        }
        catch {
            // Best-effort — mapping API might not be available
        }
    }
}
package/dist/types.d.ts CHANGED
@@ -8,7 +8,7 @@
8
8
  * to kb-migrate (local filesystem paths) — MoxnClient converts them to
9
9
  * `type: 'storage'` before sending to the KB API.
10
10
  */
11
- export type ContentBlock = TextBlock | ImageRemoteBlock | ImageFileBlock | DocumentRemoteBlock | DocumentFileBlock | CsvRemoteBlock | CsvFileBlock;
11
+ export type ContentBlock = TextBlock | ImageRemoteBlock | ImageFileBlock | DocumentRemoteBlock | DocumentFileBlock | CsvRemoteBlock | CsvFileBlock | DatabaseEmbedBlock;
12
12
  export interface TextBlock {
13
13
  blockType: 'text';
14
14
  text: string;
@@ -65,6 +65,10 @@ export interface CsvFileBlock {
65
65
  headers?: string[];
66
66
  rowCount?: number;
67
67
  }
68
+ export interface DatabaseEmbedBlock {
69
+ blockType: 'database_embed';
70
+ databaseId: string;
71
+ }
68
72
  /**
69
73
  * A section to be created in a document
70
74
  */
@@ -98,6 +102,12 @@ export interface ExtractedDocument {
98
102
  sourcePath: string;
99
103
  /** Cross-references discovered during resolution */
100
104
  references?: ExtractedReference[];
105
+ /** Source-specific metadata (e.g., Notion page ID for bidirectional sync) */
106
+ metadata?: {
107
+ notionPageId?: string;
108
+ notionTitle?: string;
109
+ [key: string]: unknown;
110
+ };
101
111
  }
102
112
  /**
103
113
  * Status of a single document migration
@@ -162,6 +172,8 @@ export interface MigrationOptions {
162
172
  aiAccess?: 'edit' | 'read' | 'none';
163
173
  /** Convenience flag: 'team' = read, 'private' = none */
164
174
  visibility?: 'team' | 'private';
175
+ /** Date filter for source documents */
176
+ dateFilter?: import('./date-filter.js').DateFilter;
165
177
  }
166
178
  /**
167
179
  * Error response from API when document already exists
@@ -175,13 +187,14 @@ export interface ConflictError {
175
187
  * A content block as returned by the KB API
176
188
  */
177
189
  export interface ExportContentBlock {
178
- blockType: 'text' | 'image' | 'document' | 'csv';
190
+ blockType: 'text' | 'image' | 'document' | 'csv' | 'database_embed';
179
191
  text?: string;
180
192
  url?: string;
181
193
  mimeType?: string;
182
194
  alt?: string;
183
195
  filename?: string;
184
196
  storageKey?: string;
197
+ databaseId?: string;
185
198
  }
186
199
  /**
187
200
  * A section within a document (API response)
@@ -201,6 +214,7 @@ export interface DocumentListItem {
201
214
  name: string;
202
215
  description: string | null;
203
216
  createdAt: string;
217
+ updatedAt: string | null;
204
218
  }
205
219
  /**
206
220
  * Full document detail with sections
@@ -271,4 +285,6 @@ export interface ExportOptions {
271
285
  pdfDir: string;
272
286
  csvDir: string;
273
287
  dryRun: boolean;
288
+ /** Date filter for exported documents */
289
+ dateFilter?: import('./date-filter.js').DateFilter;
274
290
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@moxn/kb-migrate",
3
- "version": "0.4.6",
3
+ "version": "0.4.8",
4
4
  "description": "Migration tool for importing documents into Moxn Knowledge Base from local files, Notion, Google Docs, and more",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -49,6 +49,26 @@
49
49
  "./client": {
50
50
  "types": "./dist/client.d.ts",
51
51
  "default": "./dist/client.js"
52
+ },
53
+ "./date-filter": {
54
+ "types": "./dist/date-filter.d.ts",
55
+ "default": "./dist/date-filter.js"
56
+ },
57
+ "./targets/base": {
58
+ "types": "./dist/targets/base.d.ts",
59
+ "default": "./dist/targets/base.js"
60
+ },
61
+ "./targets/notion": {
62
+ "types": "./dist/targets/notion.d.ts",
63
+ "default": "./dist/targets/notion.js"
64
+ },
65
+ "./targets": {
66
+ "types": "./dist/targets/index.d.ts",
67
+ "default": "./dist/targets/index.js"
68
+ },
69
+ "./export-notion": {
70
+ "types": "./dist/export-notion.d.ts",
71
+ "default": "./dist/export-notion.js"
52
72
  }
53
73
  },
54
74
  "bin": {
@@ -62,6 +82,8 @@
62
82
  "prepublishOnly": "npm run build"
63
83
  },
64
84
  "dependencies": {
85
+ "@notionhq/client": "^2.3.0",
86
+ "@tryfabric/martian": "^1.2.4",
65
87
  "commander": "^12.0.0",
66
88
  "glob": "^10.0.0",
67
89
  "remark-parse": "^11.0.0",