@moxn/kb-migrate 0.3.0 → 0.4.0

@@ -0,0 +1,390 @@
+/**
+ * NotionSource — imports Notion workspace content into Moxn KB.
+ *
+ * Two-pass architecture:
+ * Pass 1 (validate): Discover all pages/databases via search API, build tree, compute paths
+ * Pass 2 (extract): Walk tree depth-first, fetch blocks, convert to sections
+ *
+ * Databases are imported after all pages: creates KB database + columns, links entries.
+ */
+import { MigrationSource } from './base.js';
+import { NotionApiClient } from './notion-api.js';
+import { blocksToSections, getPageTitle, normalizeId, } from './notion-blocks.js';
+import { NotionMediaDownloader } from './notion-media.js';
+import { parseDatabaseSchema, parseEntryValues, renderPropertiesSection, } from './notion-databases.js';
+const MAX_DOCUMENT_COUNT = 10_000;
+// ============================================
+// Source
+// ============================================
+export class NotionSource extends MigrationSource {
+    client;
+    mediaDownloader;
+    // Populated during validate()
+    pageTree = [];
+    allPages = []; // flat list, depth-first order
+    pagePathMap = new Map();
+    databases = [];
+    databaseEntryPageIds = new Set();
+    _documentCount = 0;
+    constructor(config) {
+        super(config);
+        this.client = new NotionApiClient(config.token);
+        this.mediaDownloader = new NotionMediaDownloader();
+    }
+    get sourceType() {
+        return 'notion';
+    }
+    get sourceLocation() {
+        return this.config.rootPageId
+            ? `Notion (subtree: ${this.config.rootPageId})`
+            : 'Notion (full workspace)';
+    }
+    // ============================================
+    // Pass 1: Discovery (validate)
+    // ============================================
+    async validate() {
+        // 1. Test token
+        console.log('Validating Notion API token...');
+        await this.client.validateToken();
+        console.log(' Token valid.');
+        // 2. Discover all pages
+        console.log('Discovering pages...');
+        const allNotionPages = await this.client.searchPages();
+        console.log(` Found ${allNotionPages.length} pages in workspace.`);
+        // 3. Discover all databases
+        console.log('Discovering databases...');
+        const allNotionDatabases = await this.client.searchDatabases();
+        console.log(` Found ${allNotionDatabases.length} databases.`);
+        // 4. Build page tree
+        this.buildPageTree(allNotionPages);
+        // 5. Process databases — identify entries and build schemas
+        await this.processDatabases(allNotionDatabases);
+        // 6. Count and validate
+        this._documentCount = this.allPages.length;
+        // Add database-only entries (pages that are in databases but not in page tree)
+        for (const dbInfo of this.databases) {
+            for (const entry of dbInfo.entries) {
+                const nid = normalizeId(entry.id);
+                if (!this.pagePathMap.has(nid)) {
+                    this._documentCount++;
+                }
+            }
+        }
+        if (this._documentCount > MAX_DOCUMENT_COUNT) {
+            throw new Error(`Workspace has ${this._documentCount} documents, exceeding the ${MAX_DOCUMENT_COUNT} limit. ` +
+                'Use --root-page-id to import a subtree.');
+        }
+        console.log(` ${this.allPages.length} pages + ${this.databases.length} databases ready for import.`);
+        // 7. Initialize media downloader
+        await this.mediaDownloader.init();
+    }
+    async getDocumentCount() {
+        return this._documentCount;
+    }
+    /**
+     * Get database import info for the migration runner.
+     * Called after all pages are imported to create databases and link entries.
+     */
+    getDatabaseImports() {
+        return this.databases.map((dbInfo) => ({
+            notionDatabaseId: normalizeId(dbInfo.database.id),
+            schema: dbInfo.schema,
+            entries: dbInfo.entries.map((entry) => {
+                const nid = normalizeId(entry.id);
+                const title = getPageTitle(entry);
+                const slug = slugify(title);
+                const kbPath = this.pagePathMap.get(nid) ?? slug;
+                return {
+                    page: entry,
+                    kbPath,
+                };
+            }),
+        }));
+    }
+    // ============================================
+    // Pass 2: Extraction
+    // ============================================
+    async *extract() {
+        // Walk pages depth-first
+        for (const node of this.allPages) {
+            // Skip database entries that will be created during database import
+            // unless they also appear in the page tree (child_page)
+            if (node.isDatabaseEntry)
+                continue;
+            const doc = await this.extractPage(node);
+            if (doc)
+                yield doc;
+        }
+        // Extract database-only entries (pages not in page tree)
+        for (const dbInfo of this.databases) {
+            for (const entry of dbInfo.entries) {
+                const nid = normalizeId(entry.id);
+                // Only extract if not already extracted via page tree
+                if (this.pagePathMap.has(nid)) {
+                    // Page was already in the tree — check if it was yielded
+                    const treeNode = this.allPages.find((n) => normalizeId(n.page.id) === nid);
+                    if (treeNode && !treeNode.isDatabaseEntry) {
+                        // Already yielded as a page
+                        continue;
+                    }
+                }
+                const doc = await this.extractDatabaseEntry(entry, dbInfo);
+                if (doc)
+                    yield doc;
+            }
+        }
+    }
+    /** Clean up temp files after migration completes. */
+    async cleanup() {
+        await this.mediaDownloader.cleanup();
+    }
+    // ============================================
+    // Tree building
+    // ============================================
+    buildPageTree(pages) {
+        // Build lookup maps
+        const pageById = new Map();
+        const childrenByParent = new Map();
+        for (const page of pages) {
+            const nid = normalizeId(page.id);
+            pageById.set(nid, page);
+            const parentId = this.getParentPageId(page);
+            if (parentId) {
+                const npid = normalizeId(parentId);
+                if (!childrenByParent.has(npid)) {
+                    childrenByParent.set(npid, []);
+                }
+                childrenByParent.get(npid).push(page);
+            }
+        }
+        // Identify database entries
+        for (const page of pages) {
+            if (page.parent.type === 'database_id' && page.parent.database_id) {
+                this.databaseEntryPageIds.add(normalizeId(page.id));
+            }
+        }
+        // Find root pages
+        const rootPages = [];
+        if (this.config.rootPageId) {
+            // Subtree mode: start from specified page
+            const rootId = normalizeId(this.config.rootPageId);
+            const rootPage = pageById.get(rootId);
+            if (rootPage) {
+                rootPages.push(rootPage);
+            }
+            else {
+                // Root page itself wasn't in search results — search for its children
+                const children = childrenByParent.get(rootId) ?? [];
+                rootPages.push(...children);
+            }
+        }
+        else {
+            // Full workspace mode: pages with workspace parent or no parent in our set
+            for (const page of pages) {
+                if (page.parent.type === 'workspace') {
+                    rootPages.push(page);
+                }
+                else {
+                    const parentId = this.getParentPageId(page);
+                    if (parentId && !pageById.has(normalizeId(parentId))) {
+                        // Parent is not in our page set — treat as root
+                        if (page.parent.type !== 'database_id') {
+                            rootPages.push(page);
+                        }
+                    }
+                }
+            }
+        }
+        // Build tree recursively
+        const buildNode = (page, parentPath, depth, siblingSlugCounts) => {
+            if (this.config.maxDepth !== undefined && depth > this.config.maxDepth) {
+                return null;
+            }
+            const title = getPageTitle(page);
+            let slug = slugify(title);
+            // Deduplicate sibling slugs
+            const existing = siblingSlugCounts.get(slug) ?? 0;
+            siblingSlugCounts.set(slug, existing + 1);
+            if (existing > 0) {
+                slug = `${slug}-${existing + 1}`;
+            }
+            const kbPath = parentPath ? `${parentPath}/${slug}` : slug;
+            const nid = normalizeId(page.id);
+            const isDatabaseEntry = this.databaseEntryPageIds.has(nid);
+            const node = {
+                page,
+                title,
+                slug,
+                kbPath,
+                isDatabaseEntry,
+                parentDatabaseId: page.parent.type === 'database_id' ? page.parent.database_id : undefined,
+                children: [],
+            };
+            // Register in path map
+            this.pagePathMap.set(nid, kbPath);
+            // Process children
+            const childPages = childrenByParent.get(nid) ?? [];
+            const childSlugCounts = new Map();
+            for (const childPage of childPages) {
+                const childNode = buildNode(childPage, kbPath, depth + 1, childSlugCounts);
+                if (childNode) {
+                    node.children.push(childNode);
+                }
+            }
+            return node;
+        };
+        // Build roots
+        const rootSlugCounts = new Map();
+        for (const rootPage of rootPages) {
+            const node = buildNode(rootPage, '', 0, rootSlugCounts);
+            if (node) {
+                this.pageTree.push(node);
+            }
+        }
+        // Flatten tree to depth-first list
+        const flatten = (nodes) => {
+            for (const node of nodes) {
+                this.allPages.push(node);
+                flatten(node.children);
+            }
+        };
+        flatten(this.pageTree);
+    }
+    getParentPageId(page) {
+        if (page.parent.type === 'page_id')
+            return page.parent.page_id ?? null;
+        if (page.parent.type === 'block_id')
+            return page.parent.block_id ?? null;
+        return null;
+    }
+    // ============================================
+    // Database processing
+    // ============================================
+    async processDatabases(databases) {
+        for (const db of databases) {
+            // If rootPageId is set, only include databases whose parent is in our tree
+            if (this.config.rootPageId) {
+                const parentId = this.getDatabaseParentId(db);
+                if (parentId && !this.pagePathMap.has(normalizeId(parentId))) {
+                    // Also check if the root page itself is the parent
+                    if (normalizeId(parentId) !== normalizeId(this.config.rootPageId)) {
+                        continue;
+                    }
+                }
+            }
+            const schema = parseDatabaseSchema(db);
+            // Query all entries
+            const entries = await this.client.queryDatabase(db.id);
+            this.databases.push({ database: db, schema, entries });
+            // Register entry pages in databaseEntryPageIds
+            for (const entry of entries) {
+                this.databaseEntryPageIds.add(normalizeId(entry.id));
+            }
+        }
+    }
+    getDatabaseParentId(db) {
+        if (db.parent.type === 'page_id')
+            return db.parent.page_id ?? null;
+        if (db.parent.type === 'block_id')
+            return db.parent.block_id ?? null;
+        return null;
+    }
+    // ============================================
+    // Page extraction
+    // ============================================
+    async extractPage(node) {
+        try {
+            const blocks = await this.client.getBlockChildren(node.page.id);
+            if (blocks.length === 0) {
+                console.log(` Skipping empty page: ${node.title}`);
+                return null;
+            }
+            let sections = await blocksToSections(blocks, this.client, this.pagePathMap);
+            // Download Notion-hosted media files
+            sections = await this.downloadSectionMedia(sections);
+            if (sections.length === 0) {
+                console.log(` Skipping page with no content: ${node.title}`);
+                return null;
+            }
+            return {
+                relativePath: node.kbPath,
+                name: node.title,
+                sections,
+                sourcePath: `notion://${node.page.id}`,
+            };
+        }
+        catch (error) {
+            console.error(` Error extracting page "${node.title}": ${error instanceof Error ? error.message : error}`);
+            return null;
+        }
+    }
+    async extractDatabaseEntry(entry, dbInfo) {
+        try {
+            const values = parseEntryValues(entry, dbInfo.schema);
+            const title = values.title;
+            const slug = slugify(title);
+            // Build sections: properties table + page content
+            const sections = [];
+            // Add properties section if there are unmapped values
+            const propSection = renderPropertiesSection(values);
+            if (propSection) {
+                sections.push(propSection);
+            }
+            // Get page blocks
+            const blocks = await this.client.getBlockChildren(entry.id);
+            if (blocks.length > 0) {
+                const contentSections = await blocksToSections(blocks, this.client, this.pagePathMap);
+                sections.push(...contentSections);
+            }
+            // Download media
+            const processedSections = await this.downloadSectionMedia(sections);
+            const nid = normalizeId(entry.id);
+            const kbPath = this.pagePathMap.get(nid) ?? slug;
+            return {
+                relativePath: kbPath,
+                name: title,
+                sections: processedSections.length > 0
+                    ? processedSections
+                    : [
+                        {
+                            name: 'Content',
+                            content: [{ blockType: 'text', text: '*(Empty entry)*' }],
+                        },
+                    ],
+                sourcePath: `notion://${entry.id}`,
+            };
+        }
+        catch (error) {
+            console.error(` Error extracting database entry: ${error instanceof Error ? error.message : error}`);
+            return null;
+        }
+    }
+    // ============================================
+    // Media handling
+    // ============================================
+    async downloadSectionMedia(sections) {
+        const result = [];
+        for (const section of sections) {
+            const processedContent = await this.mediaDownloader.processContentBlocks(section.content);
+            result.push({ name: section.name, content: processedContent });
+        }
+        return result;
+    }
+}
+// ============================================
+// Path helpers
+// ============================================
+/**
+ * Slugify a page title for use as a KB path segment.
+ * Lowercase, spaces→hyphens, strip special chars, ltree-compatible.
+ */
+export function slugify(title) {
+    return (title
+        .toLowerCase()
+        .trim()
+        .replace(/[^\w\s-]/g, '') // Remove special chars (keep word chars, spaces, hyphens)
+        .replace(/[\s_]+/g, '-') // Spaces and underscores → hyphens (ltree forbids _ and .)
+        .replace(/-+/g, '-') // Collapse multiple hyphens
+        .replace(/^-|-$/g, '') || // Trim leading/trailing hyphens
+        'untitled');
+}
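For orientation, here is a minimal sketch of how a caller might drive the two-pass flow above: validate() discovers pages and databases and builds the path map, extract() streams converted documents, and getDatabaseImports() supplies the deferred database work. The relative import path and the NOTION_TOKEN environment variable are assumptions for illustration; this driver is not part of the published package.

// Hypothetical driver; the import path is assumed from the sibling modules imported above.
import { NotionSource } from './notion-source.js';

const source = new NotionSource({
    token: process.env.NOTION_TOKEN ?? '', // Notion integration token
    rootPageId: undefined,                 // set a page ID to import only that subtree
    maxDepth: undefined,                   // optional depth limit consumed by buildPageTree
});

// Pass 1: discover pages/databases, build the tree, compute KB paths.
await source.validate();
console.log(`Documents to import: ${await source.getDocumentCount()}`);

// Pass 2: stream converted documents (database entries are deferred).
for await (const doc of source.extract()) {
    console.log(doc.relativePath, doc.sections.length);
}

// Databases are created after all pages, their entries linked by KB path.
for (const dbImport of source.getDatabaseImports()) {
    console.log(dbImport.notionDatabaseId, dbImport.entries.length);
}

await source.cleanup();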
package/dist/types.d.ts CHANGED
@@ -102,6 +102,8 @@ export interface MigrationOptions {
     defaultPermission?: 'edit' | 'read' | 'none';
     /** AI access permission for documents */
     aiAccess?: 'edit' | 'read' | 'none';
+    /** Convenience flag: 'team' = read, 'private' = none */
+    visibility?: 'team' | 'private';
 }
 /**
  * Error response from API when document already exists
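The new visibility option is documented only by its inline comment ('team' = read, 'private' = none). As a rough sketch of how that mapping could be applied, the helper below is hypothetical and not part of @moxn/kb-migrate; in particular, feeding the result into defaultPermission (rather than aiAccess, or both) is an assumption the diff does not confirm.

// Hypothetical helper: encodes only the mapping stated in the comment above.
type Permission = 'edit' | 'read' | 'none';

function resolveVisibility(visibility?: 'team' | 'private'): Permission | undefined {
    if (visibility === 'team') return 'read';    // team-visible documents default to read access
    if (visibility === 'private') return 'none'; // private documents grant no default access
    return undefined;                            // no flag: rely on an explicit defaultPermission
}

// Possible use (assumed): options.defaultPermission ??= resolveVisibility(options.visibility);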
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@moxn/kb-migrate",
-  "version": "0.3.0",
+  "version": "0.4.0",
   "description": "Migration tool for importing documents into Moxn Knowledge Base from local files, Notion, Google Docs, and more",
   "type": "module",
   "main": "dist/index.js",