@moxn/kb-migrate 0.3.0 → 0.4.0

@@ -0,0 +1,390 @@
+/**
+ * NotionSource — imports Notion workspace content into Moxn KB.
+ *
+ * Two-pass architecture:
+ * Pass 1 (validate): Discover all pages/databases via search API, build tree, compute paths
+ * Pass 2 (extract): Walk tree depth-first, fetch blocks, convert to sections
+ *
+ * Databases are imported after all pages: creates KB database + columns, links entries.
+ */
+import { MigrationSource } from './base.js';
+import { NotionApiClient } from './notion-api.js';
+import { blocksToSections, getPageTitle, normalizeId, } from './notion-blocks.js';
+import { NotionMediaDownloader } from './notion-media.js';
+import { parseDatabaseSchema, parseEntryValues, renderPropertiesSection, } from './notion-databases.js';
+const MAX_DOCUMENT_COUNT = 10_000;
+// ============================================
+// Source
+// ============================================
+export class NotionSource extends MigrationSource {
+    client;
+    mediaDownloader;
+    // Populated during validate()
+    pageTree = [];
+    allPages = []; // flat list, depth-first order
+    pagePathMap = new Map();
+    databases = [];
+    databaseEntryPageIds = new Set();
+    _documentCount = 0;
+    constructor(config) {
+        super(config);
+        this.client = new NotionApiClient(config.token);
+        this.mediaDownloader = new NotionMediaDownloader();
+    }
+    get sourceType() {
+        return 'notion';
+    }
+    get sourceLocation() {
+        return this.config.rootPageId
+            ? `Notion (subtree: ${this.config.rootPageId})`
+            : 'Notion (full workspace)';
+    }
+    // ============================================
+    // Pass 1: Discovery (validate)
+    // ============================================
+    async validate() {
+        // 1. Test token
+        console.log('Validating Notion API token...');
+        await this.client.validateToken();
+        console.log(' Token valid.');
+        // 2. Discover all pages
+        console.log('Discovering pages...');
+        const allNotionPages = await this.client.searchPages();
+        console.log(` Found ${allNotionPages.length} pages in workspace.`);
+        // 3. Discover all databases
+        console.log('Discovering databases...');
+        const allNotionDatabases = await this.client.searchDatabases();
+        console.log(` Found ${allNotionDatabases.length} databases.`);
+        // 4. Build page tree
+        this.buildPageTree(allNotionPages);
+        // 5. Process databases — identify entries and build schemas
+        await this.processDatabases(allNotionDatabases);
+        // 6. Count and validate
+        this._documentCount = this.allPages.length;
+        // Add database-only entries (pages that are in databases but not in page tree)
+        for (const dbInfo of this.databases) {
+            for (const entry of dbInfo.entries) {
+                const nid = normalizeId(entry.id);
+                if (!this.pagePathMap.has(nid)) {
+                    this._documentCount++;
+                }
+            }
+        }
+        if (this._documentCount > MAX_DOCUMENT_COUNT) {
+            throw new Error(`Workspace has ${this._documentCount} documents, exceeding the ${MAX_DOCUMENT_COUNT} limit. ` +
+                'Use --root-page-id to import a subtree.');
+        }
+        console.log(` ${this.allPages.length} pages + ${this.databases.length} databases ready for import.`);
+        // 7. Initialize media downloader
+        await this.mediaDownloader.init();
+    }
+    async getDocumentCount() {
+        return this._documentCount;
+    }
+    /**
+     * Get database import info for the migration runner.
+     * Called after all pages are imported to create databases and link entries.
+     */
+    getDatabaseImports() {
+        return this.databases.map((dbInfo) => ({
+            notionDatabaseId: normalizeId(dbInfo.database.id),
+            schema: dbInfo.schema,
+            entries: dbInfo.entries.map((entry) => {
+                const nid = normalizeId(entry.id);
+                const title = getPageTitle(entry);
+                const slug = slugify(title);
+                const kbPath = this.pagePathMap.get(nid) ?? slug;
+                return {
+                    page: entry,
+                    kbPath,
+                };
+            }),
+        }));
+    }
+    // ============================================
+    // Pass 2: Extraction
+    // ============================================
+    async *extract() {
+        // Walk pages depth-first
+        for (const node of this.allPages) {
+            // Skip database entries that will be created during database import
+            // unless they also appear in the page tree (child_page)
+            if (node.isDatabaseEntry)
+                continue;
+            const doc = await this.extractPage(node);
+            if (doc)
+                yield doc;
+        }
+        // Extract database-only entries (pages not in page tree)
+        for (const dbInfo of this.databases) {
+            for (const entry of dbInfo.entries) {
+                const nid = normalizeId(entry.id);
+                // Only extract if not already extracted via page tree
+                if (this.pagePathMap.has(nid)) {
+                    // Page was already in the tree — check if it was yielded
+                    const treeNode = this.allPages.find((n) => normalizeId(n.page.id) === nid);
+                    if (treeNode && !treeNode.isDatabaseEntry) {
+                        // Already yielded as a page
+                        continue;
+                    }
+                }
+                const doc = await this.extractDatabaseEntry(entry, dbInfo);
+                if (doc)
+                    yield doc;
+            }
+        }
+    }
+    /** Clean up temp files after migration completes. */
+    async cleanup() {
+        await this.mediaDownloader.cleanup();
+    }
+    // ============================================
+    // Tree building
+    // ============================================
+    buildPageTree(pages) {
+        // Build lookup maps
+        const pageById = new Map();
+        const childrenByParent = new Map();
+        for (const page of pages) {
+            const nid = normalizeId(page.id);
+            pageById.set(nid, page);
+            const parentId = this.getParentPageId(page);
+            if (parentId) {
+                const npid = normalizeId(parentId);
+                if (!childrenByParent.has(npid)) {
+                    childrenByParent.set(npid, []);
+                }
+                childrenByParent.get(npid).push(page);
+            }
+        }
+        // Identify database entries
+        for (const page of pages) {
+            if (page.parent.type === 'database_id' && page.parent.database_id) {
+                this.databaseEntryPageIds.add(normalizeId(page.id));
+            }
+        }
+        // Find root pages
+        const rootPages = [];
+        if (this.config.rootPageId) {
+            // Subtree mode: start from specified page
+            const rootId = normalizeId(this.config.rootPageId);
+            const rootPage = pageById.get(rootId);
+            if (rootPage) {
+                rootPages.push(rootPage);
+            }
+            else {
+                // Root page itself wasn't in search results — search for its children
+                const children = childrenByParent.get(rootId) ?? [];
+                rootPages.push(...children);
+            }
+        }
+        else {
+            // Full workspace mode: pages with workspace parent or no parent in our set
+            for (const page of pages) {
+                if (page.parent.type === 'workspace') {
+                    rootPages.push(page);
+                }
+                else {
+                    const parentId = this.getParentPageId(page);
+                    if (parentId && !pageById.has(normalizeId(parentId))) {
+                        // Parent is not in our page set — treat as root
+                        if (page.parent.type !== 'database_id') {
+                            rootPages.push(page);
+                        }
+                    }
+                }
+            }
+        }
+        // Build tree recursively
+        const buildNode = (page, parentPath, depth, siblingSlugCounts) => {
+            if (this.config.maxDepth !== undefined && depth > this.config.maxDepth) {
+                return null;
+            }
+            const title = getPageTitle(page);
+            let slug = slugify(title);
+            // Deduplicate sibling slugs
+            const existing = siblingSlugCounts.get(slug) ?? 0;
+            siblingSlugCounts.set(slug, existing + 1);
+            if (existing > 0) {
+                slug = `${slug}-${existing + 1}`;
+            }
+            const kbPath = parentPath ? `${parentPath}/${slug}` : slug;
+            const nid = normalizeId(page.id);
+            const isDatabaseEntry = this.databaseEntryPageIds.has(nid);
+            const node = {
+                page,
+                title,
+                slug,
+                kbPath,
+                isDatabaseEntry,
+                parentDatabaseId: page.parent.type === 'database_id' ? page.parent.database_id : undefined,
+                children: [],
+            };
+            // Register in path map
+            this.pagePathMap.set(nid, kbPath);
+            // Process children
+            const childPages = childrenByParent.get(nid) ?? [];
+            const childSlugCounts = new Map();
+            for (const childPage of childPages) {
+                const childNode = buildNode(childPage, kbPath, depth + 1, childSlugCounts);
+                if (childNode) {
+                    node.children.push(childNode);
+                }
+            }
+            return node;
+        };
+        // Build roots
+        const rootSlugCounts = new Map();
+        for (const rootPage of rootPages) {
+            const node = buildNode(rootPage, '', 0, rootSlugCounts);
+            if (node) {
+                this.pageTree.push(node);
+            }
+        }
+        // Flatten tree to depth-first list
+        const flatten = (nodes) => {
+            for (const node of nodes) {
+                this.allPages.push(node);
+                flatten(node.children);
+            }
+        };
+        flatten(this.pageTree);
+    }
+    getParentPageId(page) {
+        if (page.parent.type === 'page_id')
+            return page.parent.page_id ?? null;
+        if (page.parent.type === 'block_id')
+            return page.parent.block_id ?? null;
+        return null;
+    }
+    // ============================================
+    // Database processing
+    // ============================================
+    async processDatabases(databases) {
+        for (const db of databases) {
+            // If rootPageId is set, only include databases whose parent is in our tree
+            if (this.config.rootPageId) {
+                const parentId = this.getDatabaseParentId(db);
+                if (parentId && !this.pagePathMap.has(normalizeId(parentId))) {
+                    // Also check if the root page itself is the parent
+                    if (normalizeId(parentId) !== normalizeId(this.config.rootPageId)) {
+                        continue;
+                    }
+                }
+            }
+            const schema = parseDatabaseSchema(db);
+            // Query all entries
+            const entries = await this.client.queryDatabase(db.id);
+            this.databases.push({ database: db, schema, entries });
+            // Register entry pages in databaseEntryPageIds
+            for (const entry of entries) {
+                this.databaseEntryPageIds.add(normalizeId(entry.id));
+            }
+        }
+    }
+    getDatabaseParentId(db) {
+        if (db.parent.type === 'page_id')
+            return db.parent.page_id ?? null;
+        if (db.parent.type === 'block_id')
+            return db.parent.block_id ?? null;
+        return null;
+    }
+    // ============================================
+    // Page extraction
+    // ============================================
+    async extractPage(node) {
+        try {
+            const blocks = await this.client.getBlockChildren(node.page.id);
+            if (blocks.length === 0) {
+                console.log(` Skipping empty page: ${node.title}`);
+                return null;
+            }
+            let sections = await blocksToSections(blocks, this.client, this.pagePathMap);
+            // Download Notion-hosted media files
+            sections = await this.downloadSectionMedia(sections);
+            if (sections.length === 0) {
+                console.log(` Skipping page with no content: ${node.title}`);
+                return null;
+            }
+            return {
+                relativePath: node.kbPath,
+                name: node.title,
+                sections,
+                sourcePath: `notion://${node.page.id}`,
+            };
+        }
+        catch (error) {
+            console.error(` Error extracting page "${node.title}": ${error instanceof Error ? error.message : error}`);
+            return null;
+        }
+    }
+    async extractDatabaseEntry(entry, dbInfo) {
+        try {
+            const values = parseEntryValues(entry, dbInfo.schema);
+            const title = values.title;
+            const slug = slugify(title);
+            // Build sections: properties table + page content
+            const sections = [];
+            // Add properties section if there are unmapped values
+            const propSection = renderPropertiesSection(values);
+            if (propSection) {
+                sections.push(propSection);
+            }
+            // Get page blocks
+            const blocks = await this.client.getBlockChildren(entry.id);
+            if (blocks.length > 0) {
+                const contentSections = await blocksToSections(blocks, this.client, this.pagePathMap);
+                sections.push(...contentSections);
+            }
+            // Download media
+            const processedSections = await this.downloadSectionMedia(sections);
+            const nid = normalizeId(entry.id);
+            const kbPath = this.pagePathMap.get(nid) ?? slug;
+            return {
+                relativePath: kbPath,
+                name: title,
+                sections: processedSections.length > 0
+                    ? processedSections
+                    : [
+                        {
+                            name: 'Content',
+                            content: [{ blockType: 'text', text: '*(Empty entry)*' }],
+                        },
+                    ],
+                sourcePath: `notion://${entry.id}`,
+            };
+        }
+        catch (error) {
+            console.error(` Error extracting database entry: ${error instanceof Error ? error.message : error}`);
+            return null;
+        }
+    }
+    // ============================================
+    // Media handling
+    // ============================================
+    async downloadSectionMedia(sections) {
+        const result = [];
+        for (const section of sections) {
+            const processedContent = await this.mediaDownloader.processContentBlocks(section.content);
+            result.push({ name: section.name, content: processedContent });
+        }
+        return result;
+    }
+}
+// ============================================
+// Path helpers
+// ============================================
+/**
+ * Slugify a page title for use as a KB path segment.
+ * Lowercase, spaces→hyphens, strip special chars, ltree-compatible.
+ */
+export function slugify(title) {
+    return (title
+        .toLowerCase()
+        .trim()
+        .replace(/[^\w\s-]/g, '') // Remove special chars (keep word chars, spaces, hyphens)
+        .replace(/[\s_]+/g, '-') // Spaces and underscores → hyphens (ltree forbids _ and .)
+        .replace(/-+/g, '-') // Collapse multiple hyphens
+        .replace(/^-|-$/g, '') || // Trim leading/trailing hyphens
+        'untitled');
+}
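For orientation, here is a minimal sketch of how a caller might drive the two-pass flow above: validate() discovers pages and databases and builds the path map, extract() streams converted documents, and getDatabaseImports() supplies the deferred database work. The relative import path and the NOTION_TOKEN environment variable are assumptions for illustration; this driver is not part of the published package.

// Hypothetical driver; the import path is assumed from the sibling modules imported above.
import { NotionSource } from './notion-source.js';

const source = new NotionSource({
    token: process.env.NOTION_TOKEN ?? '', // Notion integration token
    rootPageId: undefined,                 // set a page ID to import only that subtree
    maxDepth: undefined,                   // optional depth limit consumed by buildPageTree
});

// Pass 1: discover pages/databases, build the tree, compute KB paths.
await source.validate();
console.log(`Documents to import: ${await source.getDocumentCount()}`);

// Pass 2: stream converted documents (database entries are deferred).
for await (const doc of source.extract()) {
    console.log(doc.relativePath, doc.sections.length);
}

// Databases are created after all pages, their entries linked by KB path.
for (const dbImport of source.getDatabaseImports()) {
    console.log(dbImport.notionDatabaseId, dbImport.entries.length);
}

await source.cleanup();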
package/dist/types.d.ts CHANGED
@@ -102,6 +102,8 @@ export interface MigrationOptions {
     defaultPermission?: 'edit' | 'read' | 'none';
     /** AI access permission for documents */
     aiAccess?: 'edit' | 'read' | 'none';
+    /** Convenience flag: 'team' = read, 'private' = none */
+    visibility?: 'team' | 'private';
 }
 /**
  * Error response from API when document already exists
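The new visibility option is documented only by its inline comment ('team' = read, 'private' = none). As a rough sketch of how that mapping could be applied, the helper below is hypothetical and not part of @moxn/kb-migrate; in particular, feeding the result into defaultPermission (rather than aiAccess, or both) is an assumption the diff does not confirm.

// Hypothetical helper: encodes only the mapping stated in the comment above.
type Permission = 'edit' | 'read' | 'none';

function resolveVisibility(visibility?: 'team' | 'private'): Permission | undefined {
    if (visibility === 'team') return 'read';    // team-visible documents default to read access
    if (visibility === 'private') return 'none'; // private documents grant no default access
    return undefined;                            // no flag: rely on an explicit defaultPermission
}

// Possible use (assumed): options.defaultPermission ??= resolveVisibility(options.visibility);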
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@moxn/kb-migrate",
-  "version": "0.3.0",
+  "version": "0.4.0",
   "description": "Migration tool for importing documents into Moxn Knowledge Base from local files, Notion, Google Docs, and more",
   "type": "module",
   "main": "dist/index.js",