@moxn/kb-migrate 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,6 +16,7 @@ export interface LocalSourceConfig extends SourceConfig {
16
16
  */
17
17
  export declare class LocalSource extends MigrationSource<LocalSourceConfig> {
18
18
  private files;
19
+ private skippedCollisions;
19
20
  get sourceType(): string;
20
21
  get sourceLocation(): string;
21
22
  validate(): Promise<void>;
@@ -25,6 +26,11 @@ export declare class LocalSource extends MigrationSource<LocalSourceConfig> {
25
26
  private extractDocument;
26
27
  private parseMarkdownSections;
27
28
  private nodesToContentBlocks;
29
+ /**
30
+ * Extract image nodes from paragraph children, returning image blocks
31
+ * and whether non-image text content exists.
32
+ */
33
+ private extractImagesFromParagraph;
28
34
  private imageToBlock;
29
35
  private guessImageType;
30
36
  private extensionToMediaType;
@@ -14,6 +14,7 @@ import { MigrationSource } from './base.js';
14
14
  */
15
15
  export class LocalSource extends MigrationSource {
16
16
  files = null;
17
+ skippedCollisions = new Set();
17
18
  get sourceType() {
18
19
  return 'local';
19
20
  }
@@ -30,13 +31,16 @@ export class LocalSource extends MigrationSource {
30
31
  if (!this.files) {
31
32
  await this.discoverFiles();
32
33
  }
33
- return this.files.length;
34
+ return this.files.length - this.skippedCollisions.size;
34
35
  }
35
36
  async *extract() {
36
37
  if (!this.files) {
37
38
  await this.discoverFiles();
38
39
  }
39
40
  for (const file of this.files) {
41
+ if (this.skippedCollisions.has(file)) {
42
+ continue;
43
+ }
40
44
  const doc = await this.extractDocument(file);
41
45
  if (doc) {
42
46
  yield doc;
@@ -55,7 +59,29 @@ export class LocalSource extends MigrationSource {
55
59
  allFiles.push(...matches);
56
60
  }
57
61
  // Deduplicate and sort
58
- this.files = [...new Set(allFiles)].sort();
62
+ const uniqueFiles = [...new Set(allFiles)].sort();
63
+ // Detect KB path collisions (e.g., doc.md and doc.mdx both map to /doc)
64
+ const pathToFiles = new Map();
65
+ for (const file of uniqueFiles) {
66
+ const parsed = path.parse(file);
67
+ const dirParts = parsed.dir ? parsed.dir.split(path.sep) : [];
68
+ const kbPath = [...dirParts, parsed.name].join('/').replace(/ /g, '-');
69
+ const existing = pathToFiles.get(kbPath) || [];
70
+ existing.push(file);
71
+ pathToFiles.set(kbPath, existing);
72
+ }
73
+ this.skippedCollisions = new Set();
74
+ for (const [kbPath, files] of pathToFiles) {
75
+ if (files.length > 1) {
76
+ console.warn(` ⚠ Path collision: multiple files map to KB path "/${kbPath}": ${files.join(', ')}`);
77
+ console.warn(` → Keeping "${files[0]}", skipping ${files.length - 1} duplicate(s)`);
78
+ // Skip all but the first file (alphabetically first since files are sorted)
79
+ for (let i = 1; i < files.length; i++) {
80
+ this.skippedCollisions.add(files[i]);
81
+ }
82
+ }
83
+ }
84
+ this.files = uniqueFiles;
59
85
  }
60
86
  async extractDocument(relativePath) {
61
87
  const fullPath = path.join(this.config.directory, relativePath);
@@ -63,7 +89,12 @@ export class LocalSource extends MigrationSource {
63
89
  // Parse relative path to create document path
64
90
  const parsed = path.parse(relativePath);
65
91
  const dirParts = parsed.dir ? parsed.dir.split(path.sep) : [];
66
- const docPath = [...dirParts, parsed.name].join('/');
92
+ const rawPath = [...dirParts, parsed.name].join('/');
93
+ // Sanitize: replace spaces with hyphens (server rejects paths with spaces)
94
+ const docPath = rawPath.replace(/ /g, '-');
95
+ if (docPath !== rawPath) {
96
+ console.warn(` ⚠ Path sanitized: "${rawPath}" → "${docPath}" (spaces replaced with hyphens)`);
97
+ }
67
98
  // Derive name from filename
68
99
  const name = parsed.name
69
100
  .split(/[-_]/)
@@ -158,12 +189,27 @@ export class LocalSource extends MigrationSource {
158
189
  };
159
190
  for (const node of nodes) {
160
191
  if (node.type === 'image') {
192
+ // Top-level image (rare in standard markdown, but possible)
161
193
  flushTextBuffer();
162
194
  const imageBlock = this.imageToBlock(node, baseDir);
163
195
  if (imageBlock) {
164
196
  blocks.push(imageBlock);
165
197
  }
166
198
  }
199
+ else if (node.type === 'paragraph') {
200
+ // Walk paragraph children to extract images separately
201
+ const paragraph = node;
202
+ const { images, hasText } = this.extractImagesFromParagraph(paragraph.children, baseDir);
203
+ if (hasText) {
204
+ // Render the paragraph text (images in text become markdown syntax via nodeToMarkdown)
205
+ textBuffer += this.nodeToMarkdown(node) + '\n\n';
206
+ }
207
+ // Emit extracted image blocks
208
+ if (images.length > 0) {
209
+ flushTextBuffer();
210
+ blocks.push(...images);
211
+ }
212
+ }
167
213
  else {
168
214
  textBuffer += this.nodeToMarkdown(node) + '\n\n';
169
215
  }
@@ -171,6 +217,30 @@ export class LocalSource extends MigrationSource {
171
217
  flushTextBuffer();
172
218
  return blocks;
173
219
  }
220
+ /**
221
+ * Extract image nodes from paragraph children, returning image blocks
222
+ * and whether non-image text content exists.
223
+ */
224
+ extractImagesFromParagraph(children, baseDir) {
225
+ const images = [];
226
+ let hasText = false;
227
+ for (const child of children) {
228
+ if (child.type === 'image') {
229
+ const imageBlock = this.imageToBlock(child, baseDir);
230
+ if (imageBlock) {
231
+ images.push(imageBlock);
232
+ }
233
+ }
234
+ else {
235
+ // Check if the child has any meaningful text
236
+ const text = this.nodeToMarkdown(child).trim();
237
+ if (text) {
238
+ hasText = true;
239
+ }
240
+ }
241
+ }
242
+ return { images, hasText };
243
+ }
174
244
  imageToBlock(node, baseDir) {
175
245
  const url = node.url;
176
246
  // Handle external URLs
@@ -272,8 +342,13 @@ export class LocalSource extends MigrationSource {
272
342
  case 'heading':
273
343
  const heading = node;
274
344
  const hashes = '#'.repeat(heading.depth);
275
- const headingText = heading.children.map((c) => this.nodeToMarkdown(c)).join('');
345
+ const headingText = heading.children
346
+ .map((c) => this.nodeToMarkdown(c))
347
+ .join('');
276
348
  return `${hashes} ${headingText}`;
349
+ case 'image':
350
+ const img = node;
351
+ return img.alt ? `![${img.alt}](${img.url})` : `![](${img.url})`;
277
352
  case 'thematicBreak':
278
353
  return '---';
279
354
  case 'table':
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@moxn/kb-migrate",
3
- "version": "0.1.0",
3
+ "version": "0.1.1",
4
4
  "description": "Migration tool for importing documents into Moxn Knowledge Base from local files, Notion, Google Docs, and more",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",