@moxn/kb-migrate 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/sources/local.d.ts +6 -0
- package/dist/sources/local.js +79 -4
- package/package.json +1 -1
package/dist/sources/local.d.ts
CHANGED
|
@@ -16,6 +16,7 @@ export interface LocalSourceConfig extends SourceConfig {
|
|
|
16
16
|
*/
|
|
17
17
|
export declare class LocalSource extends MigrationSource<LocalSourceConfig> {
|
|
18
18
|
private files;
|
|
19
|
+
private skippedCollisions;
|
|
19
20
|
get sourceType(): string;
|
|
20
21
|
get sourceLocation(): string;
|
|
21
22
|
validate(): Promise<void>;
|
|
@@ -25,6 +26,11 @@ export declare class LocalSource extends MigrationSource<LocalSourceConfig> {
|
|
|
25
26
|
private extractDocument;
|
|
26
27
|
private parseMarkdownSections;
|
|
27
28
|
private nodesToContentBlocks;
|
|
29
|
+
/**
|
|
30
|
+
* Extract image nodes from paragraph children, returning image blocks
|
|
31
|
+
* and whether non-image text content exists.
|
|
32
|
+
*/
|
|
33
|
+
private extractImagesFromParagraph;
|
|
28
34
|
private imageToBlock;
|
|
29
35
|
private guessImageType;
|
|
30
36
|
private extensionToMediaType;
|
package/dist/sources/local.js
CHANGED
|
@@ -14,6 +14,7 @@ import { MigrationSource } from './base.js';
|
|
|
14
14
|
*/
|
|
15
15
|
export class LocalSource extends MigrationSource {
|
|
16
16
|
files = null;
|
|
17
|
+
skippedCollisions = new Set();
|
|
17
18
|
get sourceType() {
|
|
18
19
|
return 'local';
|
|
19
20
|
}
|
|
@@ -30,13 +31,16 @@ export class LocalSource extends MigrationSource {
|
|
|
30
31
|
if (!this.files) {
|
|
31
32
|
await this.discoverFiles();
|
|
32
33
|
}
|
|
33
|
-
return this.files.length;
|
|
34
|
+
return this.files.length - this.skippedCollisions.size;
|
|
34
35
|
}
|
|
35
36
|
async *extract() {
|
|
36
37
|
if (!this.files) {
|
|
37
38
|
await this.discoverFiles();
|
|
38
39
|
}
|
|
39
40
|
for (const file of this.files) {
|
|
41
|
+
if (this.skippedCollisions.has(file)) {
|
|
42
|
+
continue;
|
|
43
|
+
}
|
|
40
44
|
const doc = await this.extractDocument(file);
|
|
41
45
|
if (doc) {
|
|
42
46
|
yield doc;
|
|
@@ -55,7 +59,29 @@ export class LocalSource extends MigrationSource {
|
|
|
55
59
|
allFiles.push(...matches);
|
|
56
60
|
}
|
|
57
61
|
// Deduplicate and sort
|
|
58
|
-
|
|
62
|
+
const uniqueFiles = [...new Set(allFiles)].sort();
|
|
63
|
+
// Detect KB path collisions (e.g., doc.md and doc.mdx both map to /doc)
|
|
64
|
+
const pathToFiles = new Map();
|
|
65
|
+
for (const file of uniqueFiles) {
|
|
66
|
+
const parsed = path.parse(file);
|
|
67
|
+
const dirParts = parsed.dir ? parsed.dir.split(path.sep) : [];
|
|
68
|
+
const kbPath = [...dirParts, parsed.name].join('/').replace(/ /g, '-');
|
|
69
|
+
const existing = pathToFiles.get(kbPath) || [];
|
|
70
|
+
existing.push(file);
|
|
71
|
+
pathToFiles.set(kbPath, existing);
|
|
72
|
+
}
|
|
73
|
+
this.skippedCollisions = new Set();
|
|
74
|
+
for (const [kbPath, files] of pathToFiles) {
|
|
75
|
+
if (files.length > 1) {
|
|
76
|
+
console.warn(` ⚠ Path collision: multiple files map to KB path "/${kbPath}": ${files.join(', ')}`);
|
|
77
|
+
console.warn(` → Keeping "${files[0]}", skipping ${files.length - 1} duplicate(s)`);
|
|
78
|
+
// Skip all but the first file (alphabetically first since files are sorted)
|
|
79
|
+
for (let i = 1; i < files.length; i++) {
|
|
80
|
+
this.skippedCollisions.add(files[i]);
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
this.files = uniqueFiles;
|
|
59
85
|
}
|
|
60
86
|
async extractDocument(relativePath) {
|
|
61
87
|
const fullPath = path.join(this.config.directory, relativePath);
|
|
@@ -63,7 +89,12 @@ export class LocalSource extends MigrationSource {
|
|
|
63
89
|
// Parse relative path to create document path
|
|
64
90
|
const parsed = path.parse(relativePath);
|
|
65
91
|
const dirParts = parsed.dir ? parsed.dir.split(path.sep) : [];
|
|
66
|
-
const docPath = [...dirParts, parsed.name].join('/');
|
|
92
|
+
const rawPath = [...dirParts, parsed.name].join('/');
|
|
93
|
+
// Sanitize: replace spaces with hyphens (server rejects paths with spaces)
|
|
94
|
+
const docPath = rawPath.replace(/ /g, '-');
|
|
95
|
+
if (docPath !== rawPath) {
|
|
96
|
+
console.warn(` ⚠ Path sanitized: "${rawPath}" → "${docPath}" (spaces replaced with hyphens)`);
|
|
97
|
+
}
|
|
67
98
|
// Derive name from filename
|
|
68
99
|
const name = parsed.name
|
|
69
100
|
.split(/[-_]/)
|
|
@@ -158,12 +189,27 @@ export class LocalSource extends MigrationSource {
|
|
|
158
189
|
};
|
|
159
190
|
for (const node of nodes) {
|
|
160
191
|
if (node.type === 'image') {
|
|
192
|
+
// Top-level image (rare in standard markdown, but possible)
|
|
161
193
|
flushTextBuffer();
|
|
162
194
|
const imageBlock = this.imageToBlock(node, baseDir);
|
|
163
195
|
if (imageBlock) {
|
|
164
196
|
blocks.push(imageBlock);
|
|
165
197
|
}
|
|
166
198
|
}
|
|
199
|
+
else if (node.type === 'paragraph') {
|
|
200
|
+
// Walk paragraph children to extract images separately
|
|
201
|
+
const paragraph = node;
|
|
202
|
+
const { images, hasText } = this.extractImagesFromParagraph(paragraph.children, baseDir);
|
|
203
|
+
if (hasText) {
|
|
204
|
+
// Render the paragraph text (images in text become markdown syntax via nodeToMarkdown)
|
|
205
|
+
textBuffer += this.nodeToMarkdown(node) + '\n\n';
|
|
206
|
+
}
|
|
207
|
+
// Emit extracted image blocks
|
|
208
|
+
if (images.length > 0) {
|
|
209
|
+
flushTextBuffer();
|
|
210
|
+
blocks.push(...images);
|
|
211
|
+
}
|
|
212
|
+
}
|
|
167
213
|
else {
|
|
168
214
|
textBuffer += this.nodeToMarkdown(node) + '\n\n';
|
|
169
215
|
}
|
|
@@ -171,6 +217,30 @@ export class LocalSource extends MigrationSource {
|
|
|
171
217
|
flushTextBuffer();
|
|
172
218
|
return blocks;
|
|
173
219
|
}
|
|
220
|
+
/**
|
|
221
|
+
* Extract image nodes from paragraph children, returning image blocks
|
|
222
|
+
* and whether non-image text content exists.
|
|
223
|
+
*/
|
|
224
|
+
extractImagesFromParagraph(children, baseDir) {
|
|
225
|
+
const images = [];
|
|
226
|
+
let hasText = false;
|
|
227
|
+
for (const child of children) {
|
|
228
|
+
if (child.type === 'image') {
|
|
229
|
+
const imageBlock = this.imageToBlock(child, baseDir);
|
|
230
|
+
if (imageBlock) {
|
|
231
|
+
images.push(imageBlock);
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
else {
|
|
235
|
+
// Check if the child has any meaningful text
|
|
236
|
+
const text = this.nodeToMarkdown(child).trim();
|
|
237
|
+
if (text) {
|
|
238
|
+
hasText = true;
|
|
239
|
+
}
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
return { images, hasText };
|
|
243
|
+
}
|
|
174
244
|
imageToBlock(node, baseDir) {
|
|
175
245
|
const url = node.url;
|
|
176
246
|
// Handle external URLs
|
|
@@ -272,8 +342,13 @@ export class LocalSource extends MigrationSource {
|
|
|
272
342
|
case 'heading':
|
|
273
343
|
const heading = node;
|
|
274
344
|
const hashes = '#'.repeat(heading.depth);
|
|
275
|
-
const headingText = heading.children
|
|
345
|
+
const headingText = heading.children
|
|
346
|
+
.map((c) => this.nodeToMarkdown(c))
|
|
347
|
+
.join('');
|
|
276
348
|
return `${hashes} ${headingText}`;
|
|
349
|
+
case 'image':
|
|
350
|
+
const img = node;
|
|
351
|
+
return img.alt ? `![${img.alt}](${img.url})` : `![](${img.url})`;
|
|
277
352
|
case 'thematicBreak':
|
|
278
353
|
return '---';
|
|
279
354
|
case 'table':
|
package/package.json
CHANGED