agentic-knowledge-mcp 1.4.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +5 -5
- package/packages/cli/dist/commands/init.js +21 -202
- package/packages/cli/package.json +1 -1
- package/packages/content-loader/dist/docset-init.d.ts +29 -0
- package/packages/content-loader/dist/docset-init.js +196 -0
- package/packages/content-loader/dist/index.d.ts +2 -0
- package/packages/content-loader/dist/index.js +1 -0
- package/packages/content-loader/package.json +2 -1
- package/packages/core/dist/index.d.ts +1 -0
- package/packages/core/dist/index.js +2 -0
- package/packages/core/dist/paths/symlinks.js +13 -15
- package/packages/core/dist/search/searcher.d.ts +53 -0
- package/packages/core/dist/search/searcher.js +359 -0
- package/packages/core/dist/types.d.ts +50 -0
- package/packages/core/package.json +3 -2
- package/packages/mcp-server/dist/server.js +154 -61
- package/packages/mcp-server/package.json +2 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agentic-knowledge-mcp",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.6.0",
|
|
4
4
|
"description": "A Model Context Protocol server for agentic knowledge guidance with web-based documentation loading and intelligent search instructions",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "packages/cli/dist/index.js",
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
"agentic-knowledge": "packages/cli/dist/index.js"
|
|
9
9
|
},
|
|
10
10
|
"engines": {
|
|
11
|
-
"node": ">=
|
|
11
|
+
"node": ">=20.0.0",
|
|
12
12
|
"pnpm": ">=9.0.0"
|
|
13
13
|
},
|
|
14
14
|
"files": [
|
|
@@ -29,9 +29,9 @@
|
|
|
29
29
|
"commander": "^12.0.0",
|
|
30
30
|
"js-yaml": "4.1.0",
|
|
31
31
|
"ora": "^8.0.1",
|
|
32
|
-
"@codemcp/knowledge": "1.
|
|
33
|
-
"@codemcp/knowledge-
|
|
34
|
-
"@codemcp/knowledge
|
|
32
|
+
"@codemcp/knowledge-content-loader": "1.6.0",
|
|
33
|
+
"@codemcp/knowledge-core": "1.6.0",
|
|
34
|
+
"@codemcp/knowledge": "1.6.0"
|
|
35
35
|
},
|
|
36
36
|
"devDependencies": {
|
|
37
37
|
"@eslint/js": "^9.34.0",
|
|
@@ -3,10 +3,8 @@
|
|
|
3
3
|
*/
|
|
4
4
|
import { Command } from "commander";
|
|
5
5
|
import chalk from "chalk";
|
|
6
|
-
import {
|
|
7
|
-
import
|
|
8
|
-
import { ConfigManager, calculateLocalPath, ensureKnowledgeGitignoreSync, discoverDirectoryPatterns, safelyClearDirectory, getDirectoryInfo, } from "@codemcp/knowledge-core";
|
|
9
|
-
import { GitRepoLoader, ArchiveLoader, WebSourceType, } from "@codemcp/knowledge-content-loader";
|
|
6
|
+
import { ConfigManager, ensureKnowledgeGitignoreSync, discoverDirectoryPatterns, } from "@codemcp/knowledge-core";
|
|
7
|
+
import { initDocset, } from "@codemcp/knowledge-content-loader";
|
|
10
8
|
export const initCommand = new Command("init")
|
|
11
9
|
.description("Initialize sources for a docset from configuration")
|
|
12
10
|
.argument("<docset-id>", "ID of the docset to initialize")
|
|
@@ -16,216 +14,37 @@ export const initCommand = new Command("init")
|
|
|
16
14
|
.action(async (docsetId, options) => {
|
|
17
15
|
console.log(chalk.blue("š Agentic Knowledge Integration Test"));
|
|
18
16
|
try {
|
|
19
|
-
// Use ConfigManager for all config operations
|
|
20
17
|
const configManager = new ConfigManager();
|
|
21
18
|
const { config, configPath } = await configManager.loadConfig(process.cwd());
|
|
22
|
-
// Ensure .knowledge/.gitignore exists and contains docsets/ ignore rule
|
|
23
19
|
ensureKnowledgeGitignoreSync(configPath);
|
|
24
20
|
const docset = config.docsets.find((d) => d.id === docsetId);
|
|
25
21
|
if (!docset) {
|
|
26
22
|
throw new Error(`Docset '${docsetId}' not found in configuration. Available: ${config.docsets.map((d) => d.id).join(", ")}`);
|
|
27
23
|
}
|
|
28
|
-
if (!docset.sources || docset.sources.length === 0) {
|
|
29
|
-
throw new Error(`Docset '${docsetId}' has no sources configured`);
|
|
30
|
-
}
|
|
31
24
|
console.log(chalk.green(`ā
Found docset: ${docset.name}`));
|
|
32
25
|
console.log(chalk.gray(`š Description: ${docset.description}`));
|
|
33
26
|
console.log(chalk.gray(`š Sources: ${docset.sources.length}`));
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
}
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
}
|
|
48
|
-
if (existsAlready && !options.force) {
|
|
49
|
-
console.log(chalk.yellow("ā ļø Directory already exists. Use --force to overwrite."));
|
|
50
|
-
const files = await fs.readdir(localPath);
|
|
51
|
-
console.log(chalk.gray(`Existing files: ${files.slice(0, 5).join(", ")}${files.length > 5 ? "..." : ""}`));
|
|
27
|
+
const result = await initDocset(docsetId, docset, configPath, {
|
|
28
|
+
force: options.force,
|
|
29
|
+
onSourceProgress: (sourceResult) => {
|
|
30
|
+
const icon = sourceResult.type === "git_repo"
|
|
31
|
+
? "Copied"
|
|
32
|
+
: sourceResult.type === "local_folder"
|
|
33
|
+
? "Created symlinks:"
|
|
34
|
+
: "Extracted";
|
|
35
|
+
console.log(chalk.green(` ā
${icon} ā ${sourceResult.message}`));
|
|
36
|
+
},
|
|
37
|
+
});
|
|
38
|
+
if (result.alreadyInitialized) {
|
|
39
|
+
console.log(chalk.yellow("ā ļø Directory already exists and is initialized. Use --force to overwrite."));
|
|
52
40
|
return;
|
|
53
41
|
}
|
|
54
|
-
// Clear directory for force re-initialization
|
|
55
|
-
if (existsAlready && options.force) {
|
|
56
|
-
// Get info about what we're clearing (for logging)
|
|
57
|
-
const dirInfo = await getDirectoryInfo(localPath);
|
|
58
|
-
console.log(chalk.yellow("šļø Clearing existing directory..."));
|
|
59
|
-
console.log(chalk.gray(` Removing: ${dirInfo.files} files, ${dirInfo.directories} dirs, ${dirInfo.symlinks} symlinks`));
|
|
60
|
-
if (dirInfo.symlinks > 0) {
|
|
61
|
-
console.log(chalk.gray(" ā ļø Note: Symlinks will be removed, but source files are preserved"));
|
|
62
|
-
}
|
|
63
|
-
// Safely clear directory (preserves source files for symlinked folders)
|
|
64
|
-
await safelyClearDirectory(localPath);
|
|
65
|
-
}
|
|
66
|
-
// Create target directory
|
|
67
|
-
await fs.mkdir(localPath, { recursive: true });
|
|
68
|
-
let totalFiles = 0;
|
|
69
|
-
const allDiscoveredPaths = [];
|
|
70
|
-
// Process each source
|
|
71
|
-
for (const [index, source] of docset.sources.entries()) {
|
|
72
|
-
console.log(chalk.yellow(`\nš Loading source ${index + 1}/${docset.sources.length}: ${source.type === "git_repo" ? source.url : source.paths?.join(", ")}`));
|
|
73
|
-
if (source.type === "git_repo") {
|
|
74
|
-
// Use GitRepoLoader for all Git operations (REQ-19)
|
|
75
|
-
const loader = new GitRepoLoader();
|
|
76
|
-
console.log(chalk.gray(` Using GitRepoLoader for smart content filtering`));
|
|
77
|
-
const webSourceConfig = {
|
|
78
|
-
url: source.url,
|
|
79
|
-
type: WebSourceType.GIT_REPO,
|
|
80
|
-
options: {
|
|
81
|
-
branch: source.branch || "main",
|
|
82
|
-
paths: source.paths || [],
|
|
83
|
-
},
|
|
84
|
-
};
|
|
85
|
-
// Validate configuration
|
|
86
|
-
const validation = loader.validateConfig(webSourceConfig);
|
|
87
|
-
if (validation !== true) {
|
|
88
|
-
throw new Error(`Invalid Git repository configuration: ${validation}`);
|
|
89
|
-
}
|
|
90
|
-
// Load content using GitRepoLoader
|
|
91
|
-
const result = await loader.load(webSourceConfig, localPath);
|
|
92
|
-
if (!result.success) {
|
|
93
|
-
throw new Error(`Git repository loading failed: ${result.error}`);
|
|
94
|
-
}
|
|
95
|
-
// Collect discovered paths for config update
|
|
96
|
-
allDiscoveredPaths.push(...result.files);
|
|
97
|
-
totalFiles += result.files.length;
|
|
98
|
-
console.log(chalk.green(` ā
Copied ${result.files.length} files using smart filtering`));
|
|
99
|
-
// Create source metadata
|
|
100
|
-
const metadata = {
|
|
101
|
-
source_url: source.url,
|
|
102
|
-
source_type: source.type,
|
|
103
|
-
downloaded_at: new Date().toISOString(),
|
|
104
|
-
files_count: result.files.length,
|
|
105
|
-
files: result.files,
|
|
106
|
-
docset_id: docsetId,
|
|
107
|
-
content_hash: result.contentHash,
|
|
108
|
-
};
|
|
109
|
-
await fs.writeFile(path.join(localPath, `.agentic-source-${index}.json`), JSON.stringify(metadata, null, 2));
|
|
110
|
-
}
|
|
111
|
-
else if (source.type === "local_folder") {
|
|
112
|
-
// Handle local folder initialization
|
|
113
|
-
console.log(chalk.gray(` Creating symlinks for local folder`));
|
|
114
|
-
if (!source.paths || source.paths.length === 0) {
|
|
115
|
-
throw new Error(`Local folder source has no paths configured`);
|
|
116
|
-
}
|
|
117
|
-
// Import symlink utilities
|
|
118
|
-
const { createSymlinks } = await import("@codemcp/knowledge-core");
|
|
119
|
-
// Note: directory is already cleared above if --force is used,
|
|
120
|
-
// so no need to call removeSymlinks here
|
|
121
|
-
const configDir = path.dirname(configPath);
|
|
122
|
-
const projectRoot = path.dirname(configDir);
|
|
123
|
-
// Verify source paths exist
|
|
124
|
-
const validatedPaths = [];
|
|
125
|
-
for (const sourcePath of source.paths) {
|
|
126
|
-
const absolutePath = path.isAbsolute(sourcePath)
|
|
127
|
-
? sourcePath
|
|
128
|
-
: path.resolve(projectRoot, sourcePath);
|
|
129
|
-
try {
|
|
130
|
-
const stat = await fs.stat(absolutePath);
|
|
131
|
-
if (!stat.isDirectory()) {
|
|
132
|
-
throw new Error(`Path is not a directory: ${sourcePath}`);
|
|
133
|
-
}
|
|
134
|
-
validatedPaths.push(sourcePath);
|
|
135
|
-
}
|
|
136
|
-
catch {
|
|
137
|
-
throw new Error(`Local folder path does not exist: ${sourcePath}`);
|
|
138
|
-
}
|
|
139
|
-
}
|
|
140
|
-
// Create symlinks
|
|
141
|
-
await createSymlinks(validatedPaths, localPath, projectRoot);
|
|
142
|
-
console.log(chalk.green(` ā
Created ${validatedPaths.length} symlink(s)`));
|
|
143
|
-
// Count files in symlinked directories for metadata
|
|
144
|
-
let fileCount = 0;
|
|
145
|
-
const files = [];
|
|
146
|
-
async function countFilesRecursive(dir) {
|
|
147
|
-
const entries = await fs.readdir(dir, { withFileTypes: true });
|
|
148
|
-
for (const entry of entries) {
|
|
149
|
-
const fullPath = path.join(dir, entry.name);
|
|
150
|
-
if (entry.isDirectory()) {
|
|
151
|
-
await countFilesRecursive(fullPath);
|
|
152
|
-
}
|
|
153
|
-
else if (entry.isFile()) {
|
|
154
|
-
fileCount++;
|
|
155
|
-
files.push(path.relative(localPath, fullPath));
|
|
156
|
-
}
|
|
157
|
-
}
|
|
158
|
-
}
|
|
159
|
-
await countFilesRecursive(localPath);
|
|
160
|
-
totalFiles += fileCount;
|
|
161
|
-
// Create source metadata
|
|
162
|
-
const metadata = {
|
|
163
|
-
source_paths: validatedPaths,
|
|
164
|
-
source_type: source.type,
|
|
165
|
-
initialized_at: new Date().toISOString(),
|
|
166
|
-
files_count: fileCount,
|
|
167
|
-
files: files,
|
|
168
|
-
docset_id: docsetId,
|
|
169
|
-
};
|
|
170
|
-
await fs.writeFile(path.join(localPath, `.agentic-source-${index}.json`), JSON.stringify(metadata, null, 2));
|
|
171
|
-
}
|
|
172
|
-
else if (source.type === "archive") {
|
|
173
|
-
// Handle archive file initialization (zip, tar.gz, etc.)
|
|
174
|
-
const loader = new ArchiveLoader();
|
|
175
|
-
const sourceUrl = source.url || source.path || "";
|
|
176
|
-
console.log(chalk.gray(` Using ArchiveLoader for archive extraction`));
|
|
177
|
-
const webSourceConfig = {
|
|
178
|
-
url: sourceUrl,
|
|
179
|
-
type: WebSourceType.ARCHIVE,
|
|
180
|
-
options: {
|
|
181
|
-
paths: source.paths || [],
|
|
182
|
-
},
|
|
183
|
-
};
|
|
184
|
-
// Validate configuration
|
|
185
|
-
const validation = loader.validateConfig(webSourceConfig);
|
|
186
|
-
if (validation !== true) {
|
|
187
|
-
throw new Error(`Invalid archive source configuration: ${validation}`);
|
|
188
|
-
}
|
|
189
|
-
// Load content using ArchiveLoader
|
|
190
|
-
const result = await loader.load(webSourceConfig, localPath);
|
|
191
|
-
if (!result.success) {
|
|
192
|
-
throw new Error(`Archive loading failed: ${result.error}`);
|
|
193
|
-
}
|
|
194
|
-
// Collect discovered paths for config update
|
|
195
|
-
allDiscoveredPaths.push(...result.files);
|
|
196
|
-
totalFiles += result.files.length;
|
|
197
|
-
console.log(chalk.green(` ā
Extracted ${result.files.length} files from archive`));
|
|
198
|
-
// Create source metadata
|
|
199
|
-
const metadata = {
|
|
200
|
-
source_url: sourceUrl,
|
|
201
|
-
source_type: source.type,
|
|
202
|
-
downloaded_at: new Date().toISOString(),
|
|
203
|
-
files_count: result.files.length,
|
|
204
|
-
files: result.files,
|
|
205
|
-
docset_id: docsetId,
|
|
206
|
-
content_hash: result.contentHash,
|
|
207
|
-
};
|
|
208
|
-
await fs.writeFile(path.join(localPath, `.agentic-source-${index}.json`), JSON.stringify(metadata, null, 2));
|
|
209
|
-
}
|
|
210
|
-
else {
|
|
211
|
-
console.log(chalk.red(` ā Source type '${source.type}' not yet supported`));
|
|
212
|
-
}
|
|
213
|
-
}
|
|
214
|
-
// Create overall metadata
|
|
215
|
-
const overallMetadata = {
|
|
216
|
-
docset_id: docsetId,
|
|
217
|
-
docset_name: docset.name,
|
|
218
|
-
initialized_at: new Date().toISOString(),
|
|
219
|
-
total_files: totalFiles,
|
|
220
|
-
sources_count: docset.sources.length,
|
|
221
|
-
};
|
|
222
|
-
await fs.writeFile(path.join(localPath, ".agentic-metadata.json"), JSON.stringify(overallMetadata, null, 2));
|
|
223
42
|
// Update configuration with discovered paths (only if --discover-paths flag used)
|
|
224
|
-
|
|
43
|
+
const allFiles = result.sourceResults.flatMap((r) => r.files);
|
|
44
|
+
if (allFiles.length > 0 && options.discoverPaths) {
|
|
225
45
|
console.log(chalk.yellow(`\nš Discovering directory patterns from extracted files...`));
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
console.log(chalk.gray(` Found ${allDiscoveredPaths.length} files ā ${directoryPatterns.length} patterns`));
|
|
46
|
+
const directoryPatterns = discoverDirectoryPatterns(allFiles);
|
|
47
|
+
console.log(chalk.gray(` Found ${allFiles.length} files ā ${directoryPatterns.length} patterns`));
|
|
229
48
|
try {
|
|
230
49
|
await configManager.updateDocsetPaths(docsetId, directoryPatterns);
|
|
231
50
|
console.log(chalk.green(` ā
Updated config with discovered patterns: ${directoryPatterns.slice(0, 5).join(", ")}${directoryPatterns.length > 5 ? "..." : ""}`));
|
|
@@ -235,8 +54,8 @@ export const initCommand = new Command("init")
|
|
|
235
54
|
}
|
|
236
55
|
}
|
|
237
56
|
console.log(chalk.green(`\nš Successfully initialized docset '${docsetId}'`));
|
|
238
|
-
console.log(chalk.gray(`š Location: ${localPath}`));
|
|
239
|
-
console.log(chalk.gray(`š Total files: ${totalFiles}`));
|
|
57
|
+
console.log(chalk.gray(`š Location: ${result.localPath}`));
|
|
58
|
+
console.log(chalk.gray(`š Total files: ${result.totalFiles}`));
|
|
240
59
|
console.log(chalk.gray(`š Sources processed: ${docset.sources.length}`));
|
|
241
60
|
}
|
|
242
61
|
catch (error) {
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Docset initialization logic shared between CLI and MCP server.
|
|
3
|
+
*/
|
|
4
|
+
import { type DocsetConfig } from "@codemcp/knowledge-core";
|
|
5
|
+
export interface SourceResult {
|
|
6
|
+
index: number;
|
|
7
|
+
type: string;
|
|
8
|
+
filesCount: number;
|
|
9
|
+
files: string[];
|
|
10
|
+
message: string;
|
|
11
|
+
contentHash?: string;
|
|
12
|
+
}
|
|
13
|
+
export interface InitDocsetResult {
|
|
14
|
+
localPath: string;
|
|
15
|
+
totalFiles: number;
|
|
16
|
+
sourceResults: SourceResult[];
|
|
17
|
+
/** True when already initialized and force was not set */
|
|
18
|
+
alreadyInitialized: boolean;
|
|
19
|
+
}
|
|
20
|
+
export interface InitDocsetOptions {
|
|
21
|
+
force?: boolean;
|
|
22
|
+
/** Called after each source is processed so callers can show progress */
|
|
23
|
+
onSourceProgress?: (result: SourceResult) => void;
|
|
24
|
+
}
|
|
25
|
+
/**
|
|
26
|
+
* Download / symlink all sources for a docset and write metadata files.
|
|
27
|
+
* Does not load config or produce console output — callers handle both.
|
|
28
|
+
*/
|
|
29
|
+
export declare function initDocset(docsetId: string, docset: DocsetConfig, configPath: string, options?: InitDocsetOptions): Promise<InitDocsetResult>;
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Docset initialization logic shared between CLI and MCP server.
|
|
3
|
+
*/
|
|
4
|
+
import { promises as fs } from "node:fs";
|
|
5
|
+
import { existsSync } from "node:fs";
|
|
6
|
+
import * as path from "node:path";
|
|
7
|
+
import { calculateLocalPath, safelyClearDirectory, createSymlinks, } from "@codemcp/knowledge-core";
|
|
8
|
+
import { GitRepoLoader } from "./content/git-repo-loader.js";
|
|
9
|
+
import { ArchiveLoader } from "./content/archive-loader.js";
|
|
10
|
+
import { WebSourceType } from "./types.js";
|
|
11
|
+
/**
|
|
12
|
+
* Download / symlink all sources for a docset and write metadata files.
|
|
13
|
+
* Does not load config or produce console output — callers handle both.
|
|
14
|
+
*/
|
|
15
|
+
export async function initDocset(docsetId, docset, configPath, options = {}) {
|
|
16
|
+
const { force = false, onSourceProgress } = options;
|
|
17
|
+
if (!docset.sources || docset.sources.length === 0) {
|
|
18
|
+
throw new Error(`Docset '${docsetId}' has no sources configured`);
|
|
19
|
+
}
|
|
20
|
+
const localPath = calculateLocalPath(docset, configPath);
|
|
21
|
+
let existsAlready = false;
|
|
22
|
+
try {
|
|
23
|
+
const stat = await fs.stat(localPath);
|
|
24
|
+
if (stat.isDirectory())
|
|
25
|
+
existsAlready = true;
|
|
26
|
+
}
|
|
27
|
+
catch {
|
|
28
|
+
// not yet created
|
|
29
|
+
}
|
|
30
|
+
if (existsAlready && !force) {
|
|
31
|
+
const metadataPath = path.join(localPath, ".agentic-metadata.json");
|
|
32
|
+
if (existsSync(metadataPath)) {
|
|
33
|
+
return {
|
|
34
|
+
localPath,
|
|
35
|
+
totalFiles: 0,
|
|
36
|
+
sourceResults: [],
|
|
37
|
+
alreadyInitialized: true,
|
|
38
|
+
};
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
if (existsAlready && force) {
|
|
42
|
+
await safelyClearDirectory(localPath);
|
|
43
|
+
}
|
|
44
|
+
await fs.mkdir(localPath, { recursive: true });
|
|
45
|
+
const configDir = path.dirname(configPath);
|
|
46
|
+
const projectRoot = path.dirname(configDir);
|
|
47
|
+
let totalFiles = 0;
|
|
48
|
+
const sourceResults = [];
|
|
49
|
+
for (const [index, source] of docset.sources.entries()) {
|
|
50
|
+
let result;
|
|
51
|
+
if (source.type === "git_repo") {
|
|
52
|
+
const loader = new GitRepoLoader();
|
|
53
|
+
const webSourceConfig = {
|
|
54
|
+
url: source.url,
|
|
55
|
+
type: WebSourceType.GIT_REPO,
|
|
56
|
+
options: {
|
|
57
|
+
branch: source.branch || "main",
|
|
58
|
+
paths: source.paths || [],
|
|
59
|
+
},
|
|
60
|
+
};
|
|
61
|
+
const validation = loader.validateConfig(webSourceConfig);
|
|
62
|
+
if (validation !== true) {
|
|
63
|
+
throw new Error(`Invalid Git repository configuration: ${validation}`);
|
|
64
|
+
}
|
|
65
|
+
const loadResult = await loader.load(webSourceConfig, localPath);
|
|
66
|
+
if (!loadResult.success) {
|
|
67
|
+
throw new Error(`Git repository loading failed: ${loadResult.error}`);
|
|
68
|
+
}
|
|
69
|
+
result = {
|
|
70
|
+
index,
|
|
71
|
+
type: "git_repo",
|
|
72
|
+
filesCount: loadResult.files.length,
|
|
73
|
+
files: loadResult.files,
|
|
74
|
+
message: `${loadResult.files.length} files loaded from ${source.url}`,
|
|
75
|
+
contentHash: loadResult.contentHash,
|
|
76
|
+
};
|
|
77
|
+
await fs.writeFile(path.join(localPath, `.agentic-source-${index}.json`), JSON.stringify({
|
|
78
|
+
source_url: source.url,
|
|
79
|
+
source_type: source.type,
|
|
80
|
+
downloaded_at: new Date().toISOString(),
|
|
81
|
+
files_count: loadResult.files.length,
|
|
82
|
+
files: loadResult.files,
|
|
83
|
+
docset_id: docsetId,
|
|
84
|
+
content_hash: loadResult.contentHash,
|
|
85
|
+
}, null, 2));
|
|
86
|
+
}
|
|
87
|
+
else if (source.type === "local_folder") {
|
|
88
|
+
if (!source.paths || source.paths.length === 0) {
|
|
89
|
+
throw new Error(`Local folder source ${index + 1} has no paths configured`);
|
|
90
|
+
}
|
|
91
|
+
const validatedPaths = [];
|
|
92
|
+
for (const sourcePath of source.paths) {
|
|
93
|
+
const absolutePath = path.isAbsolute(sourcePath)
|
|
94
|
+
? sourcePath
|
|
95
|
+
: path.resolve(projectRoot, sourcePath);
|
|
96
|
+
try {
|
|
97
|
+
const stat = await fs.stat(absolutePath);
|
|
98
|
+
if (!stat.isDirectory()) {
|
|
99
|
+
throw new Error(`Path is not a directory: ${sourcePath}`);
|
|
100
|
+
}
|
|
101
|
+
validatedPaths.push(sourcePath);
|
|
102
|
+
}
|
|
103
|
+
catch {
|
|
104
|
+
throw new Error(`Local folder path does not exist: ${sourcePath}`);
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
await createSymlinks(validatedPaths, localPath, projectRoot);
|
|
108
|
+
let fileCount = 0;
|
|
109
|
+
const files = [];
|
|
110
|
+
async function countFiles(dir) {
|
|
111
|
+
const entries = await fs.readdir(dir, { withFileTypes: true });
|
|
112
|
+
for (const entry of entries) {
|
|
113
|
+
const fullPath = path.join(dir, entry.name);
|
|
114
|
+
if (entry.isDirectory()) {
|
|
115
|
+
await countFiles(fullPath);
|
|
116
|
+
}
|
|
117
|
+
else if (entry.isFile()) {
|
|
118
|
+
fileCount++;
|
|
119
|
+
files.push(path.relative(localPath, fullPath));
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
await countFiles(localPath);
|
|
124
|
+
result = {
|
|
125
|
+
index,
|
|
126
|
+
type: "local_folder",
|
|
127
|
+
filesCount: fileCount,
|
|
128
|
+
files,
|
|
129
|
+
message: `${validatedPaths.length} symlink(s) created, ${fileCount} files accessible`,
|
|
130
|
+
};
|
|
131
|
+
await fs.writeFile(path.join(localPath, `.agentic-source-${index}.json`), JSON.stringify({
|
|
132
|
+
source_paths: validatedPaths,
|
|
133
|
+
source_type: source.type,
|
|
134
|
+
initialized_at: new Date().toISOString(),
|
|
135
|
+
files_count: fileCount,
|
|
136
|
+
files,
|
|
137
|
+
docset_id: docsetId,
|
|
138
|
+
}, null, 2));
|
|
139
|
+
}
|
|
140
|
+
else if (source.type === "archive") {
|
|
141
|
+
const loader = new ArchiveLoader();
|
|
142
|
+
const sourceUrl = source.url || source.path || "";
|
|
143
|
+
const webSourceConfig = {
|
|
144
|
+
url: sourceUrl,
|
|
145
|
+
type: WebSourceType.ARCHIVE,
|
|
146
|
+
options: { paths: source.paths || [] },
|
|
147
|
+
};
|
|
148
|
+
const validation = loader.validateConfig(webSourceConfig);
|
|
149
|
+
if (validation !== true) {
|
|
150
|
+
throw new Error(`Invalid archive source configuration: ${validation}`);
|
|
151
|
+
}
|
|
152
|
+
const loadResult = await loader.load(webSourceConfig, localPath);
|
|
153
|
+
if (!loadResult.success) {
|
|
154
|
+
throw new Error(`Archive loading failed: ${loadResult.error}`);
|
|
155
|
+
}
|
|
156
|
+
result = {
|
|
157
|
+
index,
|
|
158
|
+
type: "archive",
|
|
159
|
+
filesCount: loadResult.files.length,
|
|
160
|
+
files: loadResult.files,
|
|
161
|
+
message: `${loadResult.files.length} files extracted from ${sourceUrl}`,
|
|
162
|
+
contentHash: loadResult.contentHash,
|
|
163
|
+
};
|
|
164
|
+
await fs.writeFile(path.join(localPath, `.agentic-source-${index}.json`), JSON.stringify({
|
|
165
|
+
source_url: sourceUrl,
|
|
166
|
+
source_type: source.type,
|
|
167
|
+
downloaded_at: new Date().toISOString(),
|
|
168
|
+
files_count: loadResult.files.length,
|
|
169
|
+
files: loadResult.files,
|
|
170
|
+
docset_id: docsetId,
|
|
171
|
+
content_hash: loadResult.contentHash,
|
|
172
|
+
}, null, 2));
|
|
173
|
+
}
|
|
174
|
+
else {
|
|
175
|
+
result = {
|
|
176
|
+
index,
|
|
177
|
+
type: source.type,
|
|
178
|
+
filesCount: 0,
|
|
179
|
+
files: [],
|
|
180
|
+
message: `source type '${source.type}' not supported, skipped`,
|
|
181
|
+
};
|
|
182
|
+
}
|
|
183
|
+
totalFiles += result.filesCount;
|
|
184
|
+
sourceResults.push(result);
|
|
185
|
+
onSourceProgress?.(result);
|
|
186
|
+
}
|
|
187
|
+
// Write the overall metadata file — this is what search_docs checks for
|
|
188
|
+
await fs.writeFile(path.join(localPath, ".agentic-metadata.json"), JSON.stringify({
|
|
189
|
+
docset_id: docsetId,
|
|
190
|
+
docset_name: docset.name,
|
|
191
|
+
initialized_at: new Date().toISOString(),
|
|
192
|
+
total_files: totalFiles,
|
|
193
|
+
sources_count: docset.sources.length,
|
|
194
|
+
}, null, 2));
|
|
195
|
+
return { localPath, totalFiles, sourceResults, alreadyInitialized: false };
|
|
196
|
+
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@codemcp/knowledge-content-loader",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.6.0",
|
|
4
4
|
"description": "Web content loading and metadata management for agentic knowledge system",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -29,6 +29,7 @@
|
|
|
29
29
|
"typecheck": "tsc --noEmit"
|
|
30
30
|
},
|
|
31
31
|
"dependencies": {
|
|
32
|
+
"@codemcp/knowledge-core": "workspace:*",
|
|
32
33
|
"adm-zip": "0.5.16",
|
|
33
34
|
"simple-git": "^3.22.0",
|
|
34
35
|
"tar": "7.5.9"
|
|
@@ -13,3 +13,4 @@ export { createSymlinks, removeSymlinks } from "./paths/symlinks.js";
|
|
|
13
13
|
export { discoverDirectoryPatterns, discoverMinimalPatterns, } from "./paths/discovery.js";
|
|
14
14
|
export { safelyClearDirectory, containsSymlinks, getDirectoryInfo, } from "./paths/cleanup.js";
|
|
15
15
|
export { processTemplate, getEffectiveTemplate, validateTemplate, extractVariables, createTemplateContext, createStructuredResponse, } from "./templates/processor.js";
|
|
16
|
+
export { buildFileIndex, searchDocset, formatSearchResult, type DocsetIndex, } from "./search/searcher.js";
|
|
@@ -20,3 +20,5 @@ export { discoverDirectoryPatterns, discoverMinimalPatterns, } from "./paths/dis
|
|
|
20
20
|
export { safelyClearDirectory, containsSymlinks, getDirectoryInfo, } from "./paths/cleanup.js";
|
|
21
21
|
// Export template processing
|
|
22
22
|
export { processTemplate, getEffectiveTemplate, validateTemplate, extractVariables, createTemplateContext, createStructuredResponse, } from "./templates/processor.js";
|
|
23
|
+
// Export search functionality
|
|
24
|
+
export { buildFileIndex, searchDocset, formatSearchResult, } from "./search/searcher.js";
|
|
@@ -12,32 +12,31 @@ import { KnowledgeError, ErrorType } from "../types.js";
|
|
|
12
12
|
*/
|
|
13
13
|
export async function createSymlinks(sourcePaths, targetDir, projectRoot) {
|
|
14
14
|
try {
|
|
15
|
-
// Ensure target directory exists
|
|
16
15
|
await fs.mkdir(targetDir, { recursive: true });
|
|
17
16
|
for (const sourcePath of sourcePaths) {
|
|
18
|
-
// Resolve source path to absolute
|
|
19
17
|
const absoluteSourcePath = isAbsolute(sourcePath)
|
|
20
18
|
? sourcePath
|
|
21
19
|
: resolve(projectRoot, sourcePath);
|
|
22
|
-
// Check if source exists
|
|
23
20
|
try {
|
|
24
21
|
await fs.access(absoluteSourcePath);
|
|
25
22
|
}
|
|
26
23
|
catch {
|
|
27
24
|
throw new Error(`Source path does not exist: ${absoluteSourcePath}`);
|
|
28
25
|
}
|
|
29
|
-
//
|
|
30
|
-
|
|
31
|
-
const
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
26
|
+
// Link each entry inside the source directory directly into targetDir
|
|
27
|
+
// so that the docset root is flat — consistent with git_repo / archive.
|
|
28
|
+
const entries = await fs.readdir(absoluteSourcePath);
|
|
29
|
+
for (const entry of entries) {
|
|
30
|
+
const symlinkPath = join(targetDir, entry);
|
|
31
|
+
const entryAbsPath = join(absoluteSourcePath, entry);
|
|
32
|
+
try {
|
|
33
|
+
await fs.unlink(symlinkPath);
|
|
34
|
+
}
|
|
35
|
+
catch {
|
|
36
|
+
// ignore — entry doesn't exist yet
|
|
37
|
+
}
|
|
38
|
+
await fs.symlink(entryAbsPath, symlinkPath);
|
|
38
39
|
}
|
|
39
|
-
// Create symlink
|
|
40
|
-
await fs.symlink(absoluteSourcePath, symlinkPath);
|
|
41
40
|
}
|
|
42
41
|
}
|
|
43
42
|
catch (error) {
|
|
@@ -55,7 +54,6 @@ export async function validateSymlinks(targetDir) {
|
|
|
55
54
|
for (const entry of entries) {
|
|
56
55
|
if (entry.isSymbolicLink()) {
|
|
57
56
|
const symlinkPath = join(targetDir, entry.name);
|
|
58
|
-
// Check if symlink target exists
|
|
59
57
|
try {
|
|
60
58
|
await fs.access(symlinkPath);
|
|
61
59
|
}
|