agentic-knowledge-mcp 1.0.16 → 1.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +8 -7
- package/packages/cli/dist/commands/init.js +39 -1
- package/packages/cli/dist/commands/refresh.js +76 -0
- package/packages/cli/package.json +7 -5
- package/packages/content-loader/dist/content/file-filter.d.ts +15 -0
- package/packages/content-loader/dist/content/file-filter.js +80 -0
- package/packages/content-loader/dist/content/git-repo-loader.d.ts +0 -12
- package/packages/content-loader/dist/content/git-repo-loader.js +2 -77
- package/packages/content-loader/dist/content/index.d.ts +2 -0
- package/packages/content-loader/dist/content/index.js +2 -0
- package/packages/content-loader/dist/content/zip-loader.d.ts +79 -0
- package/packages/content-loader/dist/content/zip-loader.js +348 -0
- package/packages/content-loader/dist/types.d.ts +12 -3
- package/packages/content-loader/dist/types.js +2 -0
- package/packages/content-loader/package.json +5 -1
- package/packages/core/dist/config/loader.js +24 -0
- package/packages/core/dist/paths/calculator.js +8 -0
- package/packages/core/dist/types.d.ts +13 -1
- package/packages/core/package.json +3 -1
- package/packages/mcp-server/package.json +7 -2
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agentic-knowledge-mcp",
|
|
3
|
-
"version": "1.0.16",
|
|
3
|
+
"version": "1.0.18",
|
|
4
4
|
"description": "A Model Context Protocol server for agentic knowledge guidance with web-based documentation loading and intelligent search instructions",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "packages/cli/dist/index.js",
|
|
@@ -23,17 +23,18 @@
|
|
|
23
23
|
"url": "git+https://github.com/mrsimpson/agentic-knowledge-mcp.git"
|
|
24
24
|
},
|
|
25
25
|
"dependencies": {
|
|
26
|
-
"@codemcp/knowledge-core": "1.0.15",
|
|
27
|
-
"@codemcp/knowledge-content-loader": "1.0.15",
|
|
28
|
-
"@codemcp/knowledge": "1.0.15",
|
|
29
26
|
"@modelcontextprotocol/sdk": "^1.19.1",
|
|
30
27
|
"@types/js-yaml": "4.0.9",
|
|
31
|
-
"js-yaml": "4.1.0",
|
|
32
|
-
"commander": "^12.0.0",
|
|
33
28
|
"chalk": "^5.3.0",
|
|
34
|
-
"
|
|
29
|
+
"commander": "^12.0.0",
|
|
30
|
+
"js-yaml": "4.1.0",
|
|
31
|
+
"ora": "^8.0.1",
|
|
32
|
+
"@codemcp/knowledge-content-loader": "1.0.18",
|
|
33
|
+
"@codemcp/knowledge": "1.0.18",
|
|
34
|
+
"@codemcp/knowledge-core": "1.0.18"
|
|
35
35
|
},
|
|
36
36
|
"devDependencies": {
|
|
37
|
+
"@eslint/js": "^9.34.0",
|
|
37
38
|
"@modelcontextprotocol/inspector": "0.16.8",
|
|
38
39
|
"@tsconfig/node22": "22.0.2",
|
|
39
40
|
"@tsconfig/strictest": "2.0.5",
|
|
@@ -6,7 +6,7 @@ import chalk from "chalk";
|
|
|
6
6
|
import { promises as fs } from "node:fs";
|
|
7
7
|
import * as path from "node:path";
|
|
8
8
|
import { ConfigManager, calculateLocalPath, ensureKnowledgeGitignoreSync, discoverDirectoryPatterns, safelyClearDirectory, getDirectoryInfo, } from "@codemcp/knowledge-core";
|
|
9
|
-
import { GitRepoLoader, WebSourceType, } from "@codemcp/knowledge-content-loader";
|
|
9
|
+
import { GitRepoLoader, ZipLoader, WebSourceType, } from "@codemcp/knowledge-content-loader";
|
|
10
10
|
export const initCommand = new Command("init")
|
|
11
11
|
.description("Initialize sources for a docset from configuration")
|
|
12
12
|
.argument("<docset-id>", "ID of the docset to initialize")
|
|
@@ -169,6 +169,44 @@ export const initCommand = new Command("init")
|
|
|
169
169
|
};
|
|
170
170
|
await fs.writeFile(path.join(localPath, `.agentic-source-${index}.json`), JSON.stringify(metadata, null, 2));
|
|
171
171
|
}
|
|
172
|
+
else if (source.type === "zip") {
|
|
173
|
+
// Handle zip file initialization
|
|
174
|
+
const loader = new ZipLoader();
|
|
175
|
+
const sourceUrl = source.url || source.path || "";
|
|
176
|
+
console.log(chalk.gray(` Using ZipLoader for zip extraction`));
|
|
177
|
+
const webSourceConfig = {
|
|
178
|
+
url: sourceUrl,
|
|
179
|
+
type: WebSourceType.ZIP,
|
|
180
|
+
options: {
|
|
181
|
+
paths: source.paths || [],
|
|
182
|
+
},
|
|
183
|
+
};
|
|
184
|
+
// Validate configuration
|
|
185
|
+
const validation = loader.validateConfig(webSourceConfig);
|
|
186
|
+
if (validation !== true) {
|
|
187
|
+
throw new Error(`Invalid zip source configuration: ${validation}`);
|
|
188
|
+
}
|
|
189
|
+
// Load content using ZipLoader
|
|
190
|
+
const result = await loader.load(webSourceConfig, localPath);
|
|
191
|
+
if (!result.success) {
|
|
192
|
+
throw new Error(`Zip loading failed: ${result.error}`);
|
|
193
|
+
}
|
|
194
|
+
// Collect discovered paths for config update
|
|
195
|
+
allDiscoveredPaths.push(...result.files);
|
|
196
|
+
totalFiles += result.files.length;
|
|
197
|
+
console.log(chalk.green(` ✅ Extracted ${result.files.length} files from zip`));
|
|
198
|
+
// Create source metadata
|
|
199
|
+
const metadata = {
|
|
200
|
+
source_url: sourceUrl,
|
|
201
|
+
source_type: source.type,
|
|
202
|
+
downloaded_at: new Date().toISOString(),
|
|
203
|
+
files_count: result.files.length,
|
|
204
|
+
files: result.files,
|
|
205
|
+
docset_id: docsetId,
|
|
206
|
+
content_hash: result.contentHash,
|
|
207
|
+
};
|
|
208
|
+
await fs.writeFile(path.join(localPath, `.agentic-source-${index}.json`), JSON.stringify(metadata, null, 2));
|
|
209
|
+
}
|
|
172
210
|
else {
|
|
173
211
|
console.log(chalk.red(` ❌ Source type '${source.type}' not yet supported`));
|
|
174
212
|
}
|
|
@@ -8,6 +8,7 @@ import { promises as fs } from "node:fs";
|
|
|
8
8
|
import * as path from "node:path";
|
|
9
9
|
import { execSync } from "node:child_process";
|
|
10
10
|
import { findConfigPathSync, loadConfigSync, calculateLocalPath, ensureKnowledgeGitignoreSync, } from "@codemcp/knowledge-core";
|
|
11
|
+
import { ZipLoader, WebSourceType } from "@codemcp/knowledge-content-loader";
|
|
11
12
|
export const refreshCommand = new Command("refresh")
|
|
12
13
|
.description("Refresh sources for docsets")
|
|
13
14
|
.argument("[docset-id]", "ID of specific docset to refresh (refresh all if not specified)")
|
|
@@ -95,6 +96,11 @@ async function refreshDocset(docset, configPath, force) {
|
|
|
95
96
|
totalFiles += sourceFiles.files_count;
|
|
96
97
|
refreshedSources.push(sourceFiles);
|
|
97
98
|
}
|
|
99
|
+
else if (source.type === "zip") {
|
|
100
|
+
const sourceFiles = await refreshZipSource(source, localPath, index, docset.id, force);
|
|
101
|
+
totalFiles += sourceFiles.files_count;
|
|
102
|
+
refreshedSources.push(sourceFiles);
|
|
103
|
+
}
|
|
98
104
|
else {
|
|
99
105
|
console.log(chalk.yellow(` ⚠️ Source type '${source.type}' not yet supported, skipping`));
|
|
100
106
|
}
|
|
@@ -238,6 +244,76 @@ async function refreshGitSource(webSource, localPath, index, docsetId, force) {
|
|
|
238
244
|
await fs.rm(tempDir, { recursive: true, force: true });
|
|
239
245
|
}
|
|
240
246
|
}
|
|
247
|
+
/**
 * Refresh one zip-based source of a docset.
 *
 * Flow:
 *  1. Read the per-source metadata file `.agentic-source-<index>.json` in `localPath`, if any.
 *  2. Unless `force` is set, ask the loader for the current content id; when it equals the
 *     stored `content_hash` only `downloaded_at` is bumped and the stored metadata is returned.
 *  3. Otherwise delete the files recorded by the previous run, load the zip again and
 *     write fresh metadata.
 *
 * NOTE(review): step 2 only short-circuits if `ZipLoader.getContentId()` and the
 * `contentHash` returned by `ZipLoader.load()` are computed the same way - confirm.
 * NOTE(review): old files are removed before the new load is known to succeed.
 *
 * @param source - Source entry from the docset configuration (`url` or `path`, optional `paths`)
 * @param localPath - Directory the docset content lives in
 * @param index - Position of the source within the docset, used for the metadata file name
 * @param docsetId - ID of the docset being refreshed
 * @param force - When truthy, skip change detection and always reload
 * @returns The metadata object that was written to disk
 * @throws Error when the loader reports an unsuccessful load
 */
async function refreshZipSource(source, localPath, index, docsetId, force) {
    const sourceMetadataPath = path.join(localPath, `.agentic-source-${index}.json`);
    let existingSourceMetadata = null;
    try {
        const content = await fs.readFile(sourceMetadataPath, "utf8");
        existingSourceMetadata = JSON.parse(content);
    }
    catch {
        // No readable metadata from a previous run, will do full refresh
    }
    const sourceUrl = source.url || source.path || "";
    const loader = new ZipLoader();
    const webSourceConfig = {
        url: sourceUrl,
        type: WebSourceType.ZIP,
        options: {
            paths: source.paths || [],
        },
    };
    // Check if content has changed
    if (!force && existingSourceMetadata) {
        try {
            const currentId = await loader.getContentId(webSourceConfig);
            if (existingSourceMetadata.content_hash === currentId) {
                const updatedMetadata = {
                    ...existingSourceMetadata,
                    downloaded_at: new Date().toISOString(),
                };
                await fs.writeFile(sourceMetadataPath, JSON.stringify(updatedMetadata, null, 2));
                return updatedMetadata;
            }
        }
        catch {
            // Could not check, proceed with full refresh
        }
    }
    // Remove old files from this source. The list comes from a file on disk, so it is
    // validated first: a malformed entry must not abort the refresh, and nothing outside
    // the docset directory may be deleted.
    const previousFiles = Array.isArray(existingSourceMetadata?.files)
        ? existingSourceMetadata.files
        : [];
    const docsetRoot = path.resolve(localPath);
    for (const file of previousFiles) {
        if (typeof file !== "string") {
            continue;
        }
        const filePath = path.join(docsetRoot, file);
        if (!filePath.startsWith(docsetRoot + path.sep)) {
            continue;
        }
        try {
            await fs.unlink(filePath);
        }
        catch {
            // File might already be deleted, ignore
        }
    }
    // Load content
    const result = await loader.load(webSourceConfig, localPath);
    if (!result.success) {
        throw new Error(`Zip refresh failed: ${result.error}`);
    }
    // content_hash is stored for future change detection and is part of the returned
    // object as well, so both return paths of this function have the same shape.
    const metadata = {
        source_url: sourceUrl,
        source_type: "zip",
        downloaded_at: new Date().toISOString(),
        files_count: result.files.length,
        files: result.files,
        docset_id: docsetId,
        content_hash: result.contentHash,
    };
    await fs.writeFile(sourceMetadataPath, JSON.stringify(metadata, null, 2));
    return metadata;
}
|
|
241
317
|
// Reuse utility functions from init.ts
|
|
242
318
|
async function findMarkdownFiles(dir) {
|
|
243
319
|
const files = [];
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@codemcp/knowledge-cli",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.18",
|
|
4
4
|
"description": "Command-line interface for agentic knowledge web content management",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/exports.js",
|
|
@@ -32,15 +32,17 @@
|
|
|
32
32
|
"typecheck": "tsc --noEmit"
|
|
33
33
|
},
|
|
34
34
|
"dependencies": {
|
|
35
|
-
"@codemcp/knowledge
|
|
36
|
-
"@codemcp/knowledge-content-loader": "
|
|
37
|
-
"@codemcp/knowledge": "
|
|
38
|
-
"commander": "^12.0.0",
|
|
35
|
+
"@codemcp/knowledge": "workspace:*",
|
|
36
|
+
"@codemcp/knowledge-content-loader": "workspace:*",
|
|
37
|
+
"@codemcp/knowledge-core": "workspace:*",
|
|
39
38
|
"chalk": "^5.3.0",
|
|
39
|
+
"commander": "^12.0.0",
|
|
40
40
|
"ora": "^8.0.1"
|
|
41
41
|
},
|
|
42
42
|
"devDependencies": {
|
|
43
|
+
"@eslint/js": "^9.34.0",
|
|
43
44
|
"@types/node": "^24.3.0",
|
|
45
|
+
"eslint": "^9.34.0",
|
|
44
46
|
"rimraf": "^6.0.1",
|
|
45
47
|
"typescript": "^5.9.2",
|
|
46
48
|
"vitest": "^3.2.4"
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared file filtering utilities for documentation content extraction (REQ-18)
|
|
3
|
+
*/
|
|
4
|
+
/**
|
|
5
|
+
* Determine if a file is considered documentation content (REQ-18)
|
|
6
|
+
* @param filePath - Path to the file to check
|
|
7
|
+
* @returns True if file should be included as documentation
|
|
8
|
+
*/
|
|
9
|
+
export declare function isDocumentationFile(filePath: string): boolean;
|
|
10
|
+
/**
|
|
11
|
+
* Filter list of files to only include documentation-relevant files (REQ-18)
|
|
12
|
+
* @param files - Array of file paths to filter
|
|
13
|
+
* @returns Array of file paths that are considered documentation
|
|
14
|
+
*/
|
|
15
|
+
export declare function filterDocumentationFiles(files: string[]): string[];
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared file filtering utilities for documentation content extraction (REQ-18)
|
|
3
|
+
*/
|
|
4
|
+
import * as path from "node:path";
|
|
5
|
+
// Project metadata files that are never treated as documentation (matched as a filename prefix)
const METADATA_FILE_PATTERN = /^(CHANGELOG|LICENSE|CONTRIBUTING|AUTHORS|CODE_OF_CONDUCT)/i;
// README files are always documentation (matched as a filename prefix)
const README_PATTERN = /^README/i;
// A path mentioning an examples/samples directory keeps every non-binary file (Issue #12)
const EXAMPLES_DIR_PATTERN = /\b(examples?|samples?)\b/i;
// Build, dependency, and development directories; compared against whole path segments
const EXCLUDED_DIRS = new Set([
    "node_modules",
    "vendor",
    ".git",
    "build",
    "dist",
    "target",
    ".cache",
    "__tests__",
    "test",
    "tests",
    ".github",
    ".vscode",
    ".idea",
]);
// Extensions that mark a file as documentation wherever it lives
const DOC_EXTENSIONS = new Set([".md", ".mdx", ".rst", ".txt", ".adoc", ".asciidoc"]);
// Binary artifacts that stay excluded even inside examples/samples directories
const BINARY_EXTENSIONS = new Set([".exe", ".bin", ".so", ".dll", ".dylib", ".a", ".o", ".obj"]);
/**
 * Determine if a file is considered documentation content (REQ-18)
 *
 * Rules, applied in this order:
 *  1. project metadata files (CHANGELOG, LICENSE, ...) are rejected;
 *  2. files below an excluded directory (exact segment match) are rejected;
 *  3. README files are accepted;
 *  4. files with a documentation extension are accepted;
 *  5. inside an examples/samples directory everything except binaries is accepted;
 *  6. everything else is rejected.
 *
 * Both "/" and "\" are treated as path separators on every platform, so a
 * Windows-style path gets the same answer on a POSIX host.
 * @param filePath - Path to the file to check
 * @returns True if file should be included as documentation
 */
export function isDocumentationFile(filePath) {
    // Normalize separators up front so basename/extname/dirname see one convention
    const normalizedPath = filePath.replace(/\\/g, "/");
    const filename = path.posix.basename(normalizedPath);
    const extension = path.posix.extname(normalizedPath).toLowerCase();
    const directory = path.posix.dirname(normalizedPath);
    if (METADATA_FILE_PATTERN.test(filename)) {
        return false;
    }
    // Exact directory name matching, not substring matching ("distribution" is not "dist")
    if (directory.split("/").some((segment) => EXCLUDED_DIRS.has(segment))) {
        return false;
    }
    if (README_PATTERN.test(filename) || DOC_EXTENSIONS.has(extension)) {
        return true;
    }
    if (EXAMPLES_DIR_PATTERN.test(directory)) {
        return !BINARY_EXTENSIONS.has(extension);
    }
    return false;
}
|
|
73
|
+
/**
 * Reduce a list of paths to those that count as documentation (REQ-18)
 * @param files - Array of file paths to filter
 * @returns New array with the paths accepted by isDocumentationFile, in their original order
 */
export function filterDocumentationFiles(files) {
    return files.flatMap((candidate) => (isDocumentationFile(candidate) ? [candidate] : []));
}
|
|
@@ -51,18 +51,6 @@ export declare class GitRepoLoader extends ContentLoader {
|
|
|
51
51
|
* Clean up temporary directory
|
|
52
52
|
*/
|
|
53
53
|
private cleanupTempDirectory;
|
|
54
|
-
/**
|
|
55
|
-
* Filter list of files to only include documentation-relevant files (REQ-18)
|
|
56
|
-
* @param files - Array of file paths to filter
|
|
57
|
-
* @returns Array of file paths that are considered documentation
|
|
58
|
-
*/
|
|
59
|
-
private filterDocumentationFiles;
|
|
60
|
-
/**
|
|
61
|
-
* Determine if a file is considered documentation content (REQ-18)
|
|
62
|
-
* @param filePath - Path to the file to check
|
|
63
|
-
* @returns True if file should be included as documentation
|
|
64
|
-
*/
|
|
65
|
-
private isDocumentationFile;
|
|
66
54
|
/**
|
|
67
55
|
* Extract only documentation files from source directory (REQ-18)
|
|
68
56
|
* @param sourceDir - Source directory to scan
|
|
@@ -7,6 +7,7 @@ import { execSync } from "node:child_process";
|
|
|
7
7
|
import { ContentLoader } from "./loader.js";
|
|
8
8
|
import { WebSourceType, WebSourceError, WebSourceErrorType, } from "../types.js";
|
|
9
9
|
import * as crypto from "node:crypto";
|
|
10
|
+
import { filterDocumentationFiles } from "./file-filter.js";
|
|
10
11
|
/**
|
|
11
12
|
* Content loader for Git repositories (GitHub, GitLab, any Git repo)
|
|
12
13
|
*/
|
|
@@ -244,82 +245,6 @@ export class GitRepoLoader extends ContentLoader {
|
|
|
244
245
|
console.warn(`Warning: Could not clean up temp directory ${tempDir}: ${error instanceof Error ? error.message : String(error)}`);
|
|
245
246
|
}
|
|
246
247
|
}
|
|
247
|
-
/**
|
|
248
|
-
* Filter list of files to only include documentation-relevant files (REQ-18)
|
|
249
|
-
* @param files - Array of file paths to filter
|
|
250
|
-
* @returns Array of file paths that are considered documentation
|
|
251
|
-
*/
|
|
252
|
-
filterDocumentationFiles(files) {
|
|
253
|
-
return files.filter((file) => this.isDocumentationFile(file));
|
|
254
|
-
}
|
|
255
|
-
/**
|
|
256
|
-
* Determine if a file is considered documentation content (REQ-18)
|
|
257
|
-
* @param filePath - Path to the file to check
|
|
258
|
-
* @returns True if file should be included as documentation
|
|
259
|
-
*/
|
|
260
|
-
isDocumentationFile(filePath) {
|
|
261
|
-
const filename = path.basename(filePath);
|
|
262
|
-
const extension = path.extname(filePath).toLowerCase();
|
|
263
|
-
const directory = path.dirname(filePath);
|
|
264
|
-
// Exclude project metadata files (REQ-18)
|
|
265
|
-
const metadataFiles = /^(CHANGELOG|LICENSE|CONTRIBUTING|AUTHORS|CODE_OF_CONDUCT)/i;
|
|
266
|
-
if (metadataFiles.test(filename)) {
|
|
267
|
-
return false;
|
|
268
|
-
}
|
|
269
|
-
// Normalize directory path for consistent matching (use forward slashes)
|
|
270
|
-
const normalizedDir = directory.split(path.sep).join("/");
|
|
271
|
-
const pathParts = normalizedDir.split("/");
|
|
272
|
-
// Exclude build, dependency, and development directories (REQ-18)
|
|
273
|
-
// Use exact directory name matching, not substring matching
|
|
274
|
-
const excludedDirs = [
|
|
275
|
-
"node_modules",
|
|
276
|
-
"vendor",
|
|
277
|
-
".git",
|
|
278
|
-
"build",
|
|
279
|
-
"dist",
|
|
280
|
-
"target",
|
|
281
|
-
".cache",
|
|
282
|
-
"__tests__",
|
|
283
|
-
"test",
|
|
284
|
-
"tests",
|
|
285
|
-
".github",
|
|
286
|
-
".vscode",
|
|
287
|
-
".idea",
|
|
288
|
-
];
|
|
289
|
-
// Check if any path segment matches excluded directories
|
|
290
|
-
for (const excludedDir of excludedDirs) {
|
|
291
|
-
if (pathParts.includes(excludedDir)) {
|
|
292
|
-
return false;
|
|
293
|
-
}
|
|
294
|
-
}
|
|
295
|
-
// Include README files anywhere (REQ-18)
|
|
296
|
-
if (/^README/i.test(filename)) {
|
|
297
|
-
return true;
|
|
298
|
-
}
|
|
299
|
-
// Include documentation file extensions anywhere, regardless of directory (REQ-18)
|
|
300
|
-
const docExtensions = [".md", ".mdx", ".rst", ".txt", ".adoc", ".asciidoc"];
|
|
301
|
-
if (docExtensions.includes(extension)) {
|
|
302
|
-
return true;
|
|
303
|
-
}
|
|
304
|
-
// Special case: examples/samples directory - include ALL file types (Issue #12)
|
|
305
|
-
// These directories contain code that demonstrates usage patterns
|
|
306
|
-
const isInExamples = /\b(examples?|samples?)\b/i.test(directory);
|
|
307
|
-
if (isInExamples) {
|
|
308
|
-
// In examples/samples, exclude only binary files
|
|
309
|
-
const excludedInExamples = [
|
|
310
|
-
".exe",
|
|
311
|
-
".bin",
|
|
312
|
-
".so",
|
|
313
|
-
".dll",
|
|
314
|
-
".dylib",
|
|
315
|
-
".a",
|
|
316
|
-
".o",
|
|
317
|
-
".obj",
|
|
318
|
-
];
|
|
319
|
-
return !excludedInExamples.includes(extension);
|
|
320
|
-
}
|
|
321
|
-
return false;
|
|
322
|
-
}
|
|
323
248
|
/**
|
|
324
249
|
* Extract only documentation files from source directory (REQ-18)
|
|
325
250
|
* @param sourceDir - Source directory to scan
|
|
@@ -330,7 +255,7 @@ export class GitRepoLoader extends ContentLoader {
|
|
|
330
255
|
// First, scan all files in the repository
|
|
331
256
|
const allFiles = await this.scanAllFiles(sourceDir);
|
|
332
257
|
// Filter to only documentation files
|
|
333
|
-
const docFiles = this.filterDocumentationFiles(allFiles);
|
|
258
|
+
const docFiles = filterDocumentationFiles(allFiles);
|
|
334
259
|
// Copy the filtered files
|
|
335
260
|
for (const filePath of docFiles) {
|
|
336
261
|
const relativePath = path.relative(sourceDir, filePath);
|
|
@@ -3,7 +3,9 @@
|
|
|
3
3
|
*/
|
|
4
4
|
export { ContentLoader } from "./loader.js";
|
|
5
5
|
export { GitRepoLoader } from "./git-repo-loader.js";
|
|
6
|
+
export { ZipLoader } from "./zip-loader.js";
|
|
6
7
|
export { DocumentationSiteLoader } from "./documentation-site-loader.js";
|
|
7
8
|
export { ApiDocumentationLoader } from "./api-documentation-loader.js";
|
|
8
9
|
export { ContentProcessor } from "./content-processor.js";
|
|
9
10
|
export { MetadataManager } from "./metadata-manager.js";
|
|
11
|
+
export { isDocumentationFile, filterDocumentationFiles, } from "./file-filter.js";
|
|
@@ -3,7 +3,9 @@
|
|
|
3
3
|
*/
|
|
4
4
|
export { ContentLoader } from "./loader.js";
|
|
5
5
|
export { GitRepoLoader } from "./git-repo-loader.js";
|
|
6
|
+
export { ZipLoader } from "./zip-loader.js";
|
|
6
7
|
export { DocumentationSiteLoader } from "./documentation-site-loader.js";
|
|
7
8
|
export { ApiDocumentationLoader } from "./api-documentation-loader.js";
|
|
8
9
|
export { ContentProcessor } from "./content-processor.js";
|
|
9
10
|
export { MetadataManager } from "./metadata-manager.js";
|
|
11
|
+
export { isDocumentationFile, filterDocumentationFiles, } from "./file-filter.js";
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Zip file content loader
|
|
3
|
+
*/
|
|
4
|
+
import { ContentLoader, type LoadResult } from "./loader.js";
|
|
5
|
+
import { WebSourceConfig } from "../types.js";
|
|
6
|
+
/**
|
|
7
|
+
* Content loader for zip files (local or remote)
|
|
8
|
+
*/
|
|
9
|
+
export declare class ZipLoader extends ContentLoader {
|
|
10
|
+
/**
|
|
11
|
+
* Check if this loader can handle the given web source type
|
|
12
|
+
*/
|
|
13
|
+
canHandle(webSource: WebSourceConfig): boolean;
|
|
14
|
+
/**
|
|
15
|
+
* Validate the web source configuration
|
|
16
|
+
*/
|
|
17
|
+
validateConfig(webSource: WebSourceConfig): true | string;
|
|
18
|
+
/**
|
|
19
|
+
* Load content from a zip file
|
|
20
|
+
*/
|
|
21
|
+
load(webSource: WebSourceConfig, targetPath: string): Promise<LoadResult>;
|
|
22
|
+
/**
|
|
23
|
+
* Get content identifier for change detection
|
|
24
|
+
*/
|
|
25
|
+
getContentId(webSource: WebSourceConfig): Promise<string>;
|
|
26
|
+
/**
|
|
27
|
+
* Get headers from remote URL using HEAD request
|
|
28
|
+
*/
|
|
29
|
+
private getRemoteHeaders;
|
|
30
|
+
/**
|
|
31
|
+
* Determine if the source is a remote URL or local path
|
|
32
|
+
*/
|
|
33
|
+
private isRemoteUrl;
|
|
34
|
+
/**
|
|
35
|
+
* Resolve the zip file path - download if remote, return as-is if local
|
|
36
|
+
*/
|
|
37
|
+
private resolveZipFile;
|
|
38
|
+
/**
|
|
39
|
+
* Download a zip file from a remote URL
|
|
40
|
+
*/
|
|
41
|
+
private downloadZip;
|
|
42
|
+
/**
|
|
43
|
+
* Extract a zip file to a directory using adm-zip
|
|
44
|
+
*/
|
|
45
|
+
private extractZip;
|
|
46
|
+
/**
|
|
47
|
+
* If the extracted contents have a single root directory and no files at root,
|
|
48
|
+
* move that directory's contents one level up.
|
|
49
|
+
*/
|
|
50
|
+
private flattenSingleRoot;
|
|
51
|
+
/**
|
|
52
|
+
* Extract content from extracted zip to target directory
|
|
53
|
+
*/
|
|
54
|
+
private extractContent;
|
|
55
|
+
/**
|
|
56
|
+
* Extract only documentation files from source directory
|
|
57
|
+
*/
|
|
58
|
+
private extractDocumentationFiles;
|
|
59
|
+
/**
|
|
60
|
+
* Copy directory recursively
|
|
61
|
+
*/
|
|
62
|
+
private copyDirectory;
|
|
63
|
+
/**
|
|
64
|
+
* Recursively scan all files in a directory
|
|
65
|
+
*/
|
|
66
|
+
private scanAllFiles;
|
|
67
|
+
/**
|
|
68
|
+
* Generate content hash for change detection
|
|
69
|
+
*/
|
|
70
|
+
private generateContentHash;
|
|
71
|
+
/**
|
|
72
|
+
* Create a temporary directory
|
|
73
|
+
*/
|
|
74
|
+
private createTempDirectory;
|
|
75
|
+
/**
|
|
76
|
+
* Clean up temporary directory
|
|
77
|
+
*/
|
|
78
|
+
private cleanupTempDirectory;
|
|
79
|
+
}
|
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Zip file content loader
|
|
3
|
+
*/
|
|
4
|
+
import { promises as fs } from "node:fs";
|
|
5
|
+
import * as path from "node:path";
|
|
6
|
+
import * as crypto from "node:crypto";
|
|
7
|
+
import https from "node:https";
|
|
8
|
+
import http from "node:http";
|
|
9
|
+
import AdmZip from "adm-zip";
|
|
10
|
+
import { ContentLoader } from "./loader.js";
|
|
11
|
+
import { WebSourceType, WebSourceError, WebSourceErrorType, } from "../types.js";
|
|
12
|
+
import { filterDocumentationFiles } from "./file-filter.js";
|
|
13
|
+
/**
|
|
14
|
+
* Content loader for zip files (local or remote)
|
|
15
|
+
*/
|
|
16
|
+
export class ZipLoader extends ContentLoader {
|
|
17
|
+
/**
|
|
18
|
+
* Check if this loader can handle the given web source type
|
|
19
|
+
*/
|
|
20
|
+
canHandle(webSource) {
|
|
21
|
+
return webSource.type === WebSourceType.ZIP;
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* Validate the web source configuration
|
|
25
|
+
*/
|
|
26
|
+
validateConfig(webSource) {
|
|
27
|
+
if (!webSource.url) {
|
|
28
|
+
return "Zip source must have a URL (remote) or local path";
|
|
29
|
+
}
|
|
30
|
+
return true;
|
|
31
|
+
}
|
|
32
|
+
/**
|
|
33
|
+
* Load content from a zip file
|
|
34
|
+
*/
|
|
35
|
+
async load(webSource, targetPath) {
|
|
36
|
+
try {
|
|
37
|
+
const options = webSource.options;
|
|
38
|
+
const tempDir = await this.createTempDirectory();
|
|
39
|
+
try {
|
|
40
|
+
// Get the zip file (download if remote, or use local path)
|
|
41
|
+
const zipFilePath = await this.resolveZipFile(webSource.url, tempDir);
|
|
42
|
+
// Extract to temp directory
|
|
43
|
+
const extractDir = path.join(tempDir, "extracted");
|
|
44
|
+
await fs.mkdir(extractDir, { recursive: true });
|
|
45
|
+
this.extractZip(zipFilePath, extractDir);
|
|
46
|
+
// Flatten single root directory
|
|
47
|
+
await this.flattenSingleRoot(extractDir);
|
|
48
|
+
// Extract specified paths or all documentation content
|
|
49
|
+
const extractedFiles = await this.extractContent(extractDir, targetPath, options?.paths);
|
|
50
|
+
// Generate content hash
|
|
51
|
+
const contentHash = await this.generateContentHash(targetPath, extractedFiles);
|
|
52
|
+
return {
|
|
53
|
+
success: true,
|
|
54
|
+
files: extractedFiles,
|
|
55
|
+
contentHash,
|
|
56
|
+
};
|
|
57
|
+
}
|
|
58
|
+
finally {
|
|
59
|
+
await this.cleanupTempDirectory(tempDir);
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
catch (error) {
|
|
63
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
64
|
+
return {
|
|
65
|
+
success: false,
|
|
66
|
+
files: [],
|
|
67
|
+
contentHash: "",
|
|
68
|
+
error: `Zip loading failed: ${errorMessage}`,
|
|
69
|
+
};
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
/**
|
|
73
|
+
* Get content identifier for change detection
|
|
74
|
+
*/
|
|
75
|
+
async getContentId(webSource) {
|
|
76
|
+
try {
|
|
77
|
+
if (this.isRemoteUrl(webSource.url)) {
|
|
78
|
+
// For remote URLs, try HEAD request for ETag/Last-Modified
|
|
79
|
+
const headers = await this.getRemoteHeaders(webSource.url);
|
|
80
|
+
const etag = headers["etag"] || "";
|
|
81
|
+
const lastModified = headers["last-modified"] || "";
|
|
82
|
+
const identifier = etag || lastModified || webSource.url;
|
|
83
|
+
return crypto
|
|
84
|
+
.createHash("sha256")
|
|
85
|
+
.update(`${webSource.url}:${identifier}`)
|
|
86
|
+
.digest("hex");
|
|
87
|
+
}
|
|
88
|
+
else {
|
|
89
|
+
// For local files, hash the file content
|
|
90
|
+
const content = await fs.readFile(webSource.url);
|
|
91
|
+
return crypto.createHash("sha256").update(content).digest("hex");
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
catch {
|
|
95
|
+
// Fallback to URL-based hash
|
|
96
|
+
return crypto.createHash("sha256").update(webSource.url).digest("hex");
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
/**
|
|
100
|
+
* Get headers from remote URL using HEAD request
|
|
101
|
+
*/
|
|
102
|
+
getRemoteHeaders(url) {
|
|
103
|
+
return new Promise((resolve, reject) => {
|
|
104
|
+
const protocol = url.startsWith("https") ? https : http;
|
|
105
|
+
const request = protocol.request(url, { method: "HEAD" }, (response) => {
|
|
106
|
+
const headers = {};
|
|
107
|
+
if (response.headers) {
|
|
108
|
+
for (const [key, value] of Object.entries(response.headers)) {
|
|
109
|
+
if (typeof value === "string") {
|
|
110
|
+
headers[key] = value;
|
|
111
|
+
}
|
|
112
|
+
else if (Array.isArray(value) && value.length > 0 && value[0]) {
|
|
113
|
+
headers[key] = value[0];
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
resolve(headers);
|
|
118
|
+
});
|
|
119
|
+
request.on("error", reject);
|
|
120
|
+
request.end();
|
|
121
|
+
});
|
|
122
|
+
}
|
|
123
|
+
/**
|
|
124
|
+
* Determine if the source is a remote URL or local path
|
|
125
|
+
*/
|
|
126
|
+
isRemoteUrl(url) {
|
|
127
|
+
return url.startsWith("http://") || url.startsWith("https://");
|
|
128
|
+
}
|
|
129
|
+
/**
|
|
130
|
+
* Resolve the zip file path - download if remote, return as-is if local
|
|
131
|
+
*/
|
|
132
|
+
async resolveZipFile(url, tempDir) {
|
|
133
|
+
if (this.isRemoteUrl(url)) {
|
|
134
|
+
return this.downloadZip(url, tempDir);
|
|
135
|
+
}
|
|
136
|
+
// Local file - verify it exists
|
|
137
|
+
try {
|
|
138
|
+
await fs.access(url);
|
|
139
|
+
return url;
|
|
140
|
+
}
|
|
141
|
+
catch {
|
|
142
|
+
throw new WebSourceError(WebSourceErrorType.ZIP_ERROR, `Local zip file not found: ${url}`, { url });
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
/**
|
|
146
|
+
* Download a zip file from a remote URL
|
|
147
|
+
*/
|
|
148
|
+
async downloadZip(url, tempDir) {
|
|
149
|
+
const zipPath = path.join(tempDir, "download.zip");
|
|
150
|
+
return new Promise((resolve, reject) => {
|
|
151
|
+
const protocol = url.startsWith("https") ? https : http;
|
|
152
|
+
const request = protocol.get(url, async (response) => {
|
|
153
|
+
if (response.statusCode === undefined || response.statusCode >= 400) {
|
|
154
|
+
reject(new Error(`HTTP ${response.statusCode}: ${response.statusMessage}`));
|
|
155
|
+
return;
|
|
156
|
+
}
|
|
157
|
+
try {
|
|
158
|
+
const chunks = [];
|
|
159
|
+
response.on("data", (chunk) => {
|
|
160
|
+
chunks.push(chunk);
|
|
161
|
+
});
|
|
162
|
+
response.on("end", async () => {
|
|
163
|
+
try {
|
|
164
|
+
const buffer = Buffer.concat(chunks);
|
|
165
|
+
await fs.writeFile(zipPath, buffer);
|
|
166
|
+
resolve(zipPath);
|
|
167
|
+
}
|
|
168
|
+
catch (error) {
|
|
169
|
+
reject(error);
|
|
170
|
+
}
|
|
171
|
+
});
|
|
172
|
+
}
|
|
173
|
+
catch (error) {
|
|
174
|
+
reject(error);
|
|
175
|
+
}
|
|
176
|
+
});
|
|
177
|
+
request.on("error", (error) => {
|
|
178
|
+
reject(new WebSourceError(WebSourceErrorType.ZIP_ERROR, `Failed to download zip from ${url}: ${error instanceof Error ? error.message : String(error)}`, { url }));
|
|
179
|
+
});
|
|
180
|
+
});
|
|
181
|
+
}
|
|
182
|
+
/**
|
|
183
|
+
* Extract a zip file to a directory using adm-zip
|
|
184
|
+
*/
|
|
185
|
+
extractZip(zipPath, targetDir) {
|
|
186
|
+
try {
|
|
187
|
+
const zip = new AdmZip(zipPath);
|
|
188
|
+
zip.extractAllTo(targetDir, true);
|
|
189
|
+
}
|
|
190
|
+
catch (error) {
|
|
191
|
+
throw new WebSourceError(WebSourceErrorType.ZIP_ERROR, `Failed to extract zip: ${error instanceof Error ? error.message : String(error)}`, { zipPath });
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
/**
|
|
195
|
+
* If the extracted contents have a single root directory and no files at root,
|
|
196
|
+
* move that directory's contents one level up.
|
|
197
|
+
*/
|
|
198
|
+
async flattenSingleRoot(extractDir) {
|
|
199
|
+
const entries = await fs.readdir(extractDir, { withFileTypes: true });
|
|
200
|
+
const directories = entries.filter((e) => e.isDirectory());
|
|
201
|
+
const files = entries.filter((e) => e.isFile());
|
|
202
|
+
if (directories.length === 1 && files.length === 0) {
|
|
203
|
+
const singleDir = path.join(extractDir, directories[0].name);
|
|
204
|
+
const innerEntries = await fs.readdir(singleDir);
|
|
205
|
+
// Move all contents up one level
|
|
206
|
+
for (const entry of innerEntries) {
|
|
207
|
+
const src = path.join(singleDir, entry);
|
|
208
|
+
const dest = path.join(extractDir, entry);
|
|
209
|
+
await fs.rename(src, dest);
|
|
210
|
+
}
|
|
211
|
+
// Remove the now-empty directory
|
|
212
|
+
await fs.rmdir(singleDir);
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
/**
|
|
216
|
+
* Extract content from extracted zip to target directory
|
|
217
|
+
*/
|
|
218
|
+
async extractContent(sourceDir, targetDir, paths) {
|
|
219
|
+
await fs.mkdir(targetDir, { recursive: true });
|
|
220
|
+
const extractedFiles = [];
|
|
221
|
+
if (paths && paths.length > 0) {
|
|
222
|
+
// Extract only specified paths
|
|
223
|
+
for (const relPath of paths) {
|
|
224
|
+
const sourcePath = path.join(sourceDir, relPath);
|
|
225
|
+
const targetPath = path.join(targetDir, relPath);
|
|
226
|
+
try {
|
|
227
|
+
const stats = await fs.stat(sourcePath);
|
|
228
|
+
if (stats.isDirectory()) {
|
|
229
|
+
await this.copyDirectory(sourcePath, targetPath, extractedFiles);
|
|
230
|
+
}
|
|
231
|
+
else if (stats.isFile()) {
|
|
232
|
+
await fs.mkdir(path.dirname(targetPath), { recursive: true });
|
|
233
|
+
await fs.copyFile(sourcePath, targetPath);
|
|
234
|
+
extractedFiles.push(relPath);
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
catch (error) {
|
|
238
|
+
console.warn(`Warning: Could not extract ${relPath}: ${error instanceof Error ? error.message : String(error)}`);
|
|
239
|
+
}
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
else {
|
|
243
|
+
// Use smart filtering to extract only documentation files
|
|
244
|
+
await this.extractDocumentationFiles(sourceDir, targetDir, extractedFiles);
|
|
245
|
+
}
|
|
246
|
+
return extractedFiles;
|
|
247
|
+
}
|
|
248
|
+
/**
|
|
249
|
+
* Extract only documentation files from source directory
|
|
250
|
+
*/
|
|
251
|
+
async extractDocumentationFiles(sourceDir, targetDir, extractedFiles) {
|
|
252
|
+
const allFiles = await this.scanAllFiles(sourceDir);
|
|
253
|
+
const docFiles = filterDocumentationFiles(allFiles);
|
|
254
|
+
for (const filePath of docFiles) {
|
|
255
|
+
const relativePath = path.relative(sourceDir, filePath);
|
|
256
|
+
const targetPath = path.join(targetDir, relativePath);
|
|
257
|
+
try {
|
|
258
|
+
await fs.mkdir(path.dirname(targetPath), { recursive: true });
|
|
259
|
+
await fs.copyFile(filePath, targetPath);
|
|
260
|
+
extractedFiles.push(relativePath);
|
|
261
|
+
}
|
|
262
|
+
catch (error) {
|
|
263
|
+
console.warn(`Warning: Could not copy ${relativePath}: ${error instanceof Error ? error.message : String(error)}`);
|
|
264
|
+
}
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
/**
|
|
268
|
+
* Copy directory recursively
|
|
269
|
+
*/
|
|
270
|
+
async copyDirectory(source, target, fileList) {
|
|
271
|
+
await fs.mkdir(target, { recursive: true });
|
|
272
|
+
const items = await fs.readdir(source);
|
|
273
|
+
for (const item of items) {
|
|
274
|
+
const sourcePath = path.join(source, item);
|
|
275
|
+
const targetPath = path.join(target, item);
|
|
276
|
+
const stats = await fs.stat(sourcePath);
|
|
277
|
+
if (stats.isDirectory()) {
|
|
278
|
+
await this.copyDirectory(sourcePath, targetPath, fileList);
|
|
279
|
+
}
|
|
280
|
+
else {
|
|
281
|
+
await fs.copyFile(sourcePath, targetPath);
|
|
282
|
+
const relativePath = path.relative(target, targetPath);
|
|
283
|
+
fileList.push(relativePath);
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
/**
|
|
288
|
+
* Recursively scan all files in a directory
|
|
289
|
+
*/
|
|
290
|
+
async scanAllFiles(dir) {
|
|
291
|
+
const files = [];
|
|
292
|
+
async function scan(currentDir) {
|
|
293
|
+
const items = await fs.readdir(currentDir);
|
|
294
|
+
for (const item of items) {
|
|
295
|
+
if (item === ".git")
|
|
296
|
+
continue;
|
|
297
|
+
const fullPath = path.join(currentDir, item);
|
|
298
|
+
const stat = await fs.stat(fullPath);
|
|
299
|
+
if (stat.isDirectory()) {
|
|
300
|
+
await scan(fullPath);
|
|
301
|
+
}
|
|
302
|
+
else if (stat.isFile()) {
|
|
303
|
+
files.push(fullPath);
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
}
|
|
307
|
+
await scan(dir);
|
|
308
|
+
return files;
|
|
309
|
+
}
|
|
310
|
+
/**
|
|
311
|
+
* Generate content hash for change detection
|
|
312
|
+
*/
|
|
313
|
+
async generateContentHash(targetDir, files) {
|
|
314
|
+
const hash = crypto.createHash("sha256");
|
|
315
|
+
const sortedFiles = files.slice().sort();
|
|
316
|
+
for (const file of sortedFiles) {
|
|
317
|
+
const filePath = path.join(targetDir, file);
|
|
318
|
+
try {
|
|
319
|
+
const content = await fs.readFile(filePath);
|
|
320
|
+
hash.update(file);
|
|
321
|
+
hash.update(content);
|
|
322
|
+
}
|
|
323
|
+
catch (error) {
|
|
324
|
+
console.warn(`Warning: Could not hash ${file}: ${error instanceof Error ? error.message : String(error)}`);
|
|
325
|
+
}
|
|
326
|
+
}
|
|
327
|
+
return hash.digest("hex");
|
|
328
|
+
}
|
|
329
|
+
/**
|
|
330
|
+
* Create a temporary directory
|
|
331
|
+
*/
|
|
332
|
+
async createTempDirectory() {
|
|
333
|
+
const tempDir = path.join(process.cwd(), ".tmp", `zip-extract-${Date.now()}-${Math.random().toString(36).slice(2)}`);
|
|
334
|
+
await fs.mkdir(tempDir, { recursive: true });
|
|
335
|
+
return tempDir;
|
|
336
|
+
}
|
|
337
|
+
/**
|
|
338
|
+
* Clean up temporary directory
|
|
339
|
+
*/
|
|
340
|
+
async cleanupTempDirectory(tempDir) {
|
|
341
|
+
try {
|
|
342
|
+
await fs.rm(tempDir, { recursive: true, force: true });
|
|
343
|
+
}
|
|
344
|
+
catch (error) {
|
|
345
|
+
console.warn(`Warning: Could not clean up temp directory ${tempDir}: ${error instanceof Error ? error.message : String(error)}`);
|
|
346
|
+
}
|
|
347
|
+
}
|
|
348
|
+
}
|
|
@@ -22,7 +22,8 @@ export interface DocsetConfig {
|
|
|
22
22
|
export declare enum WebSourceType {
|
|
23
23
|
GIT_REPO = "git_repo",
|
|
24
24
|
DOCUMENTATION_SITE = "documentation_site",
|
|
25
|
-
API_DOCUMENTATION = "api_documentation"
|
|
25
|
+
API_DOCUMENTATION = "api_documentation",
|
|
26
|
+
ZIP = "zip"
|
|
26
27
|
}
|
|
27
28
|
/**
|
|
28
29
|
* Configuration for Git repository web sources
|
|
@@ -55,16 +56,23 @@ export interface ApiDocumentationOptions {
|
|
|
55
56
|
/** Packages or modules to include */
|
|
56
57
|
include_packages?: string[];
|
|
57
58
|
}
|
|
59
|
+
/**
|
|
60
|
+
* Type-specific options for zip file web sources (one member of the `WebSourceConfig.options` union).
|
|
61
|
+
*/
|
|
62
|
+
export interface ZipOptions {
|
|
63
|
+
/** Specific paths to extract from the zip; the field may be omitted. */
|
|
64
|
+
paths?: string[];
|
|
65
|
+
}
|
|
58
66
|
/**
|
|
59
67
|
* Configuration for a single web source
|
|
60
68
|
*/
|
|
61
69
|
export interface WebSourceConfig {
|
|
62
|
-
/** URL of the web source */
|
|
70
|
+
/** URL of the web source (or local path for zip sources) */
|
|
63
71
|
url: string;
|
|
64
72
|
/** Type of web source */
|
|
65
73
|
type: WebSourceType;
|
|
66
74
|
/** Type-specific options */
|
|
67
|
-
options?: GitRepoOptions | DocumentationSiteOptions | ApiDocumentationOptions;
|
|
75
|
+
options?: GitRepoOptions | DocumentationSiteOptions | ApiDocumentationOptions | ZipOptions;
|
|
68
76
|
}
|
|
69
77
|
/**
|
|
70
78
|
* Metadata for a single web source download
|
|
@@ -108,6 +116,7 @@ export declare const METADATA_FILENAME = ".agentic-metadata.json";
|
|
|
108
116
|
export declare enum WebSourceErrorType {
|
|
109
117
|
WEB_SOURCE_ERROR = "WEB_SOURCE_ERROR",
|
|
110
118
|
GIT_REPO_ERROR = "GIT_REPO_ERROR",
|
|
119
|
+
ZIP_ERROR = "ZIP_ERROR",
|
|
111
120
|
NOT_IMPLEMENTED = "NOT_IMPLEMENTED"
|
|
112
121
|
}
|
|
113
122
|
/**
|
|
@@ -9,6 +9,7 @@ export var WebSourceType;
|
|
|
9
9
|
WebSourceType["GIT_REPO"] = "git_repo";
|
|
10
10
|
WebSourceType["DOCUMENTATION_SITE"] = "documentation_site";
|
|
11
11
|
WebSourceType["API_DOCUMENTATION"] = "api_documentation";
|
|
12
|
+
WebSourceType["ZIP"] = "zip";
|
|
12
13
|
})(WebSourceType || (WebSourceType = {}));
|
|
13
14
|
/**
|
|
14
15
|
* Metadata file name pattern
|
|
@@ -21,6 +22,7 @@ export var WebSourceErrorType;
|
|
|
21
22
|
(function (WebSourceErrorType) {
|
|
22
23
|
WebSourceErrorType["WEB_SOURCE_ERROR"] = "WEB_SOURCE_ERROR";
|
|
23
24
|
WebSourceErrorType["GIT_REPO_ERROR"] = "GIT_REPO_ERROR";
|
|
25
|
+
WebSourceErrorType["ZIP_ERROR"] = "ZIP_ERROR";
|
|
24
26
|
WebSourceErrorType["NOT_IMPLEMENTED"] = "NOT_IMPLEMENTED";
|
|
25
27
|
})(WebSourceErrorType || (WebSourceErrorType = {}));
|
|
26
28
|
/**
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@codemcp/knowledge-content-loader",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.18",
|
|
4
4
|
"description": "Web content loading and metadata management for agentic knowledge system",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -29,10 +29,14 @@
|
|
|
29
29
|
"typecheck": "tsc --noEmit"
|
|
30
30
|
},
|
|
31
31
|
"dependencies": {
|
|
32
|
+
"adm-zip": "0.5.16",
|
|
32
33
|
"simple-git": "^3.22.0"
|
|
33
34
|
},
|
|
34
35
|
"devDependencies": {
|
|
36
|
+
"@eslint/js": "^9.34.0",
|
|
37
|
+
"@types/adm-zip": "0.5.7",
|
|
35
38
|
"@types/node": "^24.3.0",
|
|
39
|
+
"eslint": "^9.34.0",
|
|
36
40
|
"rimraf": "^6.0.1",
|
|
37
41
|
"typescript": "^5.9.2",
|
|
38
42
|
"vitest": "^3.2.4"
|
|
@@ -207,6 +207,30 @@ function validateSource(source) {
|
|
|
207
207
|
}
|
|
208
208
|
return true;
|
|
209
209
|
}
|
|
210
|
+
if (type === "zip") {
|
|
211
|
+
const hasPath = obj["path"] !== undefined &&
|
|
212
|
+
typeof obj["path"] === "string" &&
|
|
213
|
+
obj["path"].trim() !== "";
|
|
214
|
+
const hasUrl = obj["url"] !== undefined &&
|
|
215
|
+
typeof obj["url"] === "string" &&
|
|
216
|
+
obj["url"].trim() !== "";
|
|
217
|
+
// Must have exactly one of path or url
|
|
218
|
+
if (hasPath === hasUrl) {
|
|
219
|
+
return false;
|
|
220
|
+
}
|
|
221
|
+
// Optional paths field
|
|
222
|
+
if (obj["paths"] !== undefined) {
|
|
223
|
+
if (!Array.isArray(obj["paths"])) {
|
|
224
|
+
return false;
|
|
225
|
+
}
|
|
226
|
+
for (const path of obj["paths"]) {
|
|
227
|
+
if (typeof path !== "string" || path.trim() === "") {
|
|
228
|
+
return false;
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
return true;
|
|
233
|
+
}
|
|
210
234
|
// Unknown source type
|
|
211
235
|
return false;
|
|
212
236
|
}
|
|
@@ -46,6 +46,10 @@ export function calculateLocalPath(docset, configPath) {
|
|
|
46
46
|
// For git repos, use standardized path: .knowledge/docsets/{id}
|
|
47
47
|
return join(configDir, "docsets", docset.id);
|
|
48
48
|
}
|
|
49
|
+
if (primarySource.type === "zip") {
|
|
50
|
+
// For zip sources, use standardized path: .knowledge/docsets/{id}
|
|
51
|
+
return join(configDir, "docsets", docset.id);
|
|
52
|
+
}
|
|
49
53
|
throw new Error(`Unsupported source type: ${primarySource.type}`);
|
|
50
54
|
}
|
|
51
55
|
catch (error) {
|
|
@@ -87,6 +91,10 @@ export async function calculateLocalPathWithSymlinks(docset, configPath) {
|
|
|
87
91
|
// For git repos, use standardized path: .knowledge/docsets/{id}
|
|
88
92
|
return join(configDir, "docsets", docset.id);
|
|
89
93
|
}
|
|
94
|
+
if (primarySource.type === "zip") {
|
|
95
|
+
// For zip sources, use standardized path: .knowledge/docsets/{id}
|
|
96
|
+
return join(configDir, "docsets", docset.id);
|
|
97
|
+
}
|
|
90
98
|
throw new Error(`Unsupported source type: ${primarySource.type}`);
|
|
91
99
|
}
|
|
92
100
|
/**
|
|
@@ -30,10 +30,22 @@ export interface GitRepoSourceConfig extends BaseSourceConfig {
|
|
|
30
30
|
/** Specific paths to extract (optional) */
|
|
31
31
|
paths?: string[];
|
|
32
32
|
}
|
|
33
|
+
/**
|
|
34
|
+
* Zip file source configuration. The "zip" branch of validateSource accepts a source only when exactly one of `path` / `url` is a non-blank string.
|
|
35
|
+
*/
|
|
36
|
+
export interface ZipSourceConfig extends BaseSourceConfig {
|
|
37
|
+
type: "zip";
|
|
38
|
+
/** Local path to zip file (mutually exclusive with url; setting both or neither fails validation) */
|
|
39
|
+
path?: string;
|
|
40
|
+
/** Remote URL to download zip from (mutually exclusive with path; setting both or neither fails validation) */
|
|
41
|
+
url?: string;
|
|
42
|
+
/** Specific paths to extract (optional); when present, validation requires an array of non-blank strings */
|
|
43
|
+
paths?: string[];
|
|
44
|
+
|
|
45
|
+
}
|
|
33
45
|
/**
|
|
34
46
|
* Union type for all source configurations
|
|
35
47
|
*/
|
|
36
|
-
export type SourceConfig = LocalFolderSourceConfig | GitRepoSourceConfig;
|
|
48
|
+
export type SourceConfig = LocalFolderSourceConfig | GitRepoSourceConfig | ZipSourceConfig;
|
|
37
49
|
/**
|
|
38
50
|
* Configuration for a single docset
|
|
39
51
|
*/
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@codemcp/knowledge-core",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.18",
|
|
4
4
|
"description": "Core functionality for agentic knowledge guidance system",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -32,8 +32,10 @@
|
|
|
32
32
|
"js-yaml": "^4.1.0"
|
|
33
33
|
},
|
|
34
34
|
"devDependencies": {
|
|
35
|
+
"@eslint/js": "^9.34.0",
|
|
35
36
|
"@types/js-yaml": "^4.0.9",
|
|
36
37
|
"@types/node": "^24.3.0",
|
|
38
|
+
"eslint": "^9.34.0",
|
|
37
39
|
"rimraf": "^6.0.1",
|
|
38
40
|
"typescript": "^5.9.2",
|
|
39
41
|
"vitest": "^3.2.4"
|
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@codemcp/knowledge",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.18",
|
|
4
4
|
"description": "MCP server implementation for agentic knowledge guidance system",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
7
7
|
"types": "dist/index.d.ts",
|
|
8
|
+
"bin": {
|
|
9
|
+
"@codemcp/knowledge": "dist/bin.js"
|
|
10
|
+
},
|
|
8
11
|
"exports": {
|
|
9
12
|
".": {
|
|
10
13
|
"import": "./dist/index.js",
|
|
@@ -30,11 +33,13 @@
|
|
|
30
33
|
"typecheck": "tsc --noEmit"
|
|
31
34
|
},
|
|
32
35
|
"dependencies": {
|
|
33
|
-
"@codemcp/knowledge-core": "
|
|
36
|
+
"@codemcp/knowledge-core": "workspace:*",
|
|
34
37
|
"@modelcontextprotocol/sdk": "^1.19.1"
|
|
35
38
|
},
|
|
36
39
|
"devDependencies": {
|
|
40
|
+
"@eslint/js": "^9.34.0",
|
|
37
41
|
"@types/node": "^24.3.0",
|
|
42
|
+
"eslint": "^9.34.0",
|
|
38
43
|
"rimraf": "^6.0.1",
|
|
39
44
|
"typescript": "^5.9.2",
|
|
40
45
|
"vitest": "^3.2.4"
|