agentic-knowledge-mcp 1.0.18 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +4 -4
- package/packages/cli/dist/commands/create.js +59 -4
- package/packages/cli/dist/commands/init.js +10 -10
- package/packages/cli/dist/commands/refresh.js +8 -8
- package/packages/cli/package.json +1 -1
- package/packages/content-loader/dist/content/{zip-loader.d.ts → archive-loader.d.ts} +17 -9
- package/packages/content-loader/dist/content/{zip-loader.js → archive-loader.js} +66 -23
- package/packages/content-loader/dist/content/index.d.ts +1 -1
- package/packages/content-loader/dist/content/index.js +1 -1
- package/packages/content-loader/dist/types.d.ts +7 -7
- package/packages/content-loader/dist/types.js +2 -2
- package/packages/content-loader/package.json +4 -2
- package/packages/core/dist/config/loader.js +1 -1
- package/packages/core/dist/paths/calculator.js +4 -4
- package/packages/core/dist/types.d.ts +6 -6
- package/packages/core/package.json +1 -1
- package/packages/mcp-server/package.json +1 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agentic-knowledge-mcp",
|
|
3
|
-
"version": "1.0.18",
|
|
3
|
+
"version": "1.2.0",
|
|
4
4
|
"description": "A Model Context Protocol server for agentic knowledge guidance with web-based documentation loading and intelligent search instructions",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "packages/cli/dist/index.js",
|
|
@@ -29,9 +29,9 @@
|
|
|
29
29
|
"commander": "^12.0.0",
|
|
30
30
|
"js-yaml": "4.1.0",
|
|
31
31
|
"ora": "^8.0.1",
|
|
32
|
-
"@codemcp/knowledge
|
|
33
|
-
"@codemcp/knowledge": "1.0
|
|
34
|
-
"@codemcp/knowledge-
|
|
32
|
+
"@codemcp/knowledge": "1.2.0",
|
|
33
|
+
"@codemcp/knowledge-core": "1.2.0",
|
|
34
|
+
"@codemcp/knowledge-content-loader": "1.2.0"
|
|
35
35
|
},
|
|
36
36
|
"devDependencies": {
|
|
37
37
|
"@eslint/js": "^9.34.0",
|
|
@@ -5,15 +5,16 @@ import { Command } from "commander";
|
|
|
5
5
|
import chalk from "chalk";
|
|
6
6
|
import { promises as fs } from "node:fs";
|
|
7
7
|
import * as path from "node:path";
|
|
8
|
+
import { URL } from "node:url";
|
|
8
9
|
import { ConfigManager } from "@codemcp/knowledge-core";
|
|
9
10
|
export const createCommand = new Command("create")
|
|
10
11
|
.description("Create a new docset using presets")
|
|
11
|
-
.requiredOption("--preset <type>", "Preset type: git-repo
|
|
12
|
+
.requiredOption("--preset <type>", "Preset type: git-repo, local-folder, or archive")
|
|
12
13
|
.requiredOption("--id <id>", "Unique docset ID")
|
|
13
14
|
.requiredOption("--name <name>", "Human-readable docset name")
|
|
14
15
|
.option("--description <desc>", "Docset description")
|
|
15
|
-
.option("--url <url>", "Git repository URL (
|
|
16
|
-
.option("--path <path>", "Local folder path (
|
|
16
|
+
.option("--url <url>", "Git repository URL (git-repo) or archive file URL (archive preset)")
|
|
17
|
+
.option("--path <path>", "Local folder path (local-folder) or local archive file path (archive preset)")
|
|
17
18
|
.option("--branch <branch>", "Git branch (default: main)", "main")
|
|
18
19
|
.action(async (options) => {
|
|
19
20
|
try {
|
|
@@ -47,8 +48,11 @@ export const createCommand = new Command("create")
|
|
|
47
48
|
else if (options.preset === "local-folder") {
|
|
48
49
|
newDocset = await createLocalFolderDocset(options);
|
|
49
50
|
}
|
|
51
|
+
else if (options.preset === "archive") {
|
|
52
|
+
newDocset = await createArchiveDocset(options);
|
|
53
|
+
}
|
|
50
54
|
else {
|
|
51
|
-
throw new Error(`Unknown preset: ${options.preset}. Use 'git-repo'
|
|
55
|
+
throw new Error(`Unknown preset: ${options.preset}. Use 'git-repo', 'local-folder', or 'archive'`);
|
|
52
56
|
}
|
|
53
57
|
// Add to config
|
|
54
58
|
config.docsets.push(newDocset);
|
|
@@ -111,3 +115,54 @@ async function createLocalFolderDocset(options) {
|
|
|
111
115
|
],
|
|
112
116
|
};
|
|
113
117
|
}
|
|
118
|
+
async function createArchiveDocset(options) {
|
|
119
|
+
if (!options.path && !options.url) {
|
|
120
|
+
throw new Error("Either --path or --url is required for archive preset");
|
|
121
|
+
}
|
|
122
|
+
// If path is provided, validate it exists
|
|
123
|
+
if (options.path) {
|
|
124
|
+
const fullPath = path.resolve(options.path);
|
|
125
|
+
try {
|
|
126
|
+
const stat = await fs.stat(fullPath);
|
|
127
|
+
if (!stat.isFile()) {
|
|
128
|
+
throw new Error(`Path is not a file: ${options.path}`);
|
|
129
|
+
}
|
|
130
|
+
const lowerPath = options.path.toLowerCase();
|
|
131
|
+
if (!lowerPath.endsWith(".zip") &&
|
|
132
|
+
!lowerPath.endsWith(".tar.gz") &&
|
|
133
|
+
!lowerPath.endsWith(".tgz")) {
|
|
134
|
+
throw new Error(`File is not a supported archive format (zip, tar.gz): ${options.path}`);
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
catch {
|
|
138
|
+
throw new Error(`Path does not exist or is invalid: ${options.path}`);
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
// If URL is provided, validate it's a valid URL
|
|
142
|
+
if (options.url) {
|
|
143
|
+
try {
|
|
144
|
+
new URL(options.url);
|
|
145
|
+
}
|
|
146
|
+
catch {
|
|
147
|
+
throw new Error(`Invalid URL format: ${options.url}`);
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
const source = {
|
|
151
|
+
type: "archive",
|
|
152
|
+
};
|
|
153
|
+
if (options.path) {
|
|
154
|
+
source.path = options.path;
|
|
155
|
+
}
|
|
156
|
+
if (options.url) {
|
|
157
|
+
source.url = options.url;
|
|
158
|
+
}
|
|
159
|
+
if (options.paths) {
|
|
160
|
+
source.paths = options.paths.split(",");
|
|
161
|
+
}
|
|
162
|
+
return {
|
|
163
|
+
id: options.id,
|
|
164
|
+
name: options.name,
|
|
165
|
+
description: options.description || `Archive: ${options.path || options.url}`,
|
|
166
|
+
sources: [source],
|
|
167
|
+
};
|
|
168
|
+
}
|
|
@@ -6,7 +6,7 @@ import chalk from "chalk";
|
|
|
6
6
|
import { promises as fs } from "node:fs";
|
|
7
7
|
import * as path from "node:path";
|
|
8
8
|
import { ConfigManager, calculateLocalPath, ensureKnowledgeGitignoreSync, discoverDirectoryPatterns, safelyClearDirectory, getDirectoryInfo, } from "@codemcp/knowledge-core";
|
|
9
|
-
import { GitRepoLoader,
|
|
9
|
+
import { GitRepoLoader, ArchiveLoader, WebSourceType, } from "@codemcp/knowledge-content-loader";
|
|
10
10
|
export const initCommand = new Command("init")
|
|
11
11
|
.description("Initialize sources for a docset from configuration")
|
|
12
12
|
.argument("<docset-id>", "ID of the docset to initialize")
|
|
@@ -169,14 +169,14 @@ export const initCommand = new Command("init")
|
|
|
169
169
|
};
|
|
170
170
|
await fs.writeFile(path.join(localPath, `.agentic-source-${index}.json`), JSON.stringify(metadata, null, 2));
|
|
171
171
|
}
|
|
172
|
-
else if (source.type === "
|
|
173
|
-
// Handle
|
|
174
|
-
const loader = new
|
|
172
|
+
else if (source.type === "archive") {
|
|
173
|
+
// Handle archive file initialization (zip, tar.gz, etc.)
|
|
174
|
+
const loader = new ArchiveLoader();
|
|
175
175
|
const sourceUrl = source.url || source.path || "";
|
|
176
|
-
console.log(chalk.gray(` Using
|
|
176
|
+
console.log(chalk.gray(` Using ArchiveLoader for archive extraction`));
|
|
177
177
|
const webSourceConfig = {
|
|
178
178
|
url: sourceUrl,
|
|
179
|
-
type: WebSourceType.
|
|
179
|
+
type: WebSourceType.ARCHIVE,
|
|
180
180
|
options: {
|
|
181
181
|
paths: source.paths || [],
|
|
182
182
|
},
|
|
@@ -184,17 +184,17 @@ export const initCommand = new Command("init")
|
|
|
184
184
|
// Validate configuration
|
|
185
185
|
const validation = loader.validateConfig(webSourceConfig);
|
|
186
186
|
if (validation !== true) {
|
|
187
|
-
throw new Error(`Invalid
|
|
187
|
+
throw new Error(`Invalid archive source configuration: ${validation}`);
|
|
188
188
|
}
|
|
189
|
-
// Load content using
|
|
189
|
+
// Load content using ArchiveLoader
|
|
190
190
|
const result = await loader.load(webSourceConfig, localPath);
|
|
191
191
|
if (!result.success) {
|
|
192
|
-
throw new Error(`
|
|
192
|
+
throw new Error(`Archive loading failed: ${result.error}`);
|
|
193
193
|
}
|
|
194
194
|
// Collect discovered paths for config update
|
|
195
195
|
allDiscoveredPaths.push(...result.files);
|
|
196
196
|
totalFiles += result.files.length;
|
|
197
|
-
console.log(chalk.green(` ✅ Extracted ${result.files.length} files from
|
|
197
|
+
console.log(chalk.green(` ✅ Extracted ${result.files.length} files from archive`));
|
|
198
198
|
// Create source metadata
|
|
199
199
|
const metadata = {
|
|
200
200
|
source_url: sourceUrl,
|
|
@@ -8,7 +8,7 @@ import { promises as fs } from "node:fs";
|
|
|
8
8
|
import * as path from "node:path";
|
|
9
9
|
import { execSync } from "node:child_process";
|
|
10
10
|
import { findConfigPathSync, loadConfigSync, calculateLocalPath, ensureKnowledgeGitignoreSync, } from "@codemcp/knowledge-core";
|
|
11
|
-
import {
|
|
11
|
+
import { ArchiveLoader, WebSourceType, } from "@codemcp/knowledge-content-loader";
|
|
12
12
|
export const refreshCommand = new Command("refresh")
|
|
13
13
|
.description("Refresh sources for docsets")
|
|
14
14
|
.argument("[docset-id]", "ID of specific docset to refresh (refresh all if not specified)")
|
|
@@ -96,8 +96,8 @@ async function refreshDocset(docset, configPath, force) {
|
|
|
96
96
|
totalFiles += sourceFiles.files_count;
|
|
97
97
|
refreshedSources.push(sourceFiles);
|
|
98
98
|
}
|
|
99
|
-
else if (source.type === "
|
|
100
|
-
const sourceFiles = await refreshZipSource(source, localPath, index, docset.id, force);
|
|
99
|
+
else if (source.type === "archive") {
|
|
100
|
+
const sourceFiles = await refreshArchiveSource(source, localPath, index, docset.id, force);
|
|
101
101
|
totalFiles += sourceFiles.files_count;
|
|
102
102
|
refreshedSources.push(sourceFiles);
|
|
103
103
|
}
|
|
@@ -244,7 +244,7 @@ async function refreshGitSource(webSource, localPath, index, docsetId, force) {
|
|
|
244
244
|
await fs.rm(tempDir, { recursive: true, force: true });
|
|
245
245
|
}
|
|
246
246
|
}
|
|
247
|
-
async function refreshZipSource(source, localPath, index, docsetId, force) {
|
|
247
|
+
async function refreshArchiveSource(source, localPath, index, docsetId, force) {
|
|
248
248
|
const sourceMetadataPath = path.join(localPath, `.agentic-source-${index}.json`);
|
|
249
249
|
let existingSourceMetadata = null;
|
|
250
250
|
try {
|
|
@@ -255,10 +255,10 @@ async function refreshZipSource(source, localPath, index, docsetId, force) {
|
|
|
255
255
|
// No existing metadata, will do full refresh
|
|
256
256
|
}
|
|
257
257
|
const sourceUrl = source.url || source.path || "";
|
|
258
|
-
const loader = new
|
|
258
|
+
const loader = new ArchiveLoader();
|
|
259
259
|
const webSourceConfig = {
|
|
260
260
|
url: sourceUrl,
|
|
261
|
-
type: WebSourceType.
|
|
261
|
+
type: WebSourceType.ARCHIVE,
|
|
262
262
|
options: {
|
|
263
263
|
paths: source.paths || [],
|
|
264
264
|
},
|
|
@@ -296,11 +296,11 @@ async function refreshZipSource(source, localPath, index, docsetId, force) {
|
|
|
296
296
|
// Load content
|
|
297
297
|
const result = await loader.load(webSourceConfig, localPath);
|
|
298
298
|
if (!result.success) {
|
|
299
|
-
throw new Error(`
|
|
299
|
+
throw new Error(`Archive refresh failed: ${result.error}`);
|
|
300
300
|
}
|
|
301
301
|
const metadata = {
|
|
302
302
|
source_url: sourceUrl,
|
|
303
|
-
source_type: "
|
|
303
|
+
source_type: "archive",
|
|
304
304
|
downloaded_at: new Date().toISOString(),
|
|
305
305
|
files_count: result.files.length,
|
|
306
306
|
files: result.files,
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* Archive file content loader (supports zip, tar.gz, etc.)
|
|
3
3
|
*/
|
|
4
4
|
import { ContentLoader, type LoadResult } from "./loader.js";
|
|
5
5
|
import { WebSourceConfig } from "../types.js";
|
|
6
6
|
/**
|
|
7
|
-
* Content loader for
|
|
7
|
+
* Content loader for archive files - zip, tar.gz, etc. (local or remote)
|
|
8
8
|
*/
|
|
9
|
-
export declare class ZipLoader extends ContentLoader {
|
|
9
|
+
export declare class ArchiveLoader extends ContentLoader {
|
|
10
10
|
/**
|
|
11
11
|
* Check if this loader can handle the given web source type
|
|
12
12
|
*/
|
|
@@ -16,7 +16,7 @@ export declare class ZipLoader extends ContentLoader {
|
|
|
16
16
|
*/
|
|
17
17
|
validateConfig(webSource: WebSourceConfig): true | string;
|
|
18
18
|
/**
|
|
19
|
-
* Load content from
|
|
19
|
+
* Load content from an archive file
|
|
20
20
|
*/
|
|
21
21
|
load(webSource: WebSourceConfig, targetPath: string): Promise<LoadResult>;
|
|
22
22
|
/**
|
|
@@ -32,24 +32,32 @@ export declare class ZipLoader extends ContentLoader {
|
|
|
32
32
|
*/
|
|
33
33
|
private isRemoteUrl;
|
|
34
34
|
/**
|
|
35
|
-
*
|
|
35
|
+
* Detect archive type based on file extension
|
|
36
36
|
*/
|
|
37
|
-
private
|
|
37
|
+
private detectArchiveType;
|
|
38
38
|
/**
|
|
39
|
-
*
|
|
39
|
+
* Resolve the archive file path - download if remote, return as-is if local
|
|
40
40
|
*/
|
|
41
|
-
private
|
|
41
|
+
private resolveArchiveFile;
|
|
42
|
+
/**
|
|
43
|
+
* Download an archive file from a remote URL
|
|
44
|
+
*/
|
|
45
|
+
private downloadArchive;
|
|
42
46
|
/**
|
|
43
47
|
* Extract a zip file to a directory using adm-zip
|
|
44
48
|
*/
|
|
45
49
|
private extractZip;
|
|
50
|
+
/**
|
|
51
|
+
* Extract a tar.gz file to a directory
|
|
52
|
+
*/
|
|
53
|
+
private extractTarGz;
|
|
46
54
|
/**
|
|
47
55
|
* If the extracted contents have a single root directory and no files at root,
|
|
48
56
|
* move that directory's contents one level up.
|
|
49
57
|
*/
|
|
50
58
|
private flattenSingleRoot;
|
|
51
59
|
/**
|
|
52
|
-
* Extract content from extracted
|
|
60
|
+
* Extract content from extracted archive to target directory
|
|
53
61
|
*/
|
|
54
62
|
private extractContent;
|
|
55
63
|
/**
|
|
@@ -1,48 +1,60 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* Archive file content loader (supports zip, tar.gz, etc.)
|
|
3
3
|
*/
|
|
4
4
|
import { promises as fs } from "node:fs";
|
|
5
5
|
import * as path from "node:path";
|
|
6
6
|
import * as crypto from "node:crypto";
|
|
7
7
|
import https from "node:https";
|
|
8
8
|
import http from "node:http";
|
|
9
|
+
import { URL } from "node:url";
|
|
9
10
|
import AdmZip from "adm-zip";
|
|
11
|
+
import * as tar from "tar";
|
|
10
12
|
import { ContentLoader } from "./loader.js";
|
|
11
13
|
import { WebSourceType, WebSourceError, WebSourceErrorType, } from "../types.js";
|
|
12
14
|
import { filterDocumentationFiles } from "./file-filter.js";
|
|
13
15
|
/**
|
|
14
|
-
* Content loader for
|
|
16
|
+
* Content loader for archive files - zip, tar.gz, etc. (local or remote)
|
|
15
17
|
*/
|
|
16
|
-
export class ZipLoader extends ContentLoader {
|
|
18
|
+
export class ArchiveLoader extends ContentLoader {
|
|
17
19
|
/**
|
|
18
20
|
* Check if this loader can handle the given web source type
|
|
19
21
|
*/
|
|
20
22
|
canHandle(webSource) {
|
|
21
|
-
return webSource.type === WebSourceType.
|
|
23
|
+
return webSource.type === WebSourceType.ARCHIVE;
|
|
22
24
|
}
|
|
23
25
|
/**
|
|
24
26
|
* Validate the web source configuration
|
|
25
27
|
*/
|
|
26
28
|
validateConfig(webSource) {
|
|
27
29
|
if (!webSource.url) {
|
|
28
|
-
return "
|
|
30
|
+
return "Archive source must have a URL (remote) or local path";
|
|
29
31
|
}
|
|
30
32
|
return true;
|
|
31
33
|
}
|
|
32
34
|
/**
|
|
33
|
-
* Load content from
|
|
35
|
+
* Load content from an archive file
|
|
34
36
|
*/
|
|
35
37
|
async load(webSource, targetPath) {
|
|
36
38
|
try {
|
|
37
39
|
const options = webSource.options;
|
|
38
40
|
const tempDir = await this.createTempDirectory();
|
|
39
41
|
try {
|
|
40
|
-
// Get the
|
|
41
|
-
const
|
|
42
|
+
// Get the archive file (download if remote, or use local path)
|
|
43
|
+
const archiveFilePath = await this.resolveArchiveFile(webSource.url, tempDir);
|
|
44
|
+
// Detect archive type
|
|
45
|
+
const archiveType = this.detectArchiveType(archiveFilePath);
|
|
42
46
|
// Extract to temp directory
|
|
43
47
|
const extractDir = path.join(tempDir, "extracted");
|
|
44
48
|
await fs.mkdir(extractDir, { recursive: true });
|
|
45
|
-
|
|
49
|
+
if (archiveType === "zip") {
|
|
50
|
+
this.extractZip(archiveFilePath, extractDir);
|
|
51
|
+
}
|
|
52
|
+
else if (archiveType === "tar.gz") {
|
|
53
|
+
await this.extractTarGz(archiveFilePath, extractDir);
|
|
54
|
+
}
|
|
55
|
+
else {
|
|
56
|
+
throw new WebSourceError(WebSourceErrorType.ARCHIVE_ERROR, `Unsupported archive format. Supported formats: .zip, .tar.gz`, { archiveType });
|
|
57
|
+
}
|
|
46
58
|
// Flatten single root directory
|
|
47
59
|
await this.flattenSingleRoot(extractDir);
|
|
48
60
|
// Extract specified paths or all documentation content
|
|
@@ -65,7 +77,7 @@ export class ZipLoader extends ContentLoader {
|
|
|
65
77
|
success: false,
|
|
66
78
|
files: [],
|
|
67
79
|
contentHash: "",
|
|
68
|
-
error: `
|
|
80
|
+
error: `Archive loading failed: ${errorMessage}`,
|
|
69
81
|
};
|
|
70
82
|
}
|
|
71
83
|
}
|
|
@@ -127,11 +139,24 @@ export class ZipLoader extends ContentLoader {
|
|
|
127
139
|
return url.startsWith("http://") || url.startsWith("https://");
|
|
128
140
|
}
|
|
129
141
|
/**
|
|
130
|
-
*
|
|
142
|
+
* Detect archive type based on file extension
|
|
143
|
+
*/
|
|
144
|
+
detectArchiveType(filePath) {
|
|
145
|
+
const lowerPath = filePath.toLowerCase();
|
|
146
|
+
if (lowerPath.endsWith(".tar.gz") || lowerPath.endsWith(".tgz")) {
|
|
147
|
+
return "tar.gz";
|
|
148
|
+
}
|
|
149
|
+
if (lowerPath.endsWith(".zip")) {
|
|
150
|
+
return "zip";
|
|
151
|
+
}
|
|
152
|
+
return "unknown";
|
|
153
|
+
}
|
|
154
|
+
/**
|
|
155
|
+
* Resolve the archive file path - download if remote, return as-is if local
|
|
131
156
|
*/
|
|
132
|
-
async
|
|
157
|
+
async resolveArchiveFile(url, tempDir) {
|
|
133
158
|
if (this.isRemoteUrl(url)) {
|
|
134
|
-
return this.
|
|
159
|
+
return this.downloadArchive(url, tempDir);
|
|
135
160
|
}
|
|
136
161
|
// Local file - verify it exists
|
|
137
162
|
try {
|
|
@@ -139,14 +164,17 @@ export class ZipLoader extends ContentLoader {
|
|
|
139
164
|
return url;
|
|
140
165
|
}
|
|
141
166
|
catch {
|
|
142
|
-
throw new WebSourceError(WebSourceErrorType.
|
|
167
|
+
throw new WebSourceError(WebSourceErrorType.ARCHIVE_ERROR, `Local archive file not found: ${url}`, { url });
|
|
143
168
|
}
|
|
144
169
|
}
|
|
145
170
|
/**
|
|
146
|
-
* Download
|
|
171
|
+
* Download an archive file from a remote URL
|
|
147
172
|
*/
|
|
148
|
-
async
|
|
149
|
-
|
|
173
|
+
async downloadArchive(url, tempDir) {
|
|
174
|
+
// Determine filename from URL
|
|
175
|
+
const urlPath = new URL(url).pathname;
|
|
176
|
+
const filename = path.basename(urlPath) || "download.archive";
|
|
177
|
+
const archivePath = path.join(tempDir, filename);
|
|
150
178
|
return new Promise((resolve, reject) => {
|
|
151
179
|
const protocol = url.startsWith("https") ? https : http;
|
|
152
180
|
const request = protocol.get(url, async (response) => {
|
|
@@ -162,8 +190,8 @@ export class ZipLoader extends ContentLoader {
|
|
|
162
190
|
response.on("end", async () => {
|
|
163
191
|
try {
|
|
164
192
|
const buffer = Buffer.concat(chunks);
|
|
165
|
-
await fs.writeFile(
|
|
166
|
-
resolve(
|
|
193
|
+
await fs.writeFile(archivePath, buffer);
|
|
194
|
+
resolve(archivePath);
|
|
167
195
|
}
|
|
168
196
|
catch (error) {
|
|
169
197
|
reject(error);
|
|
@@ -175,7 +203,7 @@ export class ZipLoader extends ContentLoader {
|
|
|
175
203
|
}
|
|
176
204
|
});
|
|
177
205
|
request.on("error", (error) => {
|
|
178
|
-
reject(new WebSourceError(WebSourceErrorType.
|
|
206
|
+
reject(new WebSourceError(WebSourceErrorType.ARCHIVE_ERROR, `Failed to download archive from ${url}: ${error instanceof Error ? error.message : String(error)}`, { url }));
|
|
179
207
|
});
|
|
180
208
|
});
|
|
181
209
|
}
|
|
@@ -188,7 +216,22 @@ export class ZipLoader extends ContentLoader {
|
|
|
188
216
|
zip.extractAllTo(targetDir, true);
|
|
189
217
|
}
|
|
190
218
|
catch (error) {
|
|
191
|
-
throw new WebSourceError(WebSourceErrorType.
|
|
219
|
+
throw new WebSourceError(WebSourceErrorType.ARCHIVE_ERROR, `Failed to extract zip: ${error instanceof Error ? error.message : String(error)}`, { zipPath });
|
|
220
|
+
}
|
|
221
|
+
}
|
|
222
|
+
/**
|
|
223
|
+
* Extract a tar.gz file to a directory
|
|
224
|
+
*/
|
|
225
|
+
async extractTarGz(tarGzPath, targetDir) {
|
|
226
|
+
try {
|
|
227
|
+
await tar.extract({
|
|
228
|
+
file: tarGzPath,
|
|
229
|
+
cwd: targetDir,
|
|
230
|
+
strip: 0,
|
|
231
|
+
});
|
|
232
|
+
}
|
|
233
|
+
catch (error) {
|
|
234
|
+
throw new WebSourceError(WebSourceErrorType.ARCHIVE_ERROR, `Failed to extract tar.gz: ${error instanceof Error ? error.message : String(error)}`, { tarGzPath });
|
|
192
235
|
}
|
|
193
236
|
}
|
|
194
237
|
/**
|
|
@@ -213,7 +256,7 @@ export class ZipLoader extends ContentLoader {
|
|
|
213
256
|
}
|
|
214
257
|
}
|
|
215
258
|
/**
|
|
216
|
-
* Extract content from extracted
|
|
259
|
+
* Extract content from extracted archive to target directory
|
|
217
260
|
*/
|
|
218
261
|
async extractContent(sourceDir, targetDir, paths) {
|
|
219
262
|
await fs.mkdir(targetDir, { recursive: true });
|
|
@@ -330,7 +373,7 @@ export class ZipLoader extends ContentLoader {
|
|
|
330
373
|
* Create a temporary directory
|
|
331
374
|
*/
|
|
332
375
|
async createTempDirectory() {
|
|
333
|
-
const tempDir = path.join(process.cwd(), ".tmp", `
|
|
376
|
+
const tempDir = path.join(process.cwd(), ".tmp", `archive-extract-${Date.now()}-${Math.random().toString(36).slice(2)}`);
|
|
334
377
|
await fs.mkdir(tempDir, { recursive: true });
|
|
335
378
|
return tempDir;
|
|
336
379
|
}
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
*/
|
|
4
4
|
export { ContentLoader } from "./loader.js";
|
|
5
5
|
export { GitRepoLoader } from "./git-repo-loader.js";
|
|
6
|
-
export { ZipLoader } from "./zip-loader.js";
|
|
6
|
+
export { ArchiveLoader } from "./archive-loader.js";
|
|
7
7
|
export { DocumentationSiteLoader } from "./documentation-site-loader.js";
|
|
8
8
|
export { ApiDocumentationLoader } from "./api-documentation-loader.js";
|
|
9
9
|
export { ContentProcessor } from "./content-processor.js";
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
*/
|
|
4
4
|
export { ContentLoader } from "./loader.js";
|
|
5
5
|
export { GitRepoLoader } from "./git-repo-loader.js";
|
|
6
|
-
export { ZipLoader } from "./zip-loader.js";
|
|
6
|
+
export { ArchiveLoader } from "./archive-loader.js";
|
|
7
7
|
export { DocumentationSiteLoader } from "./documentation-site-loader.js";
|
|
8
8
|
export { ApiDocumentationLoader } from "./api-documentation-loader.js";
|
|
9
9
|
export { ContentProcessor } from "./content-processor.js";
|
|
@@ -23,7 +23,7 @@ export declare enum WebSourceType {
|
|
|
23
23
|
GIT_REPO = "git_repo",
|
|
24
24
|
DOCUMENTATION_SITE = "documentation_site",
|
|
25
25
|
API_DOCUMENTATION = "api_documentation",
|
|
26
|
-
|
|
26
|
+
ARCHIVE = "archive"
|
|
27
27
|
}
|
|
28
28
|
/**
|
|
29
29
|
* Configuration for Git repository web sources
|
|
@@ -57,22 +57,22 @@ export interface ApiDocumentationOptions {
|
|
|
57
57
|
include_packages?: string[];
|
|
58
58
|
}
|
|
59
59
|
/**
|
|
60
|
-
* Configuration for
|
|
60
|
+
* Configuration for archive file web sources (zip, tar.gz, etc.)
|
|
61
61
|
*/
|
|
62
|
-
export interface
|
|
63
|
-
/** Specific paths to extract from the
|
|
62
|
+
export interface ArchiveOptions {
|
|
63
|
+
/** Specific paths to extract from the archive */
|
|
64
64
|
paths?: string[];
|
|
65
65
|
}
|
|
66
66
|
/**
|
|
67
67
|
* Configuration for a single web source
|
|
68
68
|
*/
|
|
69
69
|
export interface WebSourceConfig {
|
|
70
|
-
/** URL of the web source (or local path for
|
|
70
|
+
/** URL of the web source (or local path for archive sources) */
|
|
71
71
|
url: string;
|
|
72
72
|
/** Type of web source */
|
|
73
73
|
type: WebSourceType;
|
|
74
74
|
/** Type-specific options */
|
|
75
|
-
options?: GitRepoOptions | DocumentationSiteOptions | ApiDocumentationOptions |
|
|
75
|
+
options?: GitRepoOptions | DocumentationSiteOptions | ApiDocumentationOptions | ArchiveOptions;
|
|
76
76
|
}
|
|
77
77
|
/**
|
|
78
78
|
* Metadata for a single web source download
|
|
@@ -116,7 +116,7 @@ export declare const METADATA_FILENAME = ".agentic-metadata.json";
|
|
|
116
116
|
export declare enum WebSourceErrorType {
|
|
117
117
|
WEB_SOURCE_ERROR = "WEB_SOURCE_ERROR",
|
|
118
118
|
GIT_REPO_ERROR = "GIT_REPO_ERROR",
|
|
119
|
-
|
|
119
|
+
ARCHIVE_ERROR = "ARCHIVE_ERROR",
|
|
120
120
|
NOT_IMPLEMENTED = "NOT_IMPLEMENTED"
|
|
121
121
|
}
|
|
122
122
|
/**
|
|
@@ -9,7 +9,7 @@ export var WebSourceType;
|
|
|
9
9
|
WebSourceType["GIT_REPO"] = "git_repo";
|
|
10
10
|
WebSourceType["DOCUMENTATION_SITE"] = "documentation_site";
|
|
11
11
|
WebSourceType["API_DOCUMENTATION"] = "api_documentation";
|
|
12
|
-
WebSourceType["
|
|
12
|
+
WebSourceType["ARCHIVE"] = "archive";
|
|
13
13
|
})(WebSourceType || (WebSourceType = {}));
|
|
14
14
|
/**
|
|
15
15
|
* Metadata file name pattern
|
|
@@ -22,7 +22,7 @@ export var WebSourceErrorType;
|
|
|
22
22
|
(function (WebSourceErrorType) {
|
|
23
23
|
WebSourceErrorType["WEB_SOURCE_ERROR"] = "WEB_SOURCE_ERROR";
|
|
24
24
|
WebSourceErrorType["GIT_REPO_ERROR"] = "GIT_REPO_ERROR";
|
|
25
|
-
WebSourceErrorType["
|
|
25
|
+
WebSourceErrorType["ARCHIVE_ERROR"] = "ARCHIVE_ERROR";
|
|
26
26
|
WebSourceErrorType["NOT_IMPLEMENTED"] = "NOT_IMPLEMENTED";
|
|
27
27
|
})(WebSourceErrorType || (WebSourceErrorType = {}));
|
|
28
28
|
/**
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@codemcp/knowledge-content-loader",
|
|
3
|
-
"version": "1.0
|
|
3
|
+
"version": "1.2.0",
|
|
4
4
|
"description": "Web content loading and metadata management for agentic knowledge system",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -30,12 +30,14 @@
|
|
|
30
30
|
},
|
|
31
31
|
"dependencies": {
|
|
32
32
|
"adm-zip": "0.5.16",
|
|
33
|
-
"simple-git": "^3.22.0"
|
|
33
|
+
"simple-git": "^3.22.0",
|
|
34
|
+
"tar": "7.5.9"
|
|
34
35
|
},
|
|
35
36
|
"devDependencies": {
|
|
36
37
|
"@eslint/js": "^9.34.0",
|
|
37
38
|
"@types/adm-zip": "0.5.7",
|
|
38
39
|
"@types/node": "^24.3.0",
|
|
40
|
+
"@types/tar": "7.0.87",
|
|
39
41
|
"eslint": "^9.34.0",
|
|
40
42
|
"rimraf": "^6.0.1",
|
|
41
43
|
"typescript": "^5.9.2",
|
|
@@ -46,8 +46,8 @@ export function calculateLocalPath(docset, configPath) {
|
|
|
46
46
|
// For git repos, use standardized path: .knowledge/docsets/{id}
|
|
47
47
|
return join(configDir, "docsets", docset.id);
|
|
48
48
|
}
|
|
49
|
-
if (primarySource.type === "
|
|
50
|
-
// For
|
|
49
|
+
if (primarySource.type === "archive") {
|
|
50
|
+
// For archive sources, use standardized path: .knowledge/docsets/{id}
|
|
51
51
|
return join(configDir, "docsets", docset.id);
|
|
52
52
|
}
|
|
53
53
|
throw new Error(`Unsupported source type: ${primarySource.type}`);
|
|
@@ -91,8 +91,8 @@ export async function calculateLocalPathWithSymlinks(docset, configPath) {
|
|
|
91
91
|
// For git repos, use standardized path: .knowledge/docsets/{id}
|
|
92
92
|
return join(configDir, "docsets", docset.id);
|
|
93
93
|
}
|
|
94
|
-
if (primarySource.type === "
|
|
95
|
-
// For
|
|
94
|
+
if (primarySource.type === "archive") {
|
|
95
|
+
// For archive sources, use standardized path: .knowledge/docsets/{id}
|
|
96
96
|
return join(configDir, "docsets", docset.id);
|
|
97
97
|
}
|
|
98
98
|
throw new Error(`Unsupported source type: ${primarySource.type}`);
|
|
@@ -31,13 +31,13 @@ export interface GitRepoSourceConfig extends BaseSourceConfig {
|
|
|
31
31
|
paths?: string[];
|
|
32
32
|
}
|
|
33
33
|
/**
|
|
34
|
-
*
|
|
34
|
+
* Archive file source configuration (supports zip, tar.gz, etc.)
|
|
35
35
|
*/
|
|
36
|
-
export interface ZipSourceConfig extends BaseSourceConfig {
|
|
37
|
-
type: "
|
|
38
|
-
/** Local path to
|
|
36
|
+
export interface ArchiveSourceConfig extends BaseSourceConfig {
|
|
37
|
+
type: "archive";
|
|
38
|
+
/** Local path to archive file (mutually exclusive with url) */
|
|
39
39
|
path?: string;
|
|
40
|
-
/** Remote URL to download
|
|
40
|
+
/** Remote URL to download archive from (mutually exclusive with path) */
|
|
41
41
|
url?: string;
|
|
42
42
|
/** Specific paths to extract (optional) */
|
|
43
43
|
paths?: string[];
|
|
@@ -45,7 +45,7 @@ export interface ZipSourceConfig extends BaseSourceConfig {
|
|
|
45
45
|
/**
|
|
46
46
|
* Union type for all source configurations
|
|
47
47
|
*/
|
|
48
|
-
export type SourceConfig = LocalFolderSourceConfig | GitRepoSourceConfig | ZipSourceConfig;
|
|
48
|
+
export type SourceConfig = LocalFolderSourceConfig | GitRepoSourceConfig | ArchiveSourceConfig;
|
|
49
49
|
/**
|
|
50
50
|
* Configuration for a single docset
|
|
51
51
|
*/
|