agentic-knowledge-mcp 1.0.17 → 1.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentic-knowledge-mcp",
3
- "version": "1.0.17",
3
+ "version": "1.0.18",
4
4
  "description": "A Model Context Protocol server for agentic knowledge guidance with web-based documentation loading and intelligent search instructions",
5
5
  "type": "module",
6
6
  "main": "packages/cli/dist/index.js",
@@ -29,9 +29,9 @@
29
29
  "commander": "^12.0.0",
30
30
  "js-yaml": "4.1.0",
31
31
  "ora": "^8.0.1",
32
- "@codemcp/knowledge": "1.0.17",
33
- "@codemcp/knowledge-content-loader": "1.0.17",
34
- "@codemcp/knowledge-core": "1.0.17"
32
+ "@codemcp/knowledge-content-loader": "1.0.18",
33
+ "@codemcp/knowledge": "1.0.18",
34
+ "@codemcp/knowledge-core": "1.0.18"
35
35
  },
36
36
  "devDependencies": {
37
37
  "@eslint/js": "^9.34.0",
@@ -6,7 +6,7 @@ import chalk from "chalk";
6
6
  import { promises as fs } from "node:fs";
7
7
  import * as path from "node:path";
8
8
  import { ConfigManager, calculateLocalPath, ensureKnowledgeGitignoreSync, discoverDirectoryPatterns, safelyClearDirectory, getDirectoryInfo, } from "@codemcp/knowledge-core";
9
- import { GitRepoLoader, WebSourceType, } from "@codemcp/knowledge-content-loader";
9
+ import { GitRepoLoader, ZipLoader, WebSourceType, } from "@codemcp/knowledge-content-loader";
10
10
  export const initCommand = new Command("init")
11
11
  .description("Initialize sources for a docset from configuration")
12
12
  .argument("<docset-id>", "ID of the docset to initialize")
@@ -169,6 +169,44 @@ export const initCommand = new Command("init")
169
169
  };
170
170
  await fs.writeFile(path.join(localPath, `.agentic-source-${index}.json`), JSON.stringify(metadata, null, 2));
171
171
  }
172
+ else if (source.type === "zip") {
173
+ // Handle zip file initialization
174
+ const loader = new ZipLoader();
175
+ const sourceUrl = source.url || source.path || "";
176
+ console.log(chalk.gray(` Using ZipLoader for zip extraction`));
177
+ const webSourceConfig = {
178
+ url: sourceUrl,
179
+ type: WebSourceType.ZIP,
180
+ options: {
181
+ paths: source.paths || [],
182
+ },
183
+ };
184
+ // Validate configuration
185
+ const validation = loader.validateConfig(webSourceConfig);
186
+ if (validation !== true) {
187
+ throw new Error(`Invalid zip source configuration: ${validation}`);
188
+ }
189
+ // Load content using ZipLoader
190
+ const result = await loader.load(webSourceConfig, localPath);
191
+ if (!result.success) {
192
+ throw new Error(`Zip loading failed: ${result.error}`);
193
+ }
194
+ // Collect discovered paths for config update
195
+ allDiscoveredPaths.push(...result.files);
196
+ totalFiles += result.files.length;
197
+ console.log(chalk.green(` ✅ Extracted ${result.files.length} files from zip`));
198
+ // Create source metadata
199
+ const metadata = {
200
+ source_url: sourceUrl,
201
+ source_type: source.type,
202
+ downloaded_at: new Date().toISOString(),
203
+ files_count: result.files.length,
204
+ files: result.files,
205
+ docset_id: docsetId,
206
+ content_hash: result.contentHash,
207
+ };
208
+ await fs.writeFile(path.join(localPath, `.agentic-source-${index}.json`), JSON.stringify(metadata, null, 2));
209
+ }
172
210
  else {
173
211
  console.log(chalk.red(` ❌ Source type '${source.type}' not yet supported`));
174
212
  }
@@ -8,6 +8,7 @@ import { promises as fs } from "node:fs";
8
8
  import * as path from "node:path";
9
9
  import { execSync } from "node:child_process";
10
10
  import { findConfigPathSync, loadConfigSync, calculateLocalPath, ensureKnowledgeGitignoreSync, } from "@codemcp/knowledge-core";
11
+ import { ZipLoader, WebSourceType } from "@codemcp/knowledge-content-loader";
11
12
  export const refreshCommand = new Command("refresh")
12
13
  .description("Refresh sources for docsets")
13
14
  .argument("[docset-id]", "ID of specific docset to refresh (refresh all if not specified)")
@@ -95,6 +96,11 @@ async function refreshDocset(docset, configPath, force) {
95
96
  totalFiles += sourceFiles.files_count;
96
97
  refreshedSources.push(sourceFiles);
97
98
  }
99
+ else if (source.type === "zip") {
100
+ const sourceFiles = await refreshZipSource(source, localPath, index, docset.id, force);
101
+ totalFiles += sourceFiles.files_count;
102
+ refreshedSources.push(sourceFiles);
103
+ }
98
104
  else {
99
105
  console.log(chalk.yellow(` ⚠️ Source type '${source.type}' not yet supported, skipping`));
100
106
  }
@@ -238,6 +244,76 @@ async function refreshGitSource(webSource, localPath, index, docsetId, force) {
238
244
  await fs.rm(tempDir, { recursive: true, force: true });
239
245
  }
240
246
  }
247
/**
 * Refresh a single zip source of a docset.
 *
 * Change detection compares the loader's content id (ETag/Last-Modified based
 * for remote archives, file digest for local ones) with the `content_id`
 * stored by the previous refresh. `content_hash` is a digest of the
 * *extracted files* and lives in a different value space, so it is kept for
 * information only and is never compared with the content id.
 *
 * NOTE(review): for a remote archive that exposes neither ETag nor
 * Last-Modified the loader's content id is derived from the URL alone and
 * therefore never changes; such sources are only re-downloaded with `force`.
 *
 * @param source - Source entry from the docset configuration (`url` or `path`, optional `paths`)
 * @param localPath - Directory the docset content is written to
 * @param index - Position of the source in the docset; used in the metadata file name
 * @param docsetId - ID of the docset the source belongs to
 * @param force - When true, skip change detection and always reload
 * @returns The source metadata that was written to disk
 * @throws Error when the zip could not be loaded; previously extracted files are left untouched in that case
 */
async function refreshZipSource(source, localPath, index, docsetId, force) {
    const sourceMetadataPath = path.join(localPath, `.agentic-source-${index}.json`);
    let existingSourceMetadata = null;
    try {
        const content = await fs.readFile(sourceMetadataPath, "utf8");
        const parsed = JSON.parse(content);
        // Only accept an object; anything else is treated as "no metadata"
        if (parsed !== null && typeof parsed === "object") {
            existingSourceMetadata = parsed;
        }
    }
    catch {
        // No existing (or unreadable) metadata, will do full refresh
    }
    const sourceUrl = source.url || source.path || "";
    const loader = new ZipLoader();
    const webSourceConfig = {
        url: sourceUrl,
        type: WebSourceType.ZIP,
        options: {
            paths: source.paths || [],
        },
    };
    // Determine the current content id up front: it is needed both for the
    // "unchanged" check and to be stored for the next refresh. Taking it
    // before the download means a concurrent upstream change can only cause
    // an extra refresh later, never a missed one.
    let currentId = null;
    try {
        currentId = await loader.getContentId(webSourceConfig);
    }
    catch {
        // Could not check, proceed with full refresh
    }
    // Check if content has changed
    if (!force &&
        existingSourceMetadata &&
        currentId !== null &&
        existingSourceMetadata.content_id === currentId) {
        const updatedMetadata = {
            ...existingSourceMetadata,
            downloaded_at: new Date().toISOString(),
        };
        await fs.writeFile(sourceMetadataPath, JSON.stringify(updatedMetadata, null, 2));
        return updatedMetadata;
    }
    // Load content first so that a failed download does not wipe the
    // previously extracted files (the loader overwrites files in place).
    const result = await loader.load(webSourceConfig, localPath);
    if (!result.success) {
        throw new Error(`Zip refresh failed: ${result.error}`);
    }
    // Remove files of the previous run that are no longer part of the source
    const previousFiles = existingSourceMetadata && Array.isArray(existingSourceMetadata.files)
        ? existingSourceMetadata.files
        : [];
    const currentFiles = new Set(result.files);
    for (const file of previousFiles) {
        if (currentFiles.has(file)) {
            continue;
        }
        try {
            await fs.unlink(path.join(localPath, file));
        }
        catch {
            // File might already be deleted, ignore
        }
    }
    const metadata = {
        source_url: sourceUrl,
        source_type: "zip",
        downloaded_at: new Date().toISOString(),
        files_count: result.files.length,
        files: result.files,
        docset_id: docsetId,
        // Digest of the extracted files (informational)
        content_hash: result.contentHash,
    };
    if (currentId !== null) {
        // Identifier of the archive itself, compared on the next refresh
        metadata.content_id = currentId;
    }
    await fs.writeFile(sourceMetadataPath, JSON.stringify(metadata, null, 2));
    return metadata;
}
241
317
  // Reuse utility functions from init.ts
242
318
  async function findMarkdownFiles(dir) {
243
319
  const files = [];
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@codemcp/knowledge-cli",
3
- "version": "1.0.17",
3
+ "version": "1.0.18",
4
4
  "description": "Command-line interface for agentic knowledge web content management",
5
5
  "type": "module",
6
6
  "main": "dist/exports.js",
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Shared file filtering utilities for documentation content extraction (REQ-18).
3
+ */
4
+ /**
5
+ * Determine if a file is considered documentation content (REQ-18).
6
+ * @param filePath - Path to the file to check; every directory segment of the path is matched against the exclusion list
7
+ * @returns True for README files, documentation extensions (.md, .mdx, .rst, .txt, .adoc, .asciidoc) and non-binary files under examples/samples directories; false for project metadata files (CHANGELOG, LICENSE, CONTRIBUTING, AUTHORS, CODE_OF_CONDUCT) and for anything inside an excluded build, dependency, test or tooling directory
8
+ */
9
+ export declare function isDocumentationFile(filePath: string): boolean;
10
+ /**
11
+ * Filter list of files to only include documentation-relevant files (REQ-18).
12
+ * @param files - Array of file paths to filter (the array itself is not modified)
13
+ * @returns New array holding, in their original order, the paths accepted by isDocumentationFile
14
+ */
15
+ export declare function filterDocumentationFiles(files: string[]): string[];
@@ -0,0 +1,80 @@
1
+ /**
2
+ * Shared file filtering utilities for documentation content extraction (REQ-18)
3
+ */
4
+ import * as path from "node:path";
5
/**
 * Decide whether a path points at documentation content (REQ-18).
 *
 * Rules, evaluated in this order:
 *  1. project metadata files (CHANGELOG, LICENSE, ...) are rejected,
 *  2. anything below a build / dependency / test / tooling directory is rejected,
 *  3. README files are accepted,
 *  4. files with a documentation extension are accepted,
 *  5. inside examples/samples directories every non-binary file is accepted (Issue #12),
 *  6. everything else is rejected.
 *
 * @param filePath - Path to the file to check
 * @returns True if file should be included as documentation
 */
export function isDocumentationFile(filePath) {
    const baseName = path.basename(filePath);
    // (1) Project metadata is matched by file name prefix, case-insensitively
    if (/^(CHANGELOG|LICENSE|CONTRIBUTING|AUTHORS|CODE_OF_CONDUCT)/i.test(baseName)) {
        return false;
    }
    // (2) Compare whole path segments, never substrings, against the deny list
    const dirName = path.dirname(filePath);
    const segments = dirName.split(path.sep).join("/").split("/");
    const skippedDirs = new Set([
        "node_modules",
        "vendor",
        ".git",
        "build",
        "dist",
        "target",
        ".cache",
        "__tests__",
        "test",
        "tests",
        ".github",
        ".vscode",
        ".idea",
    ]);
    if (segments.some((segment) => skippedDirs.has(segment))) {
        return false;
    }
    // (3) README files are documentation wherever they live
    if (/^README/i.test(baseName)) {
        return true;
    }
    // (4) Well-known documentation extensions
    const ext = path.extname(filePath).toLowerCase();
    const docExts = new Set([".md", ".mdx", ".rst", ".txt", ".adoc", ".asciidoc"]);
    if (docExts.has(ext)) {
        return true;
    }
    // (5) Example/sample code demonstrates usage, so keep it unless it is a binary
    if (!/\b(examples?|samples?)\b/i.test(dirName)) {
        return false;
    }
    const binaryExts = new Set([
        ".exe",
        ".bin",
        ".so",
        ".dll",
        ".dylib",
        ".a",
        ".o",
        ".obj",
    ]);
    return !binaryExts.has(ext);
}
73
/**
 * Keep only the documentation-relevant entries of a file list (REQ-18).
 * The input array is left untouched; order of the surviving paths is preserved.
 * @param files - Array of file paths to filter
 * @returns Array of file paths that are considered documentation
 */
export function filterDocumentationFiles(files) {
    const keepIfDocumentation = (candidate) => isDocumentationFile(candidate);
    return files.filter(keepIfDocumentation);
}
@@ -51,18 +51,6 @@ export declare class GitRepoLoader extends ContentLoader {
51
51
  * Clean up temporary directory
52
52
  */
53
53
  private cleanupTempDirectory;
54
- /**
55
- * Filter list of files to only include documentation-relevant files (REQ-18)
56
- * @param files - Array of file paths to filter
57
- * @returns Array of file paths that are considered documentation
58
- */
59
- private filterDocumentationFiles;
60
- /**
61
- * Determine if a file is considered documentation content (REQ-18)
62
- * @param filePath - Path to the file to check
63
- * @returns True if file should be included as documentation
64
- */
65
- private isDocumentationFile;
66
54
  /**
67
55
  * Extract only documentation files from source directory (REQ-18)
68
56
  * @param sourceDir - Source directory to scan
@@ -7,6 +7,7 @@ import { execSync } from "node:child_process";
7
7
  import { ContentLoader } from "./loader.js";
8
8
  import { WebSourceType, WebSourceError, WebSourceErrorType, } from "../types.js";
9
9
  import * as crypto from "node:crypto";
10
+ import { filterDocumentationFiles } from "./file-filter.js";
10
11
  /**
11
12
  * Content loader for Git repositories (GitHub, GitLab, any Git repo)
12
13
  */
@@ -244,82 +245,6 @@ export class GitRepoLoader extends ContentLoader {
244
245
  console.warn(`Warning: Could not clean up temp directory ${tempDir}: ${error instanceof Error ? error.message : String(error)}`);
245
246
  }
246
247
  }
247
- /**
248
- * Filter list of files to only include documentation-relevant files (REQ-18)
249
- * @param files - Array of file paths to filter
250
- * @returns Array of file paths that are considered documentation
251
- */
252
- filterDocumentationFiles(files) {
253
- return files.filter((file) => this.isDocumentationFile(file));
254
- }
255
- /**
256
- * Determine if a file is considered documentation content (REQ-18)
257
- * @param filePath - Path to the file to check
258
- * @returns True if file should be included as documentation
259
- */
260
- isDocumentationFile(filePath) {
261
- const filename = path.basename(filePath);
262
- const extension = path.extname(filePath).toLowerCase();
263
- const directory = path.dirname(filePath);
264
- // Exclude project metadata files (REQ-18)
265
- const metadataFiles = /^(CHANGELOG|LICENSE|CONTRIBUTING|AUTHORS|CODE_OF_CONDUCT)/i;
266
- if (metadataFiles.test(filename)) {
267
- return false;
268
- }
269
- // Normalize directory path for consistent matching (use forward slashes)
270
- const normalizedDir = directory.split(path.sep).join("/");
271
- const pathParts = normalizedDir.split("/");
272
- // Exclude build, dependency, and development directories (REQ-18)
273
- // Use exact directory name matching, not substring matching
274
- const excludedDirs = [
275
- "node_modules",
276
- "vendor",
277
- ".git",
278
- "build",
279
- "dist",
280
- "target",
281
- ".cache",
282
- "__tests__",
283
- "test",
284
- "tests",
285
- ".github",
286
- ".vscode",
287
- ".idea",
288
- ];
289
- // Check if any path segment matches excluded directories
290
- for (const excludedDir of excludedDirs) {
291
- if (pathParts.includes(excludedDir)) {
292
- return false;
293
- }
294
- }
295
- // Include README files anywhere (REQ-18)
296
- if (/^README/i.test(filename)) {
297
- return true;
298
- }
299
- // Include documentation file extensions anywhere, regardless of directory (REQ-18)
300
- const docExtensions = [".md", ".mdx", ".rst", ".txt", ".adoc", ".asciidoc"];
301
- if (docExtensions.includes(extension)) {
302
- return true;
303
- }
304
- // Special case: examples/samples directory - include ALL file types (Issue #12)
305
- // These directories contain code that demonstrates usage patterns
306
- const isInExamples = /\b(examples?|samples?)\b/i.test(directory);
307
- if (isInExamples) {
308
- // In examples/samples, exclude only binary files
309
- const excludedInExamples = [
310
- ".exe",
311
- ".bin",
312
- ".so",
313
- ".dll",
314
- ".dylib",
315
- ".a",
316
- ".o",
317
- ".obj",
318
- ];
319
- return !excludedInExamples.includes(extension);
320
- }
321
- return false;
322
- }
323
248
  /**
324
249
  * Extract only documentation files from source directory (REQ-18)
325
250
  * @param sourceDir - Source directory to scan
@@ -330,7 +255,7 @@ export class GitRepoLoader extends ContentLoader {
330
255
  // First, scan all files in the repository
331
256
  const allFiles = await this.scanAllFiles(sourceDir);
332
257
  // Filter to only documentation files
333
- const docFiles = this.filterDocumentationFiles(allFiles);
258
+ const docFiles = filterDocumentationFiles(allFiles);
334
259
  // Copy the filtered files
335
260
  for (const filePath of docFiles) {
336
261
  const relativePath = path.relative(sourceDir, filePath);
@@ -3,7 +3,9 @@
3
3
  */
4
4
  export { ContentLoader } from "./loader.js";
5
5
  export { GitRepoLoader } from "./git-repo-loader.js";
6
+ export { ZipLoader } from "./zip-loader.js";
6
7
  export { DocumentationSiteLoader } from "./documentation-site-loader.js";
7
8
  export { ApiDocumentationLoader } from "./api-documentation-loader.js";
8
9
  export { ContentProcessor } from "./content-processor.js";
9
10
  export { MetadataManager } from "./metadata-manager.js";
11
+ export { isDocumentationFile, filterDocumentationFiles, } from "./file-filter.js";
@@ -3,7 +3,9 @@
3
3
  */
4
4
  export { ContentLoader } from "./loader.js";
5
5
  export { GitRepoLoader } from "./git-repo-loader.js";
6
+ export { ZipLoader } from "./zip-loader.js";
6
7
  export { DocumentationSiteLoader } from "./documentation-site-loader.js";
7
8
  export { ApiDocumentationLoader } from "./api-documentation-loader.js";
8
9
  export { ContentProcessor } from "./content-processor.js";
9
10
  export { MetadataManager } from "./metadata-manager.js";
11
+ export { isDocumentationFile, filterDocumentationFiles, } from "./file-filter.js";
@@ -0,0 +1,79 @@
1
+ /**
2
+ * Zip file content loader
3
+ */
4
+ import { ContentLoader, type LoadResult } from "./loader.js";
5
+ import { WebSourceConfig } from "../types.js";
6
+ /**
7
+ * Content loader for zip archives, addressed either by an http(s) URL or by a local file path
8
+ */
9
+ export declare class ZipLoader extends ContentLoader {
10
+ /**
11
+ * Check if this loader can handle the given web source type (true only for WebSourceType.ZIP)
12
+ */
13
+ canHandle(webSource: WebSourceConfig): boolean;
14
+ /**
15
+ * Validate the web source configuration: returns true when a url is set, otherwise an error message string
16
+ */
17
+ validateConfig(webSource: WebSourceConfig): true | string;
18
+ /**
19
+ * Load content from a zip file into targetPath; failures are reported through the result (success: false, error) instead of being thrown
20
+ */
21
+ load(webSource: WebSourceConfig, targetPath: string): Promise<LoadResult>;
22
+ /**
23
+ * Get content identifier for change detection: a SHA-256 hex digest derived from the ETag/Last-Modified header (remote) or from the file bytes (local), falling back to a digest of the URL
24
+ */
25
+ getContentId(webSource: WebSourceConfig): Promise<string>;
26
+ /**
27
+ * Get headers from remote URL using HEAD request
28
+ */
29
+ private getRemoteHeaders;
30
+ /**
31
+ * Determine if the source is a remote URL (starts with http:// or https://) or local path
32
+ */
33
+ private isRemoteUrl;
34
+ /**
35
+ * Resolve the zip file path - download if remote, return as-is if local (a ZIP_ERROR is raised when the local file is not accessible)
36
+ */
37
+ private resolveZipFile;
38
+ /**
39
+ * Download a zip file from a remote URL
40
+ */
41
+ private downloadZip;
42
+ /**
43
+ * Extract a zip file to a directory using adm-zip
44
+ */
45
+ private extractZip;
46
+ /**
47
+ * If the extracted contents have a single root directory and no files at root,
48
+ * move that directory's contents one level up.
49
+ */
50
+ private flattenSingleRoot;
51
+ /**
52
+ * Extract content from extracted zip to target directory: the configured paths when given, otherwise only documentation files
53
+ */
54
+ private extractContent;
55
+ /**
56
+ * Extract only documentation files from source directory
57
+ */
58
+ private extractDocumentationFiles;
59
+ /**
60
+ * Copy directory recursively
61
+ */
62
+ private copyDirectory;
63
+ /**
64
+ * Recursively scan all files in a directory (entries named .git are skipped)
65
+ */
66
+ private scanAllFiles;
67
+ /**
68
+ * Generate content hash for change detection (SHA-256 over the sorted file names and their contents)
69
+ */
70
+ private generateContentHash;
71
+ /**
72
+ * Create a temporary directory below .tmp in the current working directory
73
+ */
74
+ private createTempDirectory;
75
+ /**
76
+ * Clean up temporary directory; a failure is only logged as a warning
77
+ */
78
+ private cleanupTempDirectory;
79
+ }
@@ -0,0 +1,348 @@
1
+ /**
2
+ * Zip file content loader
3
+ */
4
+ import { promises as fs } from "node:fs";
5
+ import * as path from "node:path";
6
+ import * as crypto from "node:crypto";
7
+ import https from "node:https";
8
+ import http from "node:http";
9
+ import AdmZip from "adm-zip";
10
+ import { ContentLoader } from "./loader.js";
11
+ import { WebSourceType, WebSourceError, WebSourceErrorType, } from "../types.js";
12
+ import { filterDocumentationFiles } from "./file-filter.js";
13
/**
 * Content loader for zip files (local or remote).
 *
 * A source is treated as remote when its `url` starts with `http://` or
 * `https://`; anything else is read as a local file path. The archive is
 * unpacked into a temporary directory, a single wrapping root folder is
 * flattened, and either the configured `paths` or (by default) only
 * documentation files are copied into the target directory.
 */
export class ZipLoader extends ContentLoader {
    /**
     * Check if this loader can handle the given web source type
     * @param webSource - Web source configuration
     * @returns True only for sources of type WebSourceType.ZIP
     */
    canHandle(webSource) {
        return webSource.type === WebSourceType.ZIP;
    }
    /**
     * Validate the web source configuration
     * @param webSource - Web source configuration
     * @returns `true` when valid, otherwise a human readable error message
     */
    validateConfig(webSource) {
        if (!webSource.url) {
            return "Zip source must have a URL (remote) or local path";
        }
        return true;
    }
    /**
     * Load content from a zip file
     *
     * Never throws: every failure is reported through the returned result.
     * @param webSource - Web source configuration (`url`, optional `options.paths`)
     * @param targetPath - Directory the extracted files are copied to
     * @returns Load result with the copied files (relative to targetPath) and their content hash
     */
    async load(webSource, targetPath) {
        try {
            const options = webSource.options;
            const tempDir = await this.createTempDirectory();
            try {
                // Get the zip file (download if remote, or use local path)
                const zipFilePath = await this.resolveZipFile(webSource.url, tempDir);
                // Extract to temp directory
                const extractDir = path.join(tempDir, "extracted");
                await fs.mkdir(extractDir, { recursive: true });
                this.extractZip(zipFilePath, extractDir);
                // Flatten single root directory
                await this.flattenSingleRoot(extractDir);
                // Extract specified paths or all documentation content
                const extractedFiles = await this.extractContent(extractDir, targetPath, options?.paths);
                // Generate content hash
                const contentHash = await this.generateContentHash(targetPath, extractedFiles);
                return {
                    success: true,
                    files: extractedFiles,
                    contentHash,
                };
            }
            finally {
                await this.cleanupTempDirectory(tempDir);
            }
        }
        catch (error) {
            const errorMessage = error instanceof Error ? error.message : String(error);
            return {
                success: false,
                files: [],
                contentHash: "",
                error: `Zip loading failed: ${errorMessage}`,
            };
        }
    }
    /**
     * Get content identifier for change detection
     *
     * Remote: SHA-256 of the URL plus ETag (or Last-Modified, or the URL itself
     * when the server sends neither). Local: SHA-256 of the file bytes. If the
     * lookup fails the URL alone is hashed.
     * @param webSource - Web source configuration
     * @returns Hex encoded SHA-256 digest
     */
    async getContentId(webSource) {
        try {
            if (this.isRemoteUrl(webSource.url)) {
                // For remote URLs, try HEAD request for ETag/Last-Modified
                const headers = await this.getRemoteHeaders(webSource.url);
                const etag = headers["etag"] || "";
                const lastModified = headers["last-modified"] || "";
                const identifier = etag || lastModified || webSource.url;
                return crypto
                    .createHash("sha256")
                    .update(`${webSource.url}:${identifier}`)
                    .digest("hex");
            }
            else {
                // For local files, hash the file content
                const content = await fs.readFile(webSource.url);
                return crypto.createHash("sha256").update(content).digest("hex");
            }
        }
        catch {
            // Fallback to URL-based hash
            return crypto.createHash("sha256").update(webSource.url).digest("hex");
        }
    }
    /**
     * Issue an HTTP(S) request and follow redirects.
     *
     * Node's http/https clients do not follow redirects on their own, and
     * archive download links (e.g. repository "download zip" URLs) commonly
     * answer with a 3xx first.
     * @param url - Absolute http(s) URL
     * @param method - HTTP method ("GET" or "HEAD")
     * @param onResponse - Called with the first non-redirect response
     * @param onError - Called with the error when the request fails
     * @param redirectsLeft - Remaining number of redirects that may be followed
     */
    requestFollowingRedirects(url, method, onResponse, onError, redirectsLeft = 5) {
        const protocol = url.startsWith("https") ? https : http;
        const request = protocol.request(url, { method }, (response) => {
            const status = response.statusCode;
            const location = response.headers ? response.headers.location : undefined;
            if (status !== undefined && status >= 300 && status < 400 && location) {
                // Drain the redirect body so the socket is released
                response.resume();
                if (redirectsLeft <= 0) {
                    onError(new Error(`Too many redirects while requesting ${url}`));
                    return;
                }
                let nextUrl;
                try {
                    // Location may be relative to the current URL
                    nextUrl = new URL(location, url).toString();
                }
                catch (error) {
                    onError(error);
                    return;
                }
                if (!this.isRemoteUrl(nextUrl)) {
                    onError(new Error(`Unsupported redirect target: ${nextUrl}`));
                    return;
                }
                this.requestFollowingRedirects(nextUrl, method, onResponse, onError, redirectsLeft - 1);
                return;
            }
            onResponse(response);
        });
        request.on("error", onError);
        request.end();
    }
    /**
     * Get headers from remote URL using HEAD request
     * @param url - Absolute http(s) URL
     * @returns Header map of the final (post-redirect) response; multi-valued headers are reduced to their first value
     */
    getRemoteHeaders(url) {
        return new Promise((resolve, reject) => {
            this.requestFollowingRedirects(url, "HEAD", (response) => {
                response.resume();
                const headers = {};
                for (const [key, value] of Object.entries(response.headers ?? {})) {
                    if (typeof value === "string") {
                        headers[key] = value;
                    }
                    else if (Array.isArray(value) && value.length > 0 && value[0]) {
                        headers[key] = value[0];
                    }
                }
                resolve(headers);
            }, reject);
        });
    }
    /**
     * Determine if the source is a remote URL or local path
     * @param url - URL or file system path
     * @returns True when the value starts with http:// or https://
     */
    isRemoteUrl(url) {
        return url.startsWith("http://") || url.startsWith("https://");
    }
    /**
     * Resolve the zip file path - download if remote, return as-is if local
     * @param url - URL or local path of the archive
     * @param tempDir - Directory a remote archive is downloaded to
     * @returns Path of the zip file on disk
     * @throws WebSourceError (ZIP_ERROR) when a local file is not accessible
     */
    async resolveZipFile(url, tempDir) {
        if (this.isRemoteUrl(url)) {
            return this.downloadZip(url, tempDir);
        }
        // Local file - verify it exists
        try {
            await fs.access(url);
            return url;
        }
        catch {
            throw new WebSourceError(WebSourceErrorType.ZIP_ERROR, `Local zip file not found: ${url}`, { url });
        }
    }
    /**
     * Download a zip file from a remote URL
     *
     * Redirects are followed; any final status outside 2xx is an error, so a
     * redirect or error page is never stored as if it were the archive.
     * @param url - Absolute http(s) URL
     * @param tempDir - Directory the archive is written to
     * @returns Path of the downloaded file
     */
    async downloadZip(url, tempDir) {
        const zipPath = path.join(tempDir, "download.zip");
        return new Promise((resolve, reject) => {
            const failDownload = (error) => {
                reject(new WebSourceError(WebSourceErrorType.ZIP_ERROR, `Failed to download zip from ${url}: ${error instanceof Error ? error.message : String(error)}`, { url }));
            };
            this.requestFollowingRedirects(url, "GET", (response) => {
                const status = response.statusCode;
                if (status === undefined || status < 200 || status >= 300) {
                    response.resume();
                    reject(new Error(`HTTP ${response.statusCode}: ${response.statusMessage}`));
                    return;
                }
                const chunks = [];
                response.on("data", (chunk) => {
                    chunks.push(chunk);
                });
                // A connection dropped mid-body surfaces here, not on the request
                response.on("error", failDownload);
                response.on("end", () => {
                    fs.writeFile(zipPath, Buffer.concat(chunks)).then(() => resolve(zipPath), reject);
                });
            }, failDownload);
        });
    }
    /**
     * Extract a zip file to a directory using adm-zip
     * @param zipPath - Path of the archive
     * @param targetDir - Directory to unpack into (existing files are overwritten)
     * @throws WebSourceError (ZIP_ERROR) when the archive cannot be read or unpacked
     */
    extractZip(zipPath, targetDir) {
        try {
            const zip = new AdmZip(zipPath);
            zip.extractAllTo(targetDir, true);
        }
        catch (error) {
            throw new WebSourceError(WebSourceErrorType.ZIP_ERROR, `Failed to extract zip: ${error instanceof Error ? error.message : String(error)}`, { zipPath });
        }
    }
    /**
     * If the extracted contents have a single root directory and no files at root,
     * move that directory's contents one level up.
     * @param extractDir - Directory the archive was unpacked into
     */
    async flattenSingleRoot(extractDir) {
        const entries = await fs.readdir(extractDir, { withFileTypes: true });
        const directories = entries.filter((e) => e.isDirectory());
        const files = entries.filter((e) => e.isFile());
        if (directories.length !== 1 || files.length !== 0) {
            return;
        }
        // Move the root aside first: it may contain an entry with its own name
        // ("project/project"), which could not be moved onto the still existing
        // parent folder.
        const stagingDir = path.join(extractDir, `.flatten-${Date.now()}-${Math.random().toString(36).slice(2)}`);
        await fs.rename(path.join(extractDir, directories[0].name), stagingDir);
        // Move all contents up one level
        for (const entry of await fs.readdir(stagingDir)) {
            await fs.rename(path.join(stagingDir, entry), path.join(extractDir, entry));
        }
        // Remove the now-empty directory
        await fs.rmdir(stagingDir);
    }
    /**
     * Extract content from extracted zip to target directory
     * @param sourceDir - Directory holding the unpacked archive
     * @param targetDir - Directory the selected content is copied to
     * @param paths - Optional files/directories (relative to the archive root) to copy; when empty only documentation files are copied
     * @returns Copied files as paths relative to targetDir
     */
    async extractContent(sourceDir, targetDir, paths) {
        await fs.mkdir(targetDir, { recursive: true });
        const extractedFiles = [];
        if (paths && paths.length > 0) {
            // Extract only specified paths
            for (const relPath of paths) {
                const sourcePath = path.join(sourceDir, relPath);
                const targetPath = path.join(targetDir, relPath);
                // Refuse entries that point outside of the unpacked archive
                const insideArchive = path.relative(sourceDir, sourcePath);
                if (insideArchive === ".." ||
                    insideArchive.startsWith(`..${path.sep}`) ||
                    path.isAbsolute(insideArchive)) {
                    console.warn(`Warning: Could not extract ${relPath}: path is outside of the archive`);
                    continue;
                }
                try {
                    const stats = await fs.stat(sourcePath);
                    if (stats.isDirectory()) {
                        // Record files relative to targetDir, like single files below
                        await this.copyDirectory(sourcePath, targetPath, extractedFiles, targetDir);
                    }
                    else if (stats.isFile()) {
                        await fs.mkdir(path.dirname(targetPath), { recursive: true });
                        await fs.copyFile(sourcePath, targetPath);
                        extractedFiles.push(relPath);
                    }
                }
                catch (error) {
                    console.warn(`Warning: Could not extract ${relPath}: ${error instanceof Error ? error.message : String(error)}`);
                }
            }
        }
        else {
            // Use smart filtering to extract only documentation files
            await this.extractDocumentationFiles(sourceDir, targetDir, extractedFiles);
        }
        return extractedFiles;
    }
    /**
     * Extract only documentation files from source directory
     *
     * Filtering runs on archive-relative paths, so the location of the
     * temporary directory (e.g. a working directory below ".../build/...")
     * cannot influence which files are kept.
     * @param sourceDir - Directory holding the unpacked archive
     * @param targetDir - Directory the documentation files are copied to
     * @param extractedFiles - Receives the copied files as paths relative to targetDir
     */
    async extractDocumentationFiles(sourceDir, targetDir, extractedFiles) {
        const allFiles = await this.scanAllFiles(sourceDir);
        const relativeFiles = allFiles.map((filePath) => path.relative(sourceDir, filePath));
        const docFiles = filterDocumentationFiles(relativeFiles);
        for (const relativePath of docFiles) {
            const targetPath = path.join(targetDir, relativePath);
            try {
                await fs.mkdir(path.dirname(targetPath), { recursive: true });
                await fs.copyFile(path.join(sourceDir, relativePath), targetPath);
                extractedFiles.push(relativePath);
            }
            catch (error) {
                console.warn(`Warning: Could not copy ${relativePath}: ${error instanceof Error ? error.message : String(error)}`);
            }
        }
    }
    /**
     * Copy directory recursively
     * @param source - Directory to copy from
     * @param target - Directory to copy to (created when missing)
     * @param fileList - Receives every copied file as a path relative to rootDir
     * @param rootDir - Base directory for the recorded paths; defaults to the top-level target
     */
    async copyDirectory(source, target, fileList, rootDir = target) {
        await fs.mkdir(target, { recursive: true });
        const items = await fs.readdir(source);
        for (const item of items) {
            const sourcePath = path.join(source, item);
            const targetPath = path.join(target, item);
            const stats = await fs.stat(sourcePath);
            if (stats.isDirectory()) {
                // Keep the same root so nested files keep their directory prefix
                await this.copyDirectory(sourcePath, targetPath, fileList, rootDir);
            }
            else {
                await fs.copyFile(sourcePath, targetPath);
                fileList.push(path.relative(rootDir, targetPath));
            }
        }
    }
    /**
     * Recursively scan all files in a directory
     * @param dir - Directory to scan
     * @returns Full paths of all regular files; entries named ".git" are skipped
     */
    async scanAllFiles(dir) {
        const files = [];
        async function scan(currentDir) {
            const items = await fs.readdir(currentDir);
            for (const item of items) {
                if (item === ".git")
                    continue;
                const fullPath = path.join(currentDir, item);
                const stat = await fs.stat(fullPath);
                if (stat.isDirectory()) {
                    await scan(fullPath);
                }
                else if (stat.isFile()) {
                    files.push(fullPath);
                }
            }
        }
        await scan(dir);
        return files;
    }
    /**
     * Generate content hash for change detection
     * @param targetDir - Directory the files were copied to
     * @param files - File paths relative to targetDir
     * @returns Hex encoded SHA-256 over the sorted file names and their contents; unreadable files are skipped with a warning
     */
    async generateContentHash(targetDir, files) {
        const hash = crypto.createHash("sha256");
        const sortedFiles = files.slice().sort();
        for (const file of sortedFiles) {
            const filePath = path.join(targetDir, file);
            try {
                const content = await fs.readFile(filePath);
                hash.update(file);
                hash.update(content);
            }
            catch (error) {
                console.warn(`Warning: Could not hash ${file}: ${error instanceof Error ? error.message : String(error)}`);
            }
        }
        return hash.digest("hex");
    }
    /**
     * Create a temporary directory
     * @returns Path of a fresh directory below `.tmp` in the current working directory
     */
    async createTempDirectory() {
        const tempDir = path.join(process.cwd(), ".tmp", `zip-extract-${Date.now()}-${Math.random().toString(36).slice(2)}`);
        await fs.mkdir(tempDir, { recursive: true });
        return tempDir;
    }
    /**
     * Clean up temporary directory
     *
     * Best effort: a failure is only logged so it cannot mask the load result.
     * @param tempDir - Directory to remove recursively
     */
    async cleanupTempDirectory(tempDir) {
        try {
            await fs.rm(tempDir, { recursive: true, force: true });
        }
        catch (error) {
            console.warn(`Warning: Could not clean up temp directory ${tempDir}: ${error instanceof Error ? error.message : String(error)}`);
        }
    }
}
@@ -22,7 +22,8 @@ export interface DocsetConfig {
22
22
  export declare enum WebSourceType {
23
23
  GIT_REPO = "git_repo",
24
24
  DOCUMENTATION_SITE = "documentation_site",
25
- API_DOCUMENTATION = "api_documentation"
25
+ API_DOCUMENTATION = "api_documentation",
26
+ ZIP = "zip"
26
27
  }
27
28
  /**
28
29
  * Configuration for Git repository web sources
@@ -55,16 +56,23 @@ export interface ApiDocumentationOptions {
55
56
  /** Packages or modules to include */
56
57
  include_packages?: string[];
57
58
  }
59
+ /**
60
+ * Configuration for zip file web sources
61
+ */
62
+ export interface ZipOptions {
63
+ /** Specific paths to extract from the zip */
64
+ paths?: string[];
65
+ }
58
66
  /**
59
67
  * Configuration for a single web source
60
68
  */
61
69
  export interface WebSourceConfig {
62
- /** URL of the web source */
70
+ /** URL of the web source (or local path for zip sources) */
63
71
  url: string;
64
72
  /** Type of web source */
65
73
  type: WebSourceType;
66
74
  /** Type-specific options */
67
- options?: GitRepoOptions | DocumentationSiteOptions | ApiDocumentationOptions;
75
+ options?: GitRepoOptions | DocumentationSiteOptions | ApiDocumentationOptions | ZipOptions;
68
76
  }
69
77
  /**
70
78
  * Metadata for a single web source download
@@ -108,6 +116,7 @@ export declare const METADATA_FILENAME = ".agentic-metadata.json";
108
116
  export declare enum WebSourceErrorType {
109
117
  WEB_SOURCE_ERROR = "WEB_SOURCE_ERROR",
110
118
  GIT_REPO_ERROR = "GIT_REPO_ERROR",
119
+ ZIP_ERROR = "ZIP_ERROR",
111
120
  NOT_IMPLEMENTED = "NOT_IMPLEMENTED"
112
121
  }
113
122
  /**
@@ -9,6 +9,7 @@ export var WebSourceType;
9
9
  WebSourceType["GIT_REPO"] = "git_repo";
10
10
  WebSourceType["DOCUMENTATION_SITE"] = "documentation_site";
11
11
  WebSourceType["API_DOCUMENTATION"] = "api_documentation";
12
+ WebSourceType["ZIP"] = "zip";
12
13
  })(WebSourceType || (WebSourceType = {}));
13
14
  /**
14
15
  * Metadata file name pattern
@@ -21,6 +22,7 @@ export var WebSourceErrorType;
21
22
  (function (WebSourceErrorType) {
22
23
  WebSourceErrorType["WEB_SOURCE_ERROR"] = "WEB_SOURCE_ERROR";
23
24
  WebSourceErrorType["GIT_REPO_ERROR"] = "GIT_REPO_ERROR";
25
+ WebSourceErrorType["ZIP_ERROR"] = "ZIP_ERROR";
24
26
  WebSourceErrorType["NOT_IMPLEMENTED"] = "NOT_IMPLEMENTED";
25
27
  })(WebSourceErrorType || (WebSourceErrorType = {}));
26
28
  /**
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@codemcp/knowledge-content-loader",
3
- "version": "1.0.17",
3
+ "version": "1.0.18",
4
4
  "description": "Web content loading and metadata management for agentic knowledge system",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -29,10 +29,12 @@
29
29
  "typecheck": "tsc --noEmit"
30
30
  },
31
31
  "dependencies": {
32
+ "adm-zip": "0.5.16",
32
33
  "simple-git": "^3.22.0"
33
34
  },
34
35
  "devDependencies": {
35
36
  "@eslint/js": "^9.34.0",
37
+ "@types/adm-zip": "0.5.7",
36
38
  "@types/node": "^24.3.0",
37
39
  "eslint": "^9.34.0",
38
40
  "rimraf": "^6.0.1",
@@ -207,6 +207,30 @@ function validateSource(source) {
207
207
  }
208
208
  return true;
209
209
  }
210
+ if (type === "zip") {
211
+ const hasPath = obj["path"] !== undefined &&
212
+ typeof obj["path"] === "string" &&
213
+ obj["path"].trim() !== "";
214
+ const hasUrl = obj["url"] !== undefined &&
215
+ typeof obj["url"] === "string" &&
216
+ obj["url"].trim() !== "";
217
+ // Must have exactly one of path or url
218
+ if (hasPath === hasUrl) {
219
+ return false;
220
+ }
221
+ // Optional paths field
222
+ if (obj["paths"] !== undefined) {
223
+ if (!Array.isArray(obj["paths"])) {
224
+ return false;
225
+ }
226
+ for (const path of obj["paths"]) {
227
+ if (typeof path !== "string" || path.trim() === "") {
228
+ return false;
229
+ }
230
+ }
231
+ }
232
+ return true;
233
+ }
210
234
  // Unknown source type
211
235
  return false;
212
236
  }
@@ -46,6 +46,10 @@ export function calculateLocalPath(docset, configPath) {
46
46
  // For git repos, use standardized path: .knowledge/docsets/{id}
47
47
  return join(configDir, "docsets", docset.id);
48
48
  }
49
+ if (primarySource.type === "zip") {
50
+ // For zip sources, use standardized path: .knowledge/docsets/{id}
51
+ return join(configDir, "docsets", docset.id);
52
+ }
49
53
  throw new Error(`Unsupported source type: ${primarySource.type}`);
50
54
  }
51
55
  catch (error) {
@@ -87,6 +91,10 @@ export async function calculateLocalPathWithSymlinks(docset, configPath) {
87
91
  // For git repos, use standardized path: .knowledge/docsets/{id}
88
92
  return join(configDir, "docsets", docset.id);
89
93
  }
94
+ if (primarySource.type === "zip") {
95
+ // For zip sources, use standardized path: .knowledge/docsets/{id}
96
+ return join(configDir, "docsets", docset.id);
97
+ }
90
98
  throw new Error(`Unsupported source type: ${primarySource.type}`);
91
99
  }
92
100
  /**
@@ -30,10 +30,22 @@ export interface GitRepoSourceConfig extends BaseSourceConfig {
30
30
  /** Specific paths to extract (optional) */
31
31
  paths?: string[];
32
32
  }
33
+ /**
34
+ * Zip file source configuration
35
+ */
36
+ export interface ZipSourceConfig extends BaseSourceConfig {
37
+ type: "zip";
38
+ /** Local path to zip file (mutually exclusive with url) */
39
+ path?: string;
40
+ /** Remote URL to download zip from (mutually exclusive with path) */
41
+ url?: string;
42
+ /** Specific paths to extract (optional) */
43
+ paths?: string[];
44
+ }
33
45
  /**
34
46
  * Union type for all source configurations
35
47
  */
36
- export type SourceConfig = LocalFolderSourceConfig | GitRepoSourceConfig;
48
+ export type SourceConfig = LocalFolderSourceConfig | GitRepoSourceConfig | ZipSourceConfig;
37
49
  /**
38
50
  * Configuration for a single docset
39
51
  */
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@codemcp/knowledge-core",
3
- "version": "1.0.17",
3
+ "version": "1.0.18",
4
4
  "description": "Core functionality for agentic knowledge guidance system",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@codemcp/knowledge",
3
- "version": "1.0.17",
3
+ "version": "1.0.18",
4
4
  "description": "MCP server implementation for agentic knowledge guidance system",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",