npm - agentic-knowledge-mcp - Versions diffs - 1.0.17 → 1.1.0 - Mend

agentic-knowledge-mcp 1.0.17 → 1.1.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (21) hide show

package/package.json +4 -4
package/packages/cli/dist/commands/create.js +59 -4
package/packages/cli/dist/commands/init.js +39 -1
package/packages/cli/dist/commands/refresh.js +76 -0
package/packages/cli/package.json +1 -1
package/packages/content-loader/dist/content/archive-loader.d.ts +87 -0
package/packages/content-loader/dist/content/archive-loader.js +391 -0
package/packages/content-loader/dist/content/file-filter.d.ts +15 -0
package/packages/content-loader/dist/content/file-filter.js +80 -0
package/packages/content-loader/dist/content/git-repo-loader.d.ts +0 -12
package/packages/content-loader/dist/content/git-repo-loader.js +2 -77
package/packages/content-loader/dist/content/index.d.ts +2 -0
package/packages/content-loader/dist/content/index.js +2 -0
package/packages/content-loader/dist/types.d.ts +12 -3
package/packages/content-loader/dist/types.js +2 -0
package/packages/content-loader/package.json +6 -2
package/packages/core/dist/config/loader.js +24 -0
package/packages/core/dist/paths/calculator.js +8 -0
package/packages/core/dist/types.d.ts +13 -1
package/packages/core/package.json +1 -1
package/packages/mcp-server/package.json +1 -1

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "agentic-knowledge-mcp",
-  "version": "1.0.17",
+  "version": "1.1.0",
   "description": "A Model Context Protocol server for agentic knowledge guidance with web-based documentation loading and intelligent search instructions",
   "type": "module",
   "main": "packages/cli/dist/index.js",
@@ -29,9 +29,9 @@
     "commander": "^12.0.0",
     "js-yaml": "4.1.0",
     "ora": "^8.0.1",
-    "@codemcp/knowledge": "1.0.17",
-    "@codemcp/knowledge-content-loader": "1.0.17",
-    "@codemcp/knowledge-core": "1.0.17"
+    "@codemcp/knowledge": "1.1.0",
+    "@codemcp/knowledge-content-loader": "1.1.0",
+    "@codemcp/knowledge-core": "1.1.0"
   },
   "devDependencies": {
     "@eslint/js": "^9.34.0",

package/packages/cli/dist/commands/create.js CHANGED Viewed

@@ -5,15 +5,16 @@ import { Command } from "commander";
 import chalk from "chalk";
 import { promises as fs } from "node:fs";
 import * as path from "node:path";
+import { URL } from "node:url";
 import { ConfigManager } from "@codemcp/knowledge-core";
 export const createCommand = new Command("create")
     .description("Create a new docset using presets")
-    .requiredOption("--preset <type>", "Preset type: git-repo or local-folder")
+    .requiredOption("--preset <type>", "Preset type: git-repo, local-folder, or archive")
     .requiredOption("--id <id>", "Unique docset ID")
     .requiredOption("--name <name>", "Human-readable docset name")
     .option("--description <desc>", "Docset description")
-    .option("--url <url>", "Git repository URL (required for git-repo preset)")
-    .option("--path <path>", "Local folder path (required for local-folder preset)")
+    .option("--url <url>", "Git repository URL (git-repo) or archive file URL (archive preset)")
+    .option("--path <path>", "Local folder path (local-folder) or local archive file path (archive preset)")
     .option("--branch <branch>", "Git branch (default: main)", "main")
     .action(async (options) => {
     try {
@@ -47,8 +48,11 @@ export const createCommand = new Command("create")
         else if (options.preset === "local-folder") {
             newDocset = await createLocalFolderDocset(options);
         }
+        else if (options.preset === "archive") {
+            newDocset = await createArchiveDocset(options);
+        }
         else {
-            throw new Error(`Unknown preset: ${options.preset}. Use 'git-repo' or 'local-folder'`);
+            throw new Error(`Unknown preset: ${options.preset}. Use 'git-repo', 'local-folder', or 'archive'`);
         }
         // Add to config
         config.docsets.push(newDocset);
@@ -111,3 +115,54 @@ async function createLocalFolderDocset(options) {
         ],
     };
 }
+async function createArchiveDocset(options) {
+    if (!options.path && !options.url) {
+        throw new Error("Either --path or --url is required for archive preset");
+    }
+    // If path is provided, validate it exists
+    if (options.path) {
+        const fullPath = path.resolve(options.path);
+        try {
+            const stat = await fs.stat(fullPath);
+            if (!stat.isFile()) {
+                throw new Error(`Path is not a file: ${options.path}`);
+            }
+            const lowerPath = options.path.toLowerCase();
+            if (!lowerPath.endsWith(".zip") &&
+                !lowerPath.endsWith(".tar.gz") &&
+                !lowerPath.endsWith(".tgz")) {
+                throw new Error(`File is not a supported archive format (zip, tar.gz): ${options.path}`);
+            }
+        }
+        catch {
+            throw new Error(`Path does not exist or is invalid: ${options.path}`);
+        }
+    }
+    // If URL is provided, validate it's a valid URL
+    if (options.url) {
+        try {
+            new URL(options.url);
+        }
+        catch {
+            throw new Error(`Invalid URL format: ${options.url}`);
+        }
+    }
+    const source = {
+        type: "archive",
+    };
+    if (options.path) {
+        source.path = options.path;
+    }
+    if (options.url) {
+        source.url = options.url;
+    }
+    if (options.paths) {
+        source.paths = options.paths.split(",");
+    }
+    return {
+        id: options.id,
+        name: options.name,
+        description: options.description || `Archive: ${options.path || options.url}`,
+        sources: [source],
+    };
+}

package/packages/cli/dist/commands/init.js CHANGED Viewed

@@ -6,7 +6,7 @@ import chalk from "chalk";
 import { promises as fs } from "node:fs";
 import * as path from "node:path";
 import { ConfigManager, calculateLocalPath, ensureKnowledgeGitignoreSync, discoverDirectoryPatterns, safelyClearDirectory, getDirectoryInfo, } from "@codemcp/knowledge-core";
-import { GitRepoLoader, WebSourceType, } from "@codemcp/knowledge-content-loader";
+import { GitRepoLoader, ArchiveLoader, WebSourceType, } from "@codemcp/knowledge-content-loader";
 export const initCommand = new Command("init")
     .description("Initialize sources for a docset from configuration")
     .argument("<docset-id>", "ID of the docset to initialize")
@@ -169,6 +169,44 @@ export const initCommand = new Command("init")
                 };
                 await fs.writeFile(path.join(localPath, `.agentic-source-${index}.json`), JSON.stringify(metadata, null, 2));
             }
+            else if (source.type === "archive") {
+                // Handle archive file initialization (zip, tar.gz, etc.)
+                const loader = new ArchiveLoader();
+                const sourceUrl = source.url || source.path || "";
+                console.log(chalk.gray(`  Using ArchiveLoader for archive extraction`));
+                const webSourceConfig = {
+                    url: sourceUrl,
+                    type: WebSourceType.ARCHIVE,
+                    options: {
+                        paths: source.paths || [],
+                    },
+                };
+                // Validate configuration
+                const validation = loader.validateConfig(webSourceConfig);
+                if (validation !== true) {
+                    throw new Error(`Invalid archive source configuration: ${validation}`);
+                }
+                // Load content using ArchiveLoader
+                const result = await loader.load(webSourceConfig, localPath);
+                if (!result.success) {
+                    throw new Error(`Archive loading failed: ${result.error}`);
+                }
+                // Collect discovered paths for config update
+                allDiscoveredPaths.push(...result.files);
+                totalFiles += result.files.length;
+                console.log(chalk.green(`    ✅ Extracted ${result.files.length} files from archive`));
+                // Create source metadata
+                const metadata = {
+                    source_url: sourceUrl,
+                    source_type: source.type,
+                    downloaded_at: new Date().toISOString(),
+                    files_count: result.files.length,
+                    files: result.files,
+                    docset_id: docsetId,
+                    content_hash: result.contentHash,
+                };
+                await fs.writeFile(path.join(localPath, `.agentic-source-${index}.json`), JSON.stringify(metadata, null, 2));
+            }
             else {
                 console.log(chalk.red(`    ❌ Source type '${source.type}' not yet supported`));
             }

package/packages/cli/dist/commands/refresh.js CHANGED Viewed

@@ -8,6 +8,7 @@ import { promises as fs } from "node:fs";
 import * as path from "node:path";
 import { execSync } from "node:child_process";
 import { findConfigPathSync, loadConfigSync, calculateLocalPath, ensureKnowledgeGitignoreSync, } from "@codemcp/knowledge-core";
+import { ArchiveLoader, WebSourceType, } from "@codemcp/knowledge-content-loader";
 export const refreshCommand = new Command("refresh")
     .description("Refresh sources for docsets")
     .argument("[docset-id]", "ID of specific docset to refresh (refresh all if not specified)")
@@ -95,6 +96,11 @@ async function refreshDocset(docset, configPath, force) {
                 totalFiles += sourceFiles.files_count;
                 refreshedSources.push(sourceFiles);
             }
+            else if (source.type === "archive") {
+                const sourceFiles = await refreshArchiveSource(source, localPath, index, docset.id, force);
+                totalFiles += sourceFiles.files_count;
+                refreshedSources.push(sourceFiles);
+            }
             else {
                 console.log(chalk.yellow(`    ⚠️  Source type '${source.type}' not yet supported, skipping`));
             }
@@ -238,6 +244,76 @@ async function refreshGitSource(webSource, localPath, index, docsetId, force) {
         await fs.rm(tempDir, { recursive: true, force: true });
     }
 }
+async function refreshArchiveSource(source, localPath, index, docsetId, force) {
+    const sourceMetadataPath = path.join(localPath, `.agentic-source-${index}.json`);
+    let existingSourceMetadata = null;
+    try {
+        const content = await fs.readFile(sourceMetadataPath, "utf8");
+        existingSourceMetadata = JSON.parse(content);
+    }
+    catch {
+        // No existing metadata, will do full refresh
+    }
+    const sourceUrl = source.url || source.path || "";
+    const loader = new ArchiveLoader();
+    const webSourceConfig = {
+        url: sourceUrl,
+        type: WebSourceType.ARCHIVE,
+        options: {
+            paths: source.paths || [],
+        },
+    };
+    // Check if content has changed
+    if (!force && existingSourceMetadata) {
+        try {
+            const currentId = await loader.getContentId(webSourceConfig);
+            const lastHash = existingSourceMetadata.content_hash;
+            if (lastHash === currentId) {
+                const updatedMetadata = {
+                    ...existingSourceMetadata,
+                    downloaded_at: new Date().toISOString(),
+                };
+                await fs.writeFile(sourceMetadataPath, JSON.stringify(updatedMetadata, null, 2));
+                return updatedMetadata;
+            }
+        }
+        catch {
+            // Could not check, proceed with full refresh
+        }
+    }
+    // Remove old files from this source (if we have metadata)
+    if (existingSourceMetadata) {
+        for (const file of existingSourceMetadata.files) {
+            const filePath = path.join(localPath, file);
+            try {
+                await fs.unlink(filePath);
+            }
+            catch {
+                // File might already be deleted, ignore
+            }
+        }
+    }
+    // Load content
+    const result = await loader.load(webSourceConfig, localPath);
+    if (!result.success) {
+        throw new Error(`Archive refresh failed: ${result.error}`);
+    }
+    const metadata = {
+        source_url: sourceUrl,
+        source_type: "archive",
+        downloaded_at: new Date().toISOString(),
+        files_count: result.files.length,
+        files: result.files,
+        docset_id: docsetId,
+    };
+    // Store content hash for future change detection
+    const metadataWithHash = {
+        ...metadata,
+        content_hash: result.contentHash,
+    };
+    await fs.writeFile(sourceMetadataPath, JSON.stringify(metadataWithHash, null, 2));
+    return metadata;
+}
 // Reuse utility functions from init.ts
 async function findMarkdownFiles(dir) {
     const files = [];

package/packages/cli/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@codemcp/knowledge-cli",
-  "version": "1.0.17",
+  "version": "1.1.0",
   "description": "Command-line interface for agentic knowledge web content management",
   "type": "module",
   "main": "dist/exports.js",

package/packages/content-loader/dist/content/archive-loader.d.ts ADDED Viewed

@@ -0,0 +1,87 @@
+/**
+ * Archive file content loader (supports zip, tar.gz, etc.)
+ */
+import { ContentLoader, type LoadResult } from "./loader.js";
+import { WebSourceConfig } from "../types.js";
+/**
+ * Content loader for archive files - zip, tar.gz, etc. (local or remote)
+ */
+export declare class ArchiveLoader extends ContentLoader {
+    /**
+     * Check if this loader can handle the given web source type
+     */
+    canHandle(webSource: WebSourceConfig): boolean;
+    /**
+     * Validate the web source configuration
+     */
+    validateConfig(webSource: WebSourceConfig): true | string;
+    /**
+     * Load content from an archive file
+     */
+    load(webSource: WebSourceConfig, targetPath: string): Promise<LoadResult>;
+    /**
+     * Get content identifier for change detection
+     */
+    getContentId(webSource: WebSourceConfig): Promise<string>;
+    /**
+     * Get headers from remote URL using HEAD request
+     */
+    private getRemoteHeaders;
+    /**
+     * Determine if the source is a remote URL or local path
+     */
+    private isRemoteUrl;
+    /**
+     * Detect archive type based on file extension
+     */
+    private detectArchiveType;
+    /**
+     * Resolve the archive file path - download if remote, return as-is if local
+     */
+    private resolveArchiveFile;
+    /**
+     * Download an archive file from a remote URL
+     */
+    private downloadArchive;
+    /**
+     * Extract a zip file to a directory using adm-zip
+     */
+    private extractZip;
+    /**
+     * Extract a tar.gz file to a directory
+     */
+    private extractTarGz;
+    /**
+     * If the extracted contents have a single root directory and no files at root,
+     * move that directory's contents one level up.
+     */
+    private flattenSingleRoot;
+    /**
+     * Extract content from extracted archive to target directory
+     */
+    private extractContent;
+    /**
+     * Extract only documentation files from source directory
+     */
+    private extractDocumentationFiles;
+    /**
+     * Copy directory recursively
+     */
+    private copyDirectory;
+    /**
+     * Recursively scan all files in a directory
+     */
+    private scanAllFiles;
+    /**
+     * Generate content hash for change detection
+     */
+    private generateContentHash;
+    /**
+     * Create a temporary directory
+     */
+    private createTempDirectory;
+    /**
+     * Clean up temporary directory
+     */
+    private cleanupTempDirectory;
+}

package/packages/content-loader/dist/content/archive-loader.js ADDED Viewed

@@ -0,0 +1,391 @@
+/**
+ * Archive file content loader (supports zip, tar.gz, etc.)
+ */
+import { promises as fs } from "node:fs";
+import * as path from "node:path";
+import * as crypto from "node:crypto";
+import https from "node:https";
+import http from "node:http";
+import { URL } from "node:url";
+import AdmZip from "adm-zip";
+import * as tar from "tar";
+import { ContentLoader } from "./loader.js";
+import { WebSourceType, WebSourceError, WebSourceErrorType, } from "../types.js";
+import { filterDocumentationFiles } from "./file-filter.js";
+/**
+ * Content loader for archive files - zip, tar.gz, etc. (local or remote)
+ */
+export class ArchiveLoader extends ContentLoader {
+    /**
+     * Check if this loader can handle the given web source type
+     */
+    canHandle(webSource) {
+        return webSource.type === WebSourceType.ARCHIVE;
+    }
+    /**
+     * Validate the web source configuration
+     */
+    validateConfig(webSource) {
+        if (!webSource.url) {
+            return "Archive source must have a URL (remote) or local path";
+        }
+        return true;
+    }
+    /**
+     * Load content from an archive file
+     */
+    async load(webSource, targetPath) {
+        try {
+            const options = webSource.options;
+            const tempDir = await this.createTempDirectory();
+            try {
+                // Get the archive file (download if remote, or use local path)
+                const archiveFilePath = await this.resolveArchiveFile(webSource.url, tempDir);
+                // Detect archive type
+                const archiveType = this.detectArchiveType(archiveFilePath);
+                // Extract to temp directory
+                const extractDir = path.join(tempDir, "extracted");
+                await fs.mkdir(extractDir, { recursive: true });
+                if (archiveType === "zip") {
+                    this.extractZip(archiveFilePath, extractDir);
+                }
+                else if (archiveType === "tar.gz") {
+                    await this.extractTarGz(archiveFilePath, extractDir);
+                }
+                else {
+                    throw new WebSourceError(WebSourceErrorType.ARCHIVE_ERROR, `Unsupported archive format. Supported formats: .zip, .tar.gz`, { archiveType });
+                }
+                // Flatten single root directory
+                await this.flattenSingleRoot(extractDir);
+                // Extract specified paths or all documentation content
+                const extractedFiles = await this.extractContent(extractDir, targetPath, options?.paths);
+                // Generate content hash
+                const contentHash = await this.generateContentHash(targetPath, extractedFiles);
+                return {
+                    success: true,
+                    files: extractedFiles,
+                    contentHash,
+                };
+            }
+            finally {
+                await this.cleanupTempDirectory(tempDir);
+            }
+        }
+        catch (error) {
+            const errorMessage = error instanceof Error ? error.message : String(error);
+            return {
+                success: false,
+                files: [],
+                contentHash: "",
+                error: `Archive loading failed: ${errorMessage}`,
+            };
+        }
+    }
+    /**
+     * Get content identifier for change detection
+     */
+    async getContentId(webSource) {
+        try {
+            if (this.isRemoteUrl(webSource.url)) {
+                // For remote URLs, try HEAD request for ETag/Last-Modified
+                const headers = await this.getRemoteHeaders(webSource.url);
+                const etag = headers["etag"] || "";
+                const lastModified = headers["last-modified"] || "";
+                const identifier = etag || lastModified || webSource.url;
+                return crypto
+                    .createHash("sha256")
+                    .update(`${webSource.url}:${identifier}`)
+                    .digest("hex");
+            }
+            else {
+                // For local files, hash the file content
+                const content = await fs.readFile(webSource.url);
+                return crypto.createHash("sha256").update(content).digest("hex");
+            }
+        }
+        catch {
+            // Fallback to URL-based hash
+            return crypto.createHash("sha256").update(webSource.url).digest("hex");
+        }
+    }
+    /**
+     * Get headers from remote URL using HEAD request
+     */
+    getRemoteHeaders(url) {
+        return new Promise((resolve, reject) => {
+            const protocol = url.startsWith("https") ? https : http;
+            const request = protocol.request(url, { method: "HEAD" }, (response) => {
+                const headers = {};
+                if (response.headers) {
+                    for (const [key, value] of Object.entries(response.headers)) {
+                        if (typeof value === "string") {
+                            headers[key] = value;
+                        }
+                        else if (Array.isArray(value) && value.length > 0 && value[0]) {
+                            headers[key] = value[0];
+                        }
+                    }
+                }
+                resolve(headers);
+            });
+            request.on("error", reject);
+            request.end();
+        });
+    }
+    /**
+     * Determine if the source is a remote URL or local path
+     */
+    isRemoteUrl(url) {
+        return url.startsWith("http://") || url.startsWith("https://");
+    }
+    /**
+     * Detect archive type based on file extension
+     */
+    detectArchiveType(filePath) {
+        const lowerPath = filePath.toLowerCase();
+        if (lowerPath.endsWith(".tar.gz") || lowerPath.endsWith(".tgz")) {
+            return "tar.gz";
+        }
+        if (lowerPath.endsWith(".zip")) {
+            return "zip";
+        }
+        return "unknown";
+    }
+    /**
+     * Resolve the archive file path - download if remote, return as-is if local
+     */
+    async resolveArchiveFile(url, tempDir) {
+        if (this.isRemoteUrl(url)) {
+            return this.downloadArchive(url, tempDir);
+        }
+        // Local file - verify it exists
+        try {
+            await fs.access(url);
+            return url;
+        }
+        catch {
+            throw new WebSourceError(WebSourceErrorType.ARCHIVE_ERROR, `Local archive file not found: ${url}`, { url });
+        }
+    }
+    /**
+     * Download an archive file from a remote URL
+     */
+    async downloadArchive(url, tempDir) {
+        // Determine filename from URL
+        const urlPath = new URL(url).pathname;
+        const filename = path.basename(urlPath) || "download.archive";
+        const archivePath = path.join(tempDir, filename);
+        return new Promise((resolve, reject) => {
+            const protocol = url.startsWith("https") ? https : http;
+            const request = protocol.get(url, async (response) => {
+                if (response.statusCode === undefined || response.statusCode >= 400) {
+                    reject(new Error(`HTTP ${response.statusCode}: ${response.statusMessage}`));
+                    return;
+                }
+                try {
+                    const chunks = [];
+                    response.on("data", (chunk) => {
+                        chunks.push(chunk);
+                    });
+                    response.on("end", async () => {
+                        try {
+                            const buffer = Buffer.concat(chunks);
+                            await fs.writeFile(archivePath, buffer);
+                            resolve(archivePath);
+                        }
+                        catch (error) {
+                            reject(error);
+                        }
+                    });
+                }
+                catch (error) {
+                    reject(error);
+                }
+            });
+            request.on("error", (error) => {
+                reject(new WebSourceError(WebSourceErrorType.ARCHIVE_ERROR, `Failed to download archive from ${url}: ${error instanceof Error ? error.message : String(error)}`, { url }));
+            });
+        });
+    }
+    /**
+     * Extract a zip file to a directory using adm-zip
+     */
+    extractZip(zipPath, targetDir) {
+        try {
+            const zip = new AdmZip(zipPath);
+            zip.extractAllTo(targetDir, true);
+        }
+        catch (error) {
+            throw new WebSourceError(WebSourceErrorType.ARCHIVE_ERROR, `Failed to extract zip: ${error instanceof Error ? error.message : String(error)}`, { zipPath });
+        }
+    }
+    /**
+     * Extract a tar.gz file to a directory
+     */
+    async extractTarGz(tarGzPath, targetDir) {
+        try {
+            await tar.extract({
+                file: tarGzPath,
+                cwd: targetDir,
+                strip: 0,
+            });
+        }
+        catch (error) {
+            throw new WebSourceError(WebSourceErrorType.ARCHIVE_ERROR, `Failed to extract tar.gz: ${error instanceof Error ? error.message : String(error)}`, { tarGzPath });
+        }
+    }
+    /**
+     * If the extracted contents have a single root directory and no files at root,
+     * move that directory's contents one level up.
+     */
+    async flattenSingleRoot(extractDir) {
+        const entries = await fs.readdir(extractDir, { withFileTypes: true });
+        const directories = entries.filter((e) => e.isDirectory());
+        const files = entries.filter((e) => e.isFile());
+        if (directories.length === 1 && files.length === 0) {
+            const singleDir = path.join(extractDir, directories[0].name);
+            const innerEntries = await fs.readdir(singleDir);
+            // Move all contents up one level
+            for (const entry of innerEntries) {
+                const src = path.join(singleDir, entry);
+                const dest = path.join(extractDir, entry);
+                await fs.rename(src, dest);
+            }
+            // Remove the now-empty directory
+            await fs.rmdir(singleDir);
+        }
+    }
+    /**
+     * Extract content from extracted archive to target directory
+     */
+    async extractContent(sourceDir, targetDir, paths) {
+        await fs.mkdir(targetDir, { recursive: true });
+        const extractedFiles = [];
+        if (paths && paths.length > 0) {
+            // Extract only specified paths
+            for (const relPath of paths) {
+                const sourcePath = path.join(sourceDir, relPath);
+                const targetPath = path.join(targetDir, relPath);
+                try {
+                    const stats = await fs.stat(sourcePath);
+                    if (stats.isDirectory()) {
+                        await this.copyDirectory(sourcePath, targetPath, extractedFiles);
+                    }
+                    else if (stats.isFile()) {
+                        await fs.mkdir(path.dirname(targetPath), { recursive: true });
+                        await fs.copyFile(sourcePath, targetPath);
+                        extractedFiles.push(relPath);
+                    }
+                }
+                catch (error) {
+                    console.warn(`Warning: Could not extract ${relPath}: ${error instanceof Error ? error.message : String(error)}`);
+                }
+            }
+        }
+        else {
+            // Use smart filtering to extract only documentation files
+            await this.extractDocumentationFiles(sourceDir, targetDir, extractedFiles);
+        }
+        return extractedFiles;
+    }
+    /**
+     * Extract only documentation files from source directory
+     */
+    async extractDocumentationFiles(sourceDir, targetDir, extractedFiles) {
+        const allFiles = await this.scanAllFiles(sourceDir);
+        const docFiles = filterDocumentationFiles(allFiles);
+        for (const filePath of docFiles) {
+            const relativePath = path.relative(sourceDir, filePath);
+            const targetPath = path.join(targetDir, relativePath);
+            try {
+                await fs.mkdir(path.dirname(targetPath), { recursive: true });
+                await fs.copyFile(filePath, targetPath);
+                extractedFiles.push(relativePath);
+            }
+            catch (error) {
+                console.warn(`Warning: Could not copy ${relativePath}: ${error instanceof Error ? error.message : String(error)}`);
+            }
+        }
+    }
+    /**
+     * Copy directory recursively
+     */
+    async copyDirectory(source, target, fileList) {
+        await fs.mkdir(target, { recursive: true });
+        const items = await fs.readdir(source);
+        for (const item of items) {
+            const sourcePath = path.join(source, item);
+            const targetPath = path.join(target, item);
+            const stats = await fs.stat(sourcePath);
+            if (stats.isDirectory()) {
+                await this.copyDirectory(sourcePath, targetPath, fileList);
+            }
+            else {
+                await fs.copyFile(sourcePath, targetPath);
+                const relativePath = path.relative(target, targetPath);
+                fileList.push(relativePath);
+            }
+        }
+    }
+    /**
+     * Recursively scan all files in a directory
+     */
+    async scanAllFiles(dir) {
+        const files = [];
+        async function scan(currentDir) {
+            const items = await fs.readdir(currentDir);
+            for (const item of items) {
+                if (item === ".git")
+                    continue;
+                const fullPath = path.join(currentDir, item);
+                const stat = await fs.stat(fullPath);
+                if (stat.isDirectory()) {
+                    await scan(fullPath);
+                }
+                else if (stat.isFile()) {
+                    files.push(fullPath);
+                }
+            }
+        }
+        await scan(dir);
+        return files;
+    }
+    /**
+     * Generate content hash for change detection
+     */
+    async generateContentHash(targetDir, files) {
+        const hash = crypto.createHash("sha256");
+        const sortedFiles = files.slice().sort();
+        for (const file of sortedFiles) {
+            const filePath = path.join(targetDir, file);
+            try {
+                const content = await fs.readFile(filePath);
+                hash.update(file);
+                hash.update(content);
+            }
+            catch (error) {
+                console.warn(`Warning: Could not hash ${file}: ${error instanceof Error ? error.message : String(error)}`);
+            }
+        }
+        return hash.digest("hex");
+    }
+    /**
+     * Create a temporary directory
+     */
+    async createTempDirectory() {
+        const tempDir = path.join(process.cwd(), ".tmp", `archive-extract-${Date.now()}-${Math.random().toString(36).slice(2)}`);
+        await fs.mkdir(tempDir, { recursive: true });
+        return tempDir;
+    }
+    /**
+     * Clean up temporary directory
+     */
+    async cleanupTempDirectory(tempDir) {
+        try {
+            await fs.rm(tempDir, { recursive: true, force: true });
+        }
+        catch (error) {
+            console.warn(`Warning: Could not clean up temp directory ${tempDir}: ${error instanceof Error ? error.message : String(error)}`);
+        }
+    }
+}

package/packages/content-loader/dist/content/file-filter.d.ts ADDED Viewed

@@ -0,0 +1,15 @@
+/**
+ * Shared file filtering utilities for documentation content extraction (REQ-18)
+ */
+/**
+ * Determine if a file is considered documentation content (REQ-18)
+ * @param filePath - Path to the file to check
+ * @returns True if file should be included as documentation
+ */
+export declare function isDocumentationFile(filePath: string): boolean;
+/**
+ * Filter list of files to only include documentation-relevant files (REQ-18)
+ * @param files - Array of file paths to filter
+ * @returns Array of file paths that are considered documentation
+ */
+export declare function filterDocumentationFiles(files: string[]): string[];

package/packages/content-loader/dist/content/file-filter.js ADDED Viewed

@@ -0,0 +1,80 @@
+/**
+ * Shared file filtering utilities for documentation content extraction (REQ-18)
+ */
+import * as path from "node:path";
+/**
+ * Determine if a file is considered documentation content (REQ-18)
+ *
+ * Checks run in this order and the first one that matches decides:
+ *  1. filename starts with a project-metadata name: excluded
+ *  2. any directory segment equals an excluded directory name: excluded
+ *  3. filename starts with "README": included
+ *  4. extension is one of the documentation extensions: included
+ *  5. directory matches examples/samples: included unless the extension is a listed binary type
+ *  6. anything else: excluded
+ *
+ * NOTE(review): step 2 looks at every segment of the directory part of
+ * `filePath`. If a caller passes a full path, an ancestor directory named
+ * e.g. "build" or "test" excludes the file - confirm callers pass paths
+ * relative to the content root.
+ *
+ * @param filePath - Path to the file to check
+ * @returns True if file should be included as documentation
+ */
+export function isDocumentationFile(filePath) {
+    const filename = path.basename(filePath);
+    const extension = path.extname(filePath).toLowerCase(); // lowercased so the extension checks below are case-insensitive
+    const directory = path.dirname(filePath);
+    // Exclude project metadata files (REQ-18)
+    const metadataFiles = /^(CHANGELOG|LICENSE|CONTRIBUTING|AUTHORS|CODE_OF_CONDUCT)/i; // prefix match, case-insensitive: "LICENSE", "license.txt", "CHANGELOG.md" all hit
+    if (metadataFiles.test(filename)) {
+        return false;
+    }
+    // Normalize directory path for consistent matching (use forward slashes)
+    const normalizedDir = directory.split(path.sep).join("/");
+    const pathParts = normalizedDir.split("/");
+    // Exclude build, dependency, and development directories (REQ-18)
+    // Use exact directory name matching, not substring matching
+    const excludedDirs = [
+        "node_modules",
+        "vendor",
+        ".git",
+        "build",
+        "dist",
+        "target",
+        ".cache",
+        "__tests__",
+        "test",
+        "tests",
+        ".github",
+        ".vscode",
+        ".idea",
+    ];
+    // Check if any path segment matches excluded directories
+    for (const excludedDir of excludedDirs) {
+        if (pathParts.includes(excludedDir)) { // exact, case-sensitive comparison against each segment
+            return false;
+        }
+    }
+    // Include README files anywhere outside the excluded directories above (REQ-18)
+    if (/^README/i.test(filename)) {
+        return true;
+    }
+    // Include documentation file extensions in any directory not already excluded above (REQ-18)
+    const docExtensions = [".md", ".mdx", ".rst", ".txt", ".adoc", ".asciidoc"];
+    if (docExtensions.includes(extension)) {
+        return true;
+    }
+    // Special case: examples/samples directory - include every file type except the binary extensions below (Issue #12)
+    // These directories contain code that demonstrates usage patterns
+    const isInExamples = /\b(examples?|samples?)\b/i.test(directory); // word-boundary match on the whole directory string, so e.g. "code-examples" matches too
+    if (isInExamples) {
+        // In examples/samples, exclude only binary files
+        const excludedInExamples = [
+            ".exe",
+            ".bin",
+            ".so",
+            ".dll",
+            ".dylib",
+            ".a",
+            ".o",
+            ".obj",
+        ];
+        return !excludedInExamples.includes(extension);
+    }
+    return false;
+}
+/**
+ * Filter list of files to only include documentation-relevant files (REQ-18)
+ *
+ * Each entry is kept or dropped by `isDocumentationFile`. The result is a new
+ * array in the same relative order; the input array is not modified.
+ *
+ * @param files - Array of file paths to filter
+ * @returns Array of file paths that are considered documentation
+ */
+export function filterDocumentationFiles(files) {
+    return files.filter((file) => isDocumentationFile(file));
+}

package/packages/content-loader/dist/content/git-repo-loader.d.ts CHANGED Viewed

@@ -51,18 +51,6 @@ export declare class GitRepoLoader extends ContentLoader {
      * Clean up temporary directory
      */
     private cleanupTempDirectory;
-    /**
-     * Filter list of files to only include documentation-relevant files (REQ-18)
-     * @param files - Array of file paths to filter
-     * @returns Array of file paths that are considered documentation
-     */
-    private filterDocumentationFiles;
-    /**
-     * Determine if a file is considered documentation content (REQ-18)
-     * @param filePath - Path to the file to check
-     * @returns True if file should be included as documentation
-     */
-    private isDocumentationFile;
     /**
      * Extract only documentation files from source directory (REQ-18)
      * @param sourceDir - Source directory to scan

package/packages/content-loader/dist/content/git-repo-loader.js CHANGED Viewed

@@ -7,6 +7,7 @@ import { execSync } from "node:child_process";
 import { ContentLoader } from "./loader.js";
 import { WebSourceType, WebSourceError, WebSourceErrorType, } from "../types.js";
 import * as crypto from "node:crypto";
+import { filterDocumentationFiles } from "./file-filter.js";
 /**
  * Content loader for Git repositories (GitHub, GitLab, any Git repo)
  */
@@ -244,82 +245,6 @@ export class GitRepoLoader extends ContentLoader {
             console.warn(`Warning: Could not clean up temp directory ${tempDir}: ${error instanceof Error ? error.message : String(error)}`);
         }
     }
-    /**
-     * Filter list of files to only include documentation-relevant files (REQ-18)
-     * @param files - Array of file paths to filter
-     * @returns Array of file paths that are considered documentation
-     */
-    filterDocumentationFiles(files) {
-        return files.filter((file) => this.isDocumentationFile(file));
-    }
-    /**
-     * Determine if a file is considered documentation content (REQ-18)
-     * @param filePath - Path to the file to check
-     * @returns True if file should be included as documentation
-     */
-    isDocumentationFile(filePath) {
-        const filename = path.basename(filePath);
-        const extension = path.extname(filePath).toLowerCase();
-        const directory = path.dirname(filePath);
-        // Exclude project metadata files (REQ-18)
-        const metadataFiles = /^(CHANGELOG|LICENSE|CONTRIBUTING|AUTHORS|CODE_OF_CONDUCT)/i;
-        if (metadataFiles.test(filename)) {
-            return false;
-        }
-        // Normalize directory path for consistent matching (use forward slashes)
-        const normalizedDir = directory.split(path.sep).join("/");
-        const pathParts = normalizedDir.split("/");
-        // Exclude build, dependency, and development directories (REQ-18)
-        // Use exact directory name matching, not substring matching
-        const excludedDirs = [
-            "node_modules",
-            "vendor",
-            ".git",
-            "build",
-            "dist",
-            "target",
-            ".cache",
-            "__tests__",
-            "test",
-            "tests",
-            ".github",
-            ".vscode",
-            ".idea",
-        ];
-        // Check if any path segment matches excluded directories
-        for (const excludedDir of excludedDirs) {
-            if (pathParts.includes(excludedDir)) {
-                return false;
-            }
-        }
-        // Include README files anywhere (REQ-18)
-        if (/^README/i.test(filename)) {
-            return true;
-        }
-        // Include documentation file extensions anywhere, regardless of directory (REQ-18)
-        const docExtensions = [".md", ".mdx", ".rst", ".txt", ".adoc", ".asciidoc"];
-        if (docExtensions.includes(extension)) {
-            return true;
-        }
-        // Special case: examples/samples directory - include ALL file types (Issue #12)
-        // These directories contain code that demonstrates usage patterns
-        const isInExamples = /\b(examples?|samples?)\b/i.test(directory);
-        if (isInExamples) {
-            // In examples/samples, exclude only binary files
-            const excludedInExamples = [
-                ".exe",
-                ".bin",
-                ".so",
-                ".dll",
-                ".dylib",
-                ".a",
-                ".o",
-                ".obj",
-            ];
-            return !excludedInExamples.includes(extension);
-        }
-        return false;
-    }
     /**
      * Extract only documentation files from source directory (REQ-18)
      * @param sourceDir - Source directory to scan
@@ -330,7 +255,7 @@ export class GitRepoLoader extends ContentLoader {
         // First, scan all files in the repository
         const allFiles = await this.scanAllFiles(sourceDir);
         // Filter to only documentation files
-        const docFiles = this.filterDocumentationFiles(allFiles);
+        const docFiles = filterDocumentationFiles(allFiles);
         // Copy the filtered files
         for (const filePath of docFiles) {
             const relativePath = path.relative(sourceDir, filePath);

package/packages/content-loader/dist/content/index.d.ts CHANGED Viewed

@@ -3,7 +3,9 @@
  */
 export { ContentLoader } from "./loader.js";
 export { GitRepoLoader } from "./git-repo-loader.js";
+export { ArchiveLoader } from "./archive-loader.js";
 export { DocumentationSiteLoader } from "./documentation-site-loader.js";
 export { ApiDocumentationLoader } from "./api-documentation-loader.js";
 export { ContentProcessor } from "./content-processor.js";
 export { MetadataManager } from "./metadata-manager.js";
+export { isDocumentationFile, filterDocumentationFiles, } from "./file-filter.js";

package/packages/content-loader/dist/content/index.js CHANGED Viewed

@@ -3,7 +3,9 @@
  */
 export { ContentLoader } from "./loader.js";
 export { GitRepoLoader } from "./git-repo-loader.js";
+export { ArchiveLoader } from "./archive-loader.js";
 export { DocumentationSiteLoader } from "./documentation-site-loader.js";
 export { ApiDocumentationLoader } from "./api-documentation-loader.js";
 export { ContentProcessor } from "./content-processor.js";
 export { MetadataManager } from "./metadata-manager.js";
+export { isDocumentationFile, filterDocumentationFiles, } from "./file-filter.js";

package/packages/content-loader/dist/types.d.ts CHANGED Viewed

@@ -22,7 +22,8 @@ export interface DocsetConfig {
 export declare enum WebSourceType {
     GIT_REPO = "git_repo",
     DOCUMENTATION_SITE = "documentation_site",
-    API_DOCUMENTATION = "api_documentation"
+    API_DOCUMENTATION = "api_documentation",
+    ARCHIVE = "archive"
 }
 /**
  * Configuration for Git repository web sources
@@ -55,16 +56,23 @@ export interface ApiDocumentationOptions {
     /** Packages or modules to include */
     include_packages?: string[];
 }
+/**
+ * Configuration for archive file web sources (zip, tar.gz, etc.)
+ *
+ * One of the option shapes accepted by `WebSourceConfig.options`.
+ */
+export interface ArchiveOptions {
+    /** Specific paths to extract from the archive (optional) */
+    paths?: string[];
+}
 /**
  * Configuration for a single web source
  */
 export interface WebSourceConfig {
-    /** URL of the web source */
+    /** URL of the web source (or local path for archive sources) */
     url: string;
     /** Type of web source */
     type: WebSourceType;
     /** Type-specific options */
-    options?: GitRepoOptions | DocumentationSiteOptions | ApiDocumentationOptions;
+    options?: GitRepoOptions | DocumentationSiteOptions | ApiDocumentationOptions | ArchiveOptions;
 }
 /**
  * Metadata for a single web source download
@@ -108,6 +116,7 @@ export declare const METADATA_FILENAME = ".agentic-metadata.json";
 export declare enum WebSourceErrorType {
     WEB_SOURCE_ERROR = "WEB_SOURCE_ERROR",
     GIT_REPO_ERROR = "GIT_REPO_ERROR",
+    ARCHIVE_ERROR = "ARCHIVE_ERROR",
     NOT_IMPLEMENTED = "NOT_IMPLEMENTED"
 }
 /**

package/packages/content-loader/dist/types.js CHANGED Viewed

@@ -9,6 +9,7 @@ export var WebSourceType;
     WebSourceType["GIT_REPO"] = "git_repo";
     WebSourceType["DOCUMENTATION_SITE"] = "documentation_site";
     WebSourceType["API_DOCUMENTATION"] = "api_documentation";
+    WebSourceType["ARCHIVE"] = "archive";
 })(WebSourceType || (WebSourceType = {}));
 /**
  * Metadata file name pattern
@@ -21,6 +22,7 @@ export var WebSourceErrorType;
 (function (WebSourceErrorType) {
     WebSourceErrorType["WEB_SOURCE_ERROR"] = "WEB_SOURCE_ERROR";
     WebSourceErrorType["GIT_REPO_ERROR"] = "GIT_REPO_ERROR";
+    WebSourceErrorType["ARCHIVE_ERROR"] = "ARCHIVE_ERROR";
     WebSourceErrorType["NOT_IMPLEMENTED"] = "NOT_IMPLEMENTED";
 })(WebSourceErrorType || (WebSourceErrorType = {}));
 /**

package/packages/content-loader/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@codemcp/knowledge-content-loader",
-  "version": "1.0.17",
+  "version": "1.1.0",
   "description": "Web content loading and metadata management for agentic knowledge system",
   "type": "module",
   "main": "dist/index.js",
@@ -29,11 +29,15 @@
     "typecheck": "tsc --noEmit"
   },
   "dependencies": {
-    "simple-git": "^3.22.0"
+    "adm-zip": "0.5.16",
+    "simple-git": "^3.22.0",
+    "tar": "7.5.9"
   },
   "devDependencies": {
     "@eslint/js": "^9.34.0",
+    "@types/adm-zip": "0.5.7",
     "@types/node": "^24.3.0",
+    "@types/tar": "7.0.87",
     "eslint": "^9.34.0",
     "rimraf": "^6.0.1",
     "typescript": "^5.9.2",

package/packages/core/dist/config/loader.js CHANGED Viewed

@@ -207,6 +207,30 @@ function validateSource(source) {
         }
         return true;
     }
+    if (type === "archive") {
+        const hasPath = obj["path"] !== undefined &&
+            typeof obj["path"] === "string" &&
+            obj["path"].trim() !== "";
+        const hasUrl = obj["url"] !== undefined &&
+            typeof obj["url"] === "string" &&
+            obj["url"].trim() !== "";
+        // Must have exactly one of path or url
+        if (hasPath === hasUrl) {
+            return false;
+        }
+        // Optional paths field
+        if (obj["paths"] !== undefined) {
+            if (!Array.isArray(obj["paths"])) {
+                return false;
+            }
+            for (const path of obj["paths"]) {
+                if (typeof path !== "string" || path.trim() === "") {
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
     // Unknown source type
     return false;
 }

package/packages/core/dist/paths/calculator.js CHANGED Viewed

@@ -46,6 +46,10 @@ export function calculateLocalPath(docset, configPath) {
             // For git repos, use standardized path: .knowledge/docsets/{id}
             return join(configDir, "docsets", docset.id);
         }
+        if (primarySource.type === "archive") {
+            // For archive sources, use standardized path: .knowledge/docsets/{id}
+            return join(configDir, "docsets", docset.id);
+        }
         throw new Error(`Unsupported source type: ${primarySource.type}`);
     }
     catch (error) {
@@ -87,6 +91,10 @@ export async function calculateLocalPathWithSymlinks(docset, configPath) {
         // For git repos, use standardized path: .knowledge/docsets/{id}
         return join(configDir, "docsets", docset.id);
     }
+    if (primarySource.type === "archive") {
+        // For archive sources, use standardized path: .knowledge/docsets/{id}
+        return join(configDir, "docsets", docset.id);
+    }
     throw new Error(`Unsupported source type: ${primarySource.type}`);
 }
 /**

package/packages/core/dist/types.d.ts CHANGED Viewed

@@ -30,10 +30,22 @@ export interface GitRepoSourceConfig extends BaseSourceConfig {
     /** Specific paths to extract (optional) */
     paths?: string[];
 }
+/**
+ * Archive file source configuration (supports zip, tar.gz, etc.)
+ *
+ * `path` and `url` are both optional at the type level, so this interface
+ * alone also admits "both" and "neither". The config loader's source
+ * validation for type "archive" rejects a source unless exactly one of the
+ * two is a non-empty string.
+ */
+export interface ArchiveSourceConfig extends BaseSourceConfig {
+    type: "archive";
+    /** Local path to archive file (mutually exclusive with url) */
+    path?: string;
+    /** Remote URL to download archive from (mutually exclusive with path) */
+    url?: string;
+    /** Specific paths to extract (optional) */
+    paths?: string[];
+}
 /**
  * Union type for all source configurations
  */
-export type SourceConfig = LocalFolderSourceConfig | GitRepoSourceConfig;
+export type SourceConfig = LocalFolderSourceConfig | GitRepoSourceConfig | ArchiveSourceConfig;
 /**
  * Configuration for a single docset
  */

package/packages/core/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@codemcp/knowledge-core",
-  "version": "1.0.17",
+  "version": "1.1.0",
   "description": "Core functionality for agentic knowledge guidance system",
   "type": "module",
   "main": "dist/index.js",

package/packages/mcp-server/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@codemcp/knowledge",
-  "version": "1.0.17",
+  "version": "1.1.0",
   "description": "MCP server implementation for agentic knowledge guidance system",
   "type": "module",
   "main": "dist/index.js",