agentic-knowledge-mcp 1.5.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentic-knowledge-mcp",
3
- "version": "1.5.0",
3
+ "version": "1.6.1",
4
4
  "description": "A Model Context Protocol server for agentic knowledge guidance with web-based documentation loading and intelligent search instructions",
5
5
  "type": "module",
6
6
  "main": "packages/cli/dist/index.js",
@@ -8,7 +8,7 @@
8
8
  "agentic-knowledge": "packages/cli/dist/index.js"
9
9
  },
10
10
  "engines": {
11
- "node": ">=18.0.0",
11
+ "node": ">=20.0.0",
12
12
  "pnpm": ">=9.0.0"
13
13
  },
14
14
  "files": [
@@ -29,9 +29,9 @@
29
29
  "commander": "^12.0.0",
30
30
  "js-yaml": "4.1.0",
31
31
  "ora": "^8.0.1",
32
- "@codemcp/knowledge": "1.5.0",
33
- "@codemcp/knowledge-content-loader": "1.5.0",
34
- "@codemcp/knowledge-core": "1.5.0"
32
+ "@codemcp/knowledge-content-loader": "1.6.1",
33
+ "@codemcp/knowledge": "1.6.1",
34
+ "@codemcp/knowledge-core": "1.6.1"
35
35
  },
36
36
  "devDependencies": {
37
37
  "@eslint/js": "^9.34.0",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@codemcp/knowledge-cli",
3
- "version": "1.5.0",
3
+ "version": "1.6.1",
4
4
  "description": "Command-line interface for agentic knowledge web content management",
5
5
  "type": "module",
6
6
  "main": "dist/exports.js",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@codemcp/knowledge-content-loader",
3
- "version": "1.5.0",
3
+ "version": "1.6.1",
4
4
  "description": "Web content loading and metadata management for agentic knowledge system",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -13,3 +13,4 @@ export { createSymlinks, removeSymlinks } from "./paths/symlinks.js";
13
13
  export { discoverDirectoryPatterns, discoverMinimalPatterns, } from "./paths/discovery.js";
14
14
  export { safelyClearDirectory, containsSymlinks, getDirectoryInfo, } from "./paths/cleanup.js";
15
15
  export { processTemplate, getEffectiveTemplate, validateTemplate, extractVariables, createTemplateContext, createStructuredResponse, } from "./templates/processor.js";
16
+ export { buildFileIndex, searchDocset, formatSearchResult, type DocsetIndex, } from "./search/searcher.js";
@@ -20,3 +20,5 @@ export { discoverDirectoryPatterns, discoverMinimalPatterns, } from "./paths/dis
20
20
  export { safelyClearDirectory, containsSymlinks, getDirectoryInfo, } from "./paths/cleanup.js";
21
21
  // Export template processing
22
22
  export { processTemplate, getEffectiveTemplate, validateTemplate, extractVariables, createTemplateContext, createStructuredResponse, } from "./templates/processor.js";
23
+ // Export search functionality
24
+ export { buildFileIndex, searchDocset, formatSearchResult, } from "./search/searcher.js";
@@ -0,0 +1,53 @@
1
+ /**
2
+ * File-content search for docsets.
3
+ *
4
+ * Strategy (ADR-001 Option C + optional MiniSearch pre-filter):
5
+ * 1. Walk the docset directory, skip binary files and ignored paths.
6
+ * 2. If MiniSearch is available AND the pattern looks like a plain term (no regex
7
+ * metacharacters), build/retrieve a lightweight in-memory index and use it to
8
+ * rank the most relevant files first — this keeps the hot path fast on large
9
+ * docsets without requiring any extra dependency.
10
+ * 3. Stream each candidate file line by line; test against the compiled RegExp.
11
+ * 4. Collect up to `maxMatches` results with surrounding context lines.
12
+ * 5. If 0 matches and a fallbackPattern is provided, repeat with that pattern.
13
+ * 6. If still 0, re-run without pre-filtering (safety net for exotic regex).
14
+ */
15
+ import type { SearchDocsResult, SearchOptions } from "../types.js";
16
+ /** Opaque handle returned by {@link buildFileIndex}. */
17
+ export interface DocsetIndex {
18
+ /** MiniSearch instance (null when MiniSearch could not be loaded) */
19
+ _ms: {
20
+ search(_query: string, _opts?: Record<string, unknown>): Array<{
21
+ id: unknown;
22
+ score: number;
23
+ }>;
24
+ } | null;
25
+ /** Absolute path to the docset root used to build this index */
26
+ rootPath: string;
27
+ /** Map from numeric doc id → absolute file path */
28
+ _idToPath: Map<number, string>;
29
+ }
30
+ /**
31
+ * Build an in-memory full-text index over all text files in `rootPath`.
32
+ * Returns a {@link DocsetIndex} regardless of whether MiniSearch is available;
33
+ * when it is not, the index is a no-op stub that causes the caller to fall
34
+ * back to a full streaming search.
35
+ */
36
+ export declare function buildFileIndex(rootPath: string): Promise<DocsetIndex>;
37
+ /**
38
+ * Search `rootPath` for lines matching `pattern` (a regex string).
39
+ *
40
+ * @param rootPath Absolute path to the docset directory.
41
+ * @param pattern Primary search pattern. Supports full JS regex syntax
42
+ * (e.g. `"auth|login"`, `"function\\s+\\w+"`, `"TODO.*fix"`).
43
+ * The match is always case-insensitive.
44
+ * @param options Optional tuning parameters.
45
+ * @param index Pre-built index for the docset. Pass one to avoid re-walking
46
+ * on repeated calls. Omit to build ad-hoc (no caching).
47
+ */
48
+ export declare function searchDocset(rootPath: string, pattern: string, options?: SearchOptions, index?: DocsetIndex): Promise<SearchDocsResult>;
49
+ /**
50
+ * Format a {@link SearchDocsResult} as a human-readable, grep-style text block
51
+ * suitable for returning as MCP tool content.
52
+ */
53
+ export declare function formatSearchResult(result: SearchDocsResult): string;
@@ -0,0 +1,371 @@
1
+ /**
2
+ * File-content search for docsets.
3
+ *
4
+ * Strategy (ADR-001 Option C + optional MiniSearch pre-filter):
5
+ * 1. Walk the docset directory, skip binary files and ignored paths.
6
+ * 2. If MiniSearch is available AND the pattern looks like a plain term (no regex
7
+ * metacharacters), build/retrieve a lightweight in-memory index and use it to
8
+ * rank the most relevant files first — this keeps the hot path fast on large
9
+ * docsets without requiring any extra dependency.
10
+ * 3. Stream each candidate file line by line; test against the compiled RegExp.
11
+ * 4. Collect up to `maxMatches` results with surrounding context lines.
12
+ * 5. If 0 matches and a fallbackPattern is provided, repeat with that pattern.
13
+ * 6. If still 0, re-run without pre-filtering (safety net for exotic regex).
14
+ */
15
+ import { createReadStream } from "node:fs";
16
+ import { readdir, stat } from "node:fs/promises";
17
+ import { join, relative } from "node:path";
18
+ import { createInterface } from "node:readline";
19
+ // ---------------------------------------------------------------------------
20
+ // Constants
21
+ // ---------------------------------------------------------------------------
22
+ const DEFAULT_CONTEXT_LINES = 0;
23
+ const DEFAULT_MAX_MATCHES = 50;
24
+ /** Directories / files that are never useful to search inside a docset. */
25
+ const IGNORED_NAMES = new Set([
26
+ "node_modules",
27
+ ".git",
28
+ "dist",
29
+ "build",
30
+ ".turbo",
31
+ ".cache",
32
+ ]);
33
+ /** Files that are always skipped regardless of directory. */
34
+ const IGNORED_FILES = new Set([".agentic-metadata.json", ".gitignore"]);
35
+ /**
36
+ * Regex metacharacters that indicate the user supplied a real regex pattern.
37
+ * When present we skip the MiniSearch pre-filter (it would tokenise the raw
38
+ * pattern incorrectly) and go straight to streaming grep.
39
+ */
40
+ const REGEX_META = /[.+*?^${}()|[\]\\]/;
41
+ // ---------------------------------------------------------------------------
42
+ // MiniSearch integration (optional, best-effort)
43
+ // ---------------------------------------------------------------------------
44
/**
 * Attempt to load the optional `minisearch` package on demand.
 *
 * @returns The module's default export, or `null` when the dynamic import
 *          fails, so callers can degrade to a plain streaming search
 *          instead of throwing.
 */
async function tryLoadMiniSearch() {
    let loaded;
    try {
        loaded = await import("minisearch");
    }
    catch {
        // Import failed: signal "not available" to the caller.
        return null;
    }
    return loaded.default;
}
59
/**
 * Create an in-memory full-text index covering every text file below `rootPath`.
 *
 * A {@link DocsetIndex} is always returned. When MiniSearch cannot be loaded
 * the result is a stub (`_ms: null`, empty id map) which makes the search
 * fall back to streaming every file.
 *
 * @param rootPath Directory whose files are indexed.
 * @returns The index handle: MiniSearch instance (or null), the root path it
 *          was built for, and the numeric-id → absolute-path lookup table.
 */
export async function buildFileIndex(rootPath) {
    const pathsById = new Map();
    const MiniSearchCtor = await tryLoadMiniSearch();
    if (!MiniSearchCtor) {
        return { _ms: null, rootPath, _idToPath: pathsById };
    }
    const engine = new MiniSearchCtor({
        fields: ["content"],
        storeFields: [],
    });
    const documents = [];
    for await (const filePath of walkFiles(rootPath)) {
        const text = await readTextFile(filePath);
        if (text !== null) {
            // Ids are assigned sequentially, so the next id is the batch size.
            const docId = documents.length;
            documents.push({ id: docId, content: text });
            pathsById.set(docId, filePath);
        }
        // null means binary or unreadable: leave it out of the index
    }
    await engine.addAllAsync(documents);
    return { _ms: engine, rootPath, _idToPath: pathsById };
}
88
+ // ---------------------------------------------------------------------------
89
+ // Main search entry point
90
+ // ---------------------------------------------------------------------------
91
/**
 * Search the docset at `rootPath` for lines matching `pattern`.
 *
 * The primary pattern is tried first. Only when it produces no match and a
 * non-blank `options.fallbackPattern` exists is a second pass run with the
 * trimmed fallback pattern.
 *
 * @param rootPath Absolute path to the docset directory.
 * @param pattern  Primary search pattern (JS regex source, matched
 *                 case-insensitively).
 * @param options  Optional tuning parameters (`contextLines`, `maxMatches`,
 *                 `include`, `fallbackPattern`).
 * @param index    Optional pre-built index for the docset.
 * @returns The result of the primary pass, or of the fallback pass when the
 *          primary pass found nothing and a fallback pattern was given.
 */
export async function searchDocset(rootPath, pattern, options = {}, index) {
    const passOptions = {
        contextLines: options.contextLines ?? DEFAULT_CONTEXT_LINES,
        maxMatches: options.maxMatches ?? DEFAULT_MAX_MATCHES,
        include: options.include,
    };
    const primaryResult = await runSearch(rootPath, pattern, passOptions, index);
    if (primaryResult.total_matches > 0) {
        return primaryResult;
    }
    const fallbackPattern = options.fallbackPattern?.trim();
    if (!fallbackPattern) {
        return primaryResult;
    }
    return await runSearch(rootPath, fallbackPattern, { ...passOptions }, index);
}
114
/**
 * Run a single search pass for `pattern` over the files below `rootPath`.
 *
 * Candidate selection:
 *  - When `index` carries a MiniSearch instance and `pattern` contains no
 *    regex metacharacters, the index ranks the files; the ranked files that
 *    pass `opts.include` are cut to the top 20 and scanned first.
 *  - Otherwise, or when ranking yields no usable file, every walkable file
 *    accepted by `opts.include` is scanned.
 *  - Safety net: if the pre-filtered pass finds nothing, the files that were
 *    not ranked are scanned as well, so the pre-filter can never hide a match.
 *
 * @param rootPath Docset root; reported file paths are relative to it.
 * @param pattern  Regex source. If it does not compile it is matched as a
 *                 literal string. Matching is always case-insensitive.
 * @param opts     `{ contextLines, maxMatches, include }`.
 * @param index    Optional index produced by `buildFileIndex`.
 * @returns `{ matches, total_matches, searched_files, used_pattern, truncated }`.
 */
async function runSearch(rootPath, pattern, opts, index) {
    let regex;
    try {
        regex = new RegExp(pattern, "i");
    }
    catch {
        // Invalid regex: treat as literal string
        regex = new RegExp(escapeRegex(pattern), "i");
    }
    // Maximum number of ranked files scanned before falling back to a full walk.
    const PREFILTER_LIMIT = 20;
    let rankedFiles = [];
    if (index?._ms && !REGEX_META.test(pattern)) {
        rankedFiles = index._ms
            .search(pattern, {
            prefix: true,
            fuzzy: 0.2,
            combineWith: "OR",
        })
            .map((hit) => index._idToPath.get(hit.id))
            // The index covers every file, so the include filter must be applied here too.
            .filter((p) => p !== undefined && (!opts.include || matchGlob(p, opts.include)))
            .slice(0, PREFILTER_LIMIT);
    }
    const preFiltered = rankedFiles.length > 0;
    const firstCandidates = preFiltered
        ? rankedFiles
        : await collectFiles(rootPath, opts.include);
    let { matches, totalMatches, searchedFiles, truncated } = await grepCandidateFiles(rootPath, firstCandidates, regex, opts);
    if (preFiltered && totalMatches === 0 && !truncated) {
        // The ranking is token based and capped, while the regex is a substring
        // match: scan whatever the pre-filter left out before reporting "no match".
        const alreadyScanned = new Set(rankedFiles);
        const remaining = (await collectFiles(rootPath, opts.include)).filter((p) => !alreadyScanned.has(p));
        const secondPass = await grepCandidateFiles(rootPath, remaining, regex, opts);
        matches = secondPass.matches;
        totalMatches = secondPass.totalMatches;
        searchedFiles += secondPass.searchedFiles;
        truncated = secondPass.truncated;
    }
    return {
        matches,
        total_matches: totalMatches,
        searched_files: searchedFiles,
        used_pattern: pattern,
        truncated,
    };
}
/** Stream-grep `files` in order, stopping as soon as `opts.maxMatches` matches are collected. */
async function grepCandidateFiles(rootPath, files, regex, opts) {
    const matches = [];
    let totalMatches = 0;
    let searchedFiles = 0;
    let truncated = false;
    for (const absPath of files) {
        if (truncated)
            break;
        const relPath = relative(rootPath, absPath).replace(/\\/g, "/");
        searchedFiles++;
        const fileMatches = await grepFile(absPath, relPath, regex, opts.contextLines, opts.maxMatches - totalMatches);
        totalMatches += fileMatches.length;
        matches.push(...fileMatches);
        if (totalMatches >= opts.maxMatches) {
            truncated = true;
        }
    }
    return { matches, totalMatches, searchedFiles, truncated };
}
173
+ // ---------------------------------------------------------------------------
174
+ // File walking
175
+ // ---------------------------------------------------------------------------
176
/**
 * Recursively yield absolute paths of all non-ignored files under `dir`.
 *
 * Symbolic links are followed (their target decides whether they count as a
 * file or a directory). A linked directory that resolves to one of the
 * directories currently being walked is skipped, so a link pointing back at
 * an ancestor cannot make the walk revisit the same tree over and over.
 *
 * @param dir       Directory to walk. Missing or unreadable directories
 *                  yield nothing.
 * @param ancestors Internal: real paths of the directories on the current
 *                  recursion path. Callers normally omit it.
 */
async function* walkFiles(dir, ancestors = new Set()) {
    let realDir;
    try {
        const { realpath } = await import("node:fs/promises");
        realDir = await realpath(dir);
    }
    catch {
        return; // missing or unreadable directory
    }
    if (ancestors.has(realDir)) {
        return; // symlink cycle: this directory is already being walked
    }
    // A fresh set per level keeps the check limited to true ancestors, so two
    // different links to the same directory are still both walked.
    const lineage = new Set(ancestors).add(realDir);
    let entries;
    try {
        entries = await readdir(dir, { withFileTypes: true });
    }
    catch {
        return;
    }
    for (const entry of entries) {
        const absPath = join(dir, entry.name);
        // For symlinks, stat() follows the link to get the real type.
        // entry.isDirectory() / entry.isFile() return false for symlinks.
        let isDir = entry.isDirectory();
        let isFile = entry.isFile();
        if (entry.isSymbolicLink()) {
            try {
                const s = await stat(absPath);
                isDir = s.isDirectory();
                isFile = s.isFile();
            }
            catch {
                continue; // broken symlink — skip
            }
        }
        if (isDir) {
            if (!IGNORED_NAMES.has(entry.name)) {
                yield* walkFiles(absPath, lineage);
            }
        }
        else if (isFile) {
            if (!IGNORED_FILES.has(entry.name)) {
                yield absPath;
            }
        }
    }
}
213
/**
 * Gather every path produced by `walkFiles(rootPath)` into an array.
 * When `include` is given, only paths accepted by `matchGlob` are kept.
 */
async function collectFiles(rootPath, include) {
    const collected = [];
    for await (const candidate of walkFiles(rootPath)) {
        const accepted = !include || matchGlob(candidate, include);
        if (accepted) {
            collected.push(candidate);
        }
    }
    return collected;
}
223
+ // ---------------------------------------------------------------------------
224
+ // Per-file grep
225
+ // ---------------------------------------------------------------------------
226
/**
 * Read `absPath` line by line and return up to `limit` matches with context.
 *
 * Reading stops as soon as `limit` matches have been found and the trailing
 * context of the last one has been read, so only the needed part of the file
 * is buffered.
 *
 * @param absPath      File to scan.
 * @param relPath      Path reported in each match's `file` field.
 * @param regex        Compiled pattern tested against every line.
 * @param contextLines Number of lines to report before and after each match.
 *                     Negative or non-finite values are treated as 0.
 * @param limit        Maximum number of matches to return; `<= 0` returns none.
 * @returns Matches in file order (1-based `line`, trailing whitespace removed
 *          from `content` and context lines). Empty for binary files and for
 *          files that cannot be read.
 */
async function grepFile(absPath, relPath, regex, contextLines, limit) {
    if (limit <= 0)
        return [];
    // Binary detection: read first 8 KB and check for null bytes
    if (await isBinaryFile(absPath))
        return [];
    // NaN would make the slice() calls below return every preceding line.
    const ctx = Number.isFinite(contextLines)
        ? Math.max(0, Math.trunc(contextLines))
        : 0;
    const lines = [];
    const matchIndices = []; // 0-based indices into `lines`
    let input;
    let rl;
    try {
        input = createReadStream(absPath, { encoding: "utf8" });
        rl = createInterface({
            input,
            crlfDelay: Infinity,
        });
        for await (const line of rl) {
            lines.push(line);
            const limitReached = matchIndices.length >= limit;
            if (!limitReached && regex.test(line)) {
                matchIndices.push(lines.length - 1);
            }
            // Stop once the limit is hit and the last match has its trailing context.
            if (matchIndices.length >= limit &&
                lines.length > matchIndices[matchIndices.length - 1] + ctx) {
                break;
            }
        }
    }
    catch {
        // Unreadable file (permissions, encoding errors) — skip silently
        return [];
    }
    finally {
        // Release the reader and the file descriptor, also after an early stop.
        rl?.close();
        input?.destroy();
    }
    return matchIndices.map((idx) => ({
        file: relPath,
        line: idx + 1, // convert to 1-based
        content: lines[idx].trimEnd(),
        context_before: lines
            .slice(Math.max(0, idx - ctx), idx)
            .map((l) => l.trimEnd()),
        context_after: lines
            .slice(idx + 1, idx + 1 + ctx)
            .map((l) => l.trimEnd()),
    }));
}
274
+ // ---------------------------------------------------------------------------
275
+ // Helpers
276
+ // ---------------------------------------------------------------------------
277
/**
 * Load a file's full content as UTF-8 text.
 *
 * @returns The text, or `null` when `isBinaryFile` flags the file or the
 *          read fails.
 */
async function readTextFile(absPath) {
    const binary = await isBinaryFile(absPath);
    if (binary) {
        return null;
    }
    try {
        const fsPromises = await import("node:fs/promises");
        return await fsPromises.readFile(absPath, "utf8");
    }
    catch {
        return null;
    }
}
289
/**
 * Heuristic binary check: a file counts as binary when a null byte occurs
 * within its first 8 KB.
 *
 * @returns `false` for empty files and files without a null byte in the
 *          probed range; `true` when a null byte is found or when the file
 *          cannot be inspected (so that callers skip it).
 */
async function isBinaryFile(absPath) {
    try {
        const { size } = await stat(absPath);
        if (size === 0) {
            return false;
        }
        const { open } = await import("node:fs/promises");
        const handle = await open(absPath, "r");
        try {
            const probe = Buffer.alloc(Math.min(8192, size));
            const { bytesRead } = await handle.read(probe, 0, probe.length, 0);
            // Only the bytes actually read are inspected.
            return probe.subarray(0, bytesRead).includes(0);
        }
        finally {
            await handle.close();
        }
    }
    catch {
        return true; // treat unreadable as binary → skip
    }
}
317
/**
 * Backslash-escape every regex metacharacter in `s` so the result can be
 * compiled into a RegExp that matches `s` literally.
 */
function escapeRegex(s) {
    const metaChars = /[.+*?^${}()|[\]\\]/g;
    return s.replace(metaChars, "\\$&");
}
321
/**
 * Very lightweight glob matching supporting `*`, `**`, `?` and brace
 * alternation (`{a,b}`). Only used for the `include` file-filter option; not
 * a full glob engine.
 *
 * The pattern is anchored at the END of `filePath` only, so `*.md` accepts
 * any path whose last segment ends in `.md`. Matching is case-insensitive.
 *
 *  - `*`     any run of characters except `/`
 *  - `**`    any run of characters including `/`
 *  - `?`     a single character except `/`
 *  - `{a,b}` either alternative; groups may nest. When the braces in the
 *            pattern are not balanced they are all matched literally.
 *
 * @param filePath Path to test.
 * @param pattern  Glob pattern.
 * @returns `true` when the end of `filePath` matches `pattern`.
 */
function matchGlob(filePath, pattern) {
    // Expand brace groups only when every "{" has a matching "}"; otherwise
    // the generated regex would contain an unterminated group.
    let depth = 0;
    let balanced = true;
    for (const ch of pattern) {
        if (ch === "{") {
            depth++;
        }
        else if (ch === "}" && --depth < 0) {
            balanced = false;
            break;
        }
    }
    const expandBraces = balanced && depth === 0;
    const REGEX_CHARS = /[.+^${}()|[\]\\]/g;
    let regexStr = "";
    let openGroups = 0;
    for (let i = 0; i < pattern.length; i++) {
        const ch = pattern[i];
        if (ch === "*") {
            if (pattern[i + 1] === "*") {
                regexStr += ".*"; // ** → any chars including /
                i++;
            }
            else {
                regexStr += "[^/]*"; // * → any chars except /
            }
        }
        else if (ch === "?") {
            regexStr += "[^/]"; // ? → single char except /
        }
        else if (expandBraces && ch === "{") {
            openGroups++;
            regexStr += "(?:";
        }
        else if (expandBraces && ch === "}") {
            openGroups--;
            regexStr += ")";
        }
        else if (expandBraces && ch === "," && openGroups > 0) {
            regexStr += "|"; // separator between alternatives
        }
        else {
            regexStr += ch.replace(REGEX_CHARS, "\\$&"); // literal character
        }
    }
    return new RegExp(regexStr + "$", "i").test(filePath);
}
338
+ // ---------------------------------------------------------------------------
339
+ // Formatting helpers (used by the MCP server layer)
340
+ // ---------------------------------------------------------------------------
341
/**
 * Format a {@link SearchDocsResult} as a human-readable, grep-style text block
 * suitable for returning as MCP tool content.
 *
 * Layout: one `==> file <==` header per file (blank line between files), each
 * match as `line: content` surrounded by its context lines, then a blank line
 * and a summary line. When the result was truncated, the summary reports the
 * number of matches actually returned rather than a fixed default, so it is
 * correct for any `maxMatches` the caller used.
 *
 * @param result Search result to render.
 * @returns The formatted text; a "No matches found" message when the result
 *          contains no matches.
 */
export function formatSearchResult(result) {
    const fileSuffix = result.searched_files === 1 ? "" : "s";
    if (result.matches.length === 0) {
        return `No matches found for pattern: ${result.used_pattern}\n(searched ${result.searched_files} file${fileSuffix})`;
    }
    const lines = [];
    let currentFile = "";
    for (const match of result.matches) {
        if (match.file !== currentFile) {
            if (currentFile !== "")
                lines.push(""); // blank separator between files
            lines.push(`==> ${match.file} <==`);
            currentFile = match.file;
        }
        for (const ctx of match.context_before) {
            lines.push(` ${ctx}`);
        }
        lines.push(`${match.line}: ${match.content}`);
        for (const ctx of match.context_after) {
            lines.push(` ${ctx}`);
        }
    }
    const matchSuffix = result.total_matches === 1 ? "" : "es";
    // Report the real cut-off: the search stops exactly at the caller's limit.
    const truncationNote = result.truncated
        ? ` [truncated at ${result.matches.length}]`
        : "";
    const summary = [
        ``,
        `--- ${result.total_matches} match${matchSuffix} in ${result.searched_files} file${fileSuffix} (pattern: "${result.used_pattern}")${truncationNote}`,
    ];
    return [...lines, ...summary].join("\n");
}
@@ -85,6 +85,7 @@ export interface SearchDocsParams {
85
85
  }
86
86
  /**
87
87
  * Response from the search_docs tool
88
+ * @deprecated Use SearchDocsResult for actual search results
88
89
  */
89
90
  export interface SearchDocsResponse {
90
91
  /** Instructions for the agent on how to search */
@@ -96,6 +97,55 @@ export interface SearchDocsResponse {
96
97
  /** The calculated local path for searching */
97
98
  path: string;
98
99
  }
100
/**
 * A single line match from a file search
 */
export interface SearchMatch {
    /** Path to the file, relative to the docset root, using `/` as separator */
    file: string;
    /** 1-based line number of the match */
    line: number;
    /** The full content of the matched line with trailing whitespace removed (leading indentation is kept) */
    content: string;
    /** Lines immediately before the match (up to contextLines lines), trailing whitespace removed */
    context_before: string[];
    /** Lines immediately after the match (up to contextLines lines), trailing whitespace removed */
    context_after: string[];
}
115
/**
 * Result returned by the search_docs tool when performing an actual search
 */
export interface SearchDocsResult {
    /** All matched lines across all searched files */
    matches: SearchMatch[];
    /**
     * Number of matches collected. The search stops as soon as the match
     * limit is reached, so this is the count of returned matches and not the
     * number of matches that exist in the docset when `truncated` is true.
     */
    total_matches: number;
    /** Number of files inspected during the search */
    searched_files: number;
    /** The pattern that was actually used (may differ from input if fallback was triggered) */
    used_pattern: string;
    /** True when the number of collected matches reached the maximum match limit */
    truncated: boolean;
}
130
/**
 * Options controlling search behaviour
 */
export interface SearchOptions {
    /**
     * Fallback pattern used when the primary pattern yields no results.
     * Typically the value of the `generalized_keywords` tool parameter.
     */
    fallbackPattern?: string;
    /** Number of context lines to include before and after each match (default: 0) */
    contextLines?: number;
    /** Maximum number of matches to return before truncating (default: 50) */
    maxMatches?: number;
    /**
     * Glob-style pattern to restrict which files are searched
     * (e.g. "*.md", "docs/**\/*.md"). The pattern is matched
     * case-insensitively against the end of each file path.
     * When omitted all non-binary files are searched.
     */
    include?: string;
}
99
149
  /**
100
150
  * Response from the list_docsets tool
101
151
  */
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@codemcp/knowledge-core",
3
- "version": "1.5.0",
3
+ "version": "1.6.1",
4
4
  "description": "Core functionality for agentic knowledge guidance system",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -29,7 +29,8 @@
29
29
  "typecheck": "tsc --noEmit"
30
30
  },
31
31
  "dependencies": {
32
- "js-yaml": "^4.1.0"
32
+ "js-yaml": "^4.1.0",
33
+ "minisearch": "^7.1.2"
33
34
  },
34
35
  "devDependencies": {
35
36
  "@eslint/js": "^9.34.0",
@@ -4,10 +4,18 @@
4
4
  import { Server } from "@modelcontextprotocol/sdk/server/index.js";
5
5
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
6
6
  import { CallToolRequestSchema, ListToolsRequestSchema, } from "@modelcontextprotocol/sdk/types.js";
7
- import { loadConfig, findConfigPath, calculateLocalPath, processTemplate, createTemplateContext, getEffectiveTemplate, createStructuredResponse, ConfigManager, ensureKnowledgeGitignoreSync, } from "@codemcp/knowledge-core";
7
+ import { loadConfig, findConfigPath, calculateLocalPath, ConfigManager, ensureKnowledgeGitignoreSync, buildFileIndex, searchDocset, formatSearchResult, } from "@codemcp/knowledge-core";
8
8
  import { initDocset } from "@codemcp/knowledge-content-loader";
9
9
  import { existsSync } from "node:fs";
10
10
  import { resolve, dirname } from "node:path";
11
+ /** Shared keywords parameter description advertised to agents */
12
+ const KEYWORDS_DESCRIPTION = "Primary search terms or concepts you're looking for. " +
13
+ 'Supports full regex syntax (e.g. "log.*Error", "function\\s+\\w+", "auth|login"). ' +
14
+ "Returns file path, line number, matched line, and surrounding context lines. " +
15
+ 'Be specific: "authentication middleware", "useData hook", "sidebar.items".';
16
+ const GENERALIZED_KEYWORDS_DESCRIPTION = "Broader synonyms or related terms used as a fallback when the primary keywords " +
17
+ 'return no results (e.g. for "authentication" you might include "login|signin|oauth"). ' +
18
+ "Also supports regex syntax.";
11
19
  /**
12
20
  * Create an agentic knowledge MCP server
13
21
  * @returns MCP server instance
@@ -25,6 +33,8 @@ export function createAgenticKnowledgeServer() {
25
33
  let configCache = null;
26
34
  let configLoadTime = 0;
27
35
  const CONFIG_CACHE_TTL = 60000; // 1 minute cache
36
+ // Per-docset search index cache (keyed by docset id)
37
+ const indexCache = new Map();
28
38
  /**
29
39
  * Load configuration with caching (returns null if no config found)
30
40
  */
@@ -55,6 +65,35 @@ export function createAgenticKnowledgeServer() {
55
65
  return null;
56
66
  }
57
67
  }
68
/**
 * Compute the absolute directory that holds a docset's content.
 *
 *  - `local_folder` sources live in `<configDir>/docsets/<docset id>`.
 *  - Every other source type resolves `calculateLocalPath(docset, configPath)`
 *    against the parent directory of the config directory.
 *
 * For `local_folder`, `git_repo` and `archive` sources the directory must
 * contain `.agentic-metadata.json`; other source types are not checked.
 *
 * @param docset     Docset configuration (`id`, optional `sources`).
 * @param configPath Path of the configuration file.
 * @returns Absolute path of the docset directory.
 * @throws Error when a checked source type has no metadata file yet.
 */
function resolveDocsetPath(docset, configPath) {
    const configDir = dirname(configPath);
    const sourceType = docset.sources?.[0]?.type;
    const isLocalFolder = sourceType === "local_folder";
    const docsetRoot = isLocalFolder
        ? resolve(configDir, "docsets", docset.id)
        : resolve(dirname(configDir), calculateLocalPath(docset, configPath));
    const mustBeInitialized = isLocalFolder || sourceType === "git_repo" || sourceType === "archive";
    if (mustBeInitialized &&
        !existsSync(resolve(docsetRoot, ".agentic-metadata.json"))) {
        throw new Error(`Docset '${docset.id}' hasn't been initialized yet.`);
    }
    return docsetRoot;
}
58
97
  // Register tool handlers
59
98
  server.setRequestHandler(ListToolsRequestSchema, async () => {
60
99
  // Load configuration to get available docsets
@@ -65,7 +104,7 @@ export function createAgenticKnowledgeServer() {
65
104
  tools: [
66
105
  {
67
106
  name: "search_docs",
68
- description: `Search for documentation in configured docsets. Returns structured response with search instructions and parameters.
107
+ description: `Search for documentation in configured docsets. Returns file path, line number, matched content, and surrounding context.
69
108
 
70
109
  ⚠️ **NO DOCSETS CONFIGURED**
71
110
 
@@ -109,11 +148,11 @@ After configuring, the tool will show available docsets here.`,
109
148
  },
110
149
  keywords: {
111
150
  type: "string",
112
- description: 'Primary search terms or concepts you\'re looking for. Be specific about what you want to find (e.g., "authentication middleware", "user validation", "API rate limiting").',
151
+ description: KEYWORDS_DESCRIPTION,
113
152
  },
114
153
  generalized_keywords: {
115
154
  type: "string",
116
- description: "Related terms, synonyms, or contextual keywords that may appear alongside your primary keywords but are not your main target.",
155
+ description: GENERALIZED_KEYWORDS_DESCRIPTION,
117
156
  },
118
157
  },
119
158
  required: ["docset_id", "keywords"],
@@ -161,11 +200,8 @@ After configuring, the tool will show available docsets here.`,
161
200
  return `• **${docset.id}** (${docset.name})${description}`;
162
201
  })
163
202
  .join("\n");
164
- const searchDocsDescription = `Search for documentation in available docsets. Returns structured response with search instructions and parameters.
165
-
166
- 📚 **AVAILABLE DOCSETS:**
167
- ${docsetInfo}
168
- `;
203
+ const searchDocsDescription = `Search for documentation in available docsets. Returns file path, line number, matched content, and surrounding context lines.\n\n` +
204
+ `📚 **AVAILABLE DOCSETS:**\n${docsetInfo}`;
169
205
  return {
170
206
  tools: [
171
207
  {
@@ -181,11 +217,16 @@ ${docsetInfo}
181
217
  },
182
218
  keywords: {
183
219
  type: "string",
184
- description: 'Primary search terms or concepts you\'re looking for. Be specific about what you want to find (e.g., "authentication middleware", "user validation", "API rate limiting"). Include the exact terms you expect to appear in the documentation.',
220
+ description: KEYWORDS_DESCRIPTION,
185
221
  },
186
222
  generalized_keywords: {
187
223
  type: "string",
188
- description: 'Related terms, synonyms, or contextual keywords that may appear alongside your primary keywords but are not your main target. These help broaden the search context and catch relevant content that might use different terminology (e.g., for "authentication" you might include "login, signin, oauth, credentials, tokens"). Think of terms that would appear in the same sections or discussions as your main keywords.',
224
+ description: GENERALIZED_KEYWORDS_DESCRIPTION,
225
+ },
226
+ context_lines: {
227
+ type: "number",
228
+ description: "Number of lines to show before and after each matching line (default: 0). " +
229
+ "Increase to 1–3 when you need surrounding context to understand a match.",
189
230
  },
190
231
  },
191
232
  required: ["docset_id", "keywords"],
@@ -232,7 +273,7 @@ ${config.docsets.map((d) => `• **${d.id}** (${d.name})`).join("\n")}`,
232
273
  try {
233
274
  switch (name) {
234
275
  case "search_docs": {
235
- const { docset_id, keywords, generalized_keywords } = args;
276
+ const { docset_id, keywords, generalized_keywords, context_lines } = args;
236
277
  // Validate required parameters
237
278
  if (!docset_id || typeof docset_id !== "string") {
238
279
  throw new Error("docset_id is required and must be a string");
@@ -257,52 +298,27 @@ ${config.docsets.map((d) => `• **${d.id}** (${d.name})`).join("\n")}`,
257
298
  const docset = config.docsets.find((d) => d.id === docset_id);
258
299
  if (!docset) {
259
300
  const availableIds = config.docsets.map((d) => d.id).join(", ");
260
- throw new Error(`Docset '${docset_id}' not found.\n\n` +
261
- `Available docsets: ${availableIds}\n\n`);
262
- }
263
- // Determine path calculation method and validate initialization
264
- const primarySource = docset.sources?.[0];
265
- let localPath;
266
- if (primarySource?.type === "local_folder") {
267
- // For local folders, use symlinked path
268
- localPath = calculateLocalPath(docset, configPath);
269
- // Check if initialized by verifying .agentic-metadata.json exists
270
- const configDir = dirname(configPath);
271
- const symlinkDir = resolve(configDir, "docsets", docset.id);
272
- const metadataPath = resolve(symlinkDir, ".agentic-metadata.json");
273
- if (!existsSync(metadataPath)) {
274
- throw new Error(`Docset '${docset_id}' hasn't been initialized yet.`);
275
- }
276
- // Return the symlinked path for consistency
277
- localPath = resolve(configDir, "docsets", docset.id);
278
- const projectRoot2 = dirname(configDir);
279
- localPath = resolve(projectRoot2, localPath).replace(projectRoot2 + "/", "");
280
- }
281
- else if (primarySource?.type === "git_repo") {
282
- // For git repos, use standard path calculation
283
- localPath = calculateLocalPath(docset, configPath);
284
- // Check if .agentic-metadata.json exists
285
- const configDir = dirname(configPath);
286
- const projectRoot = dirname(configDir);
287
- const absolutePath = resolve(projectRoot, localPath);
288
- const metadataPath = resolve(absolutePath, ".agentic-metadata.json");
289
- if (!existsSync(metadataPath)) {
290
- throw new Error(`Docset '${docset_id}' hasn't been initialized yet.\n\n`);
291
- }
301
+ throw new Error(`Docset '${docset_id}' not found.\n\nAvailable docsets: ${availableIds}`);
292
302
  }
293
- else {
294
- // Fallback to standard calculation for unknown types
295
- localPath = calculateLocalPath(docset, configPath);
303
+ // Resolve the absolute local path (also validates initialization)
304
+ const absoluteLocalPath = resolveDocsetPath(docset, configPath);
305
+ // Get or build the search index for this docset
306
+ let index = indexCache.get(docset_id);
307
+ if (!index) {
308
+ index = await buildFileIndex(absoluteLocalPath);
309
+ indexCache.set(docset_id, index);
296
310
  }
297
- // Create template context with proper function signature
298
- const templateContext = createTemplateContext(localPath, keywords.trim(), (generalized_keywords || "").trim(), docset);
299
- // Get effective template and process it
300
- const effectiveTemplate = getEffectiveTemplate(docset, config.template);
301
- const instructions = processTemplate(effectiveTemplate, templateContext);
302
- // Create structured response
303
- const structuredResponse = createStructuredResponse(instructions, keywords.trim(), (generalized_keywords || "").trim(), localPath);
311
+ // Perform the search
312
+ const fallbackPattern = generalized_keywords?.trim();
313
+ const searchOptions = {};
314
+ if (fallbackPattern)
315
+ searchOptions.fallbackPattern = fallbackPattern;
316
+ if (typeof context_lines === "number")
317
+ searchOptions.contextLines = context_lines;
318
+ const result = await searchDocset(absoluteLocalPath, keywords.trim(), searchOptions, index);
319
+ const text = formatSearchResult(result);
304
320
  return {
305
- structuredContent: structuredResponse,
321
+ content: [{ type: "text", text }],
306
322
  };
307
323
  }
308
324
  case "list_docsets": {
@@ -380,9 +396,11 @@ ${config.docsets.map((d) => `• **${d.id}** (${d.name})`).join("\n")}`,
380
396
  }
381
397
  const configManager = new ConfigManager();
382
398
  const { config, configPath } = await configManager.loadConfig(process.cwd());
383
- // Invalidate cache so the next search_docs call sees the new state
399
+ // Invalidate config cache and search index cache so the next
400
+ // search_docs call sees the newly initialized content
384
401
  configCache = null;
385
402
  configLoadTime = 0;
403
+ indexCache.delete(docset_id);
386
404
  ensureKnowledgeGitignoreSync(configPath);
387
405
  const docset = config.docsets.find((d) => d.id === docset_id);
388
406
  if (!docset) {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@codemcp/knowledge",
3
- "version": "1.5.0",
3
+ "version": "1.6.1",
4
4
  "description": "MCP server implementation for agentic knowledge guidance system",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",