agentic-knowledge-mcp 1.5.0 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +5 -5
- package/packages/cli/package.json +1 -1
- package/packages/content-loader/package.json +1 -1
- package/packages/core/dist/index.d.ts +1 -0
- package/packages/core/dist/index.js +2 -0
- package/packages/core/dist/search/searcher.d.ts +53 -0
- package/packages/core/dist/search/searcher.js +371 -0
- package/packages/core/dist/types.d.ts +50 -0
- package/packages/core/package.json +3 -2
- package/packages/mcp-server/dist/server.js +74 -56
- package/packages/mcp-server/package.json +1 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agentic-knowledge-mcp",
|
|
3
|
-
"version": "1.5.0",
|
|
3
|
+
"version": "1.6.1",
|
|
4
4
|
"description": "A Model Context Protocol server for agentic knowledge guidance with web-based documentation loading and intelligent search instructions",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "packages/cli/dist/index.js",
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
"agentic-knowledge": "packages/cli/dist/index.js"
|
|
9
9
|
},
|
|
10
10
|
"engines": {
|
|
11
|
-
"node": ">=
|
|
11
|
+
"node": ">=20.0.0",
|
|
12
12
|
"pnpm": ">=9.0.0"
|
|
13
13
|
},
|
|
14
14
|
"files": [
|
|
@@ -29,9 +29,9 @@
|
|
|
29
29
|
"commander": "^12.0.0",
|
|
30
30
|
"js-yaml": "4.1.0",
|
|
31
31
|
"ora": "^8.0.1",
|
|
32
|
-
"@codemcp/knowledge": "1.
|
|
33
|
-
"@codemcp/knowledge
|
|
34
|
-
"@codemcp/knowledge-core": "1.
|
|
32
|
+
"@codemcp/knowledge-content-loader": "1.6.1",
|
|
33
|
+
"@codemcp/knowledge": "1.6.1",
|
|
34
|
+
"@codemcp/knowledge-core": "1.6.1"
|
|
35
35
|
},
|
|
36
36
|
"devDependencies": {
|
|
37
37
|
"@eslint/js": "^9.34.0",
|
|
@@ -13,3 +13,4 @@ export { createSymlinks, removeSymlinks } from "./paths/symlinks.js";
|
|
|
13
13
|
export { discoverDirectoryPatterns, discoverMinimalPatterns, } from "./paths/discovery.js";
|
|
14
14
|
export { safelyClearDirectory, containsSymlinks, getDirectoryInfo, } from "./paths/cleanup.js";
|
|
15
15
|
export { processTemplate, getEffectiveTemplate, validateTemplate, extractVariables, createTemplateContext, createStructuredResponse, } from "./templates/processor.js";
|
|
16
|
+
export { buildFileIndex, searchDocset, formatSearchResult, type DocsetIndex, } from "./search/searcher.js";
|
|
@@ -20,3 +20,5 @@ export { discoverDirectoryPatterns, discoverMinimalPatterns, } from "./paths/dis
|
|
|
20
20
|
export { safelyClearDirectory, containsSymlinks, getDirectoryInfo, } from "./paths/cleanup.js";
|
|
21
21
|
// Export template processing
|
|
22
22
|
export { processTemplate, getEffectiveTemplate, validateTemplate, extractVariables, createTemplateContext, createStructuredResponse, } from "./templates/processor.js";
|
|
23
|
+
// Export search functionality
|
|
24
|
+
export { buildFileIndex, searchDocset, formatSearchResult, } from "./search/searcher.js";
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* File-content search for docsets.
|
|
3
|
+
*
|
|
4
|
+
* Strategy (ADR-001 Option C + optional MiniSearch pre-filter):
|
|
5
|
+
* 1. Walk the docset directory, skip binary files and ignored paths.
|
|
6
|
+
* 2. If MiniSearch is available AND the pattern looks like a plain term (no regex
|
|
7
|
+
* metacharacters), build/retrieve a lightweight in-memory index and use it to
|
|
8
|
+
* rank the most relevant files first — this keeps the hot path fast on large
|
|
9
|
+
* docsets without requiring any extra dependency.
|
|
10
|
+
* 3. Stream each candidate file line by line; test against the compiled RegExp.
|
|
11
|
+
* 4. Collect up to `maxMatches` results with surrounding context lines.
|
|
12
|
+
* 5. If 0 matches and a fallbackPattern is provided, repeat with that pattern.
|
|
13
|
+
* 6. If still 0, re-run without pre-filtering (safety net for exotic regex).
|
|
14
|
+
*/
|
|
15
|
+
import type { SearchDocsResult, SearchOptions } from "../types.js";
|
|
16
|
+
/** Opaque handle returned by {@link buildFileIndex}. */
|
|
17
|
+
export interface DocsetIndex {
|
|
18
|
+
/** MiniSearch instance (null when MiniSearch could not be loaded) */
|
|
19
|
+
_ms: {
|
|
20
|
+
search(_query: string, _opts?: Record<string, unknown>): Array<{
|
|
21
|
+
id: unknown;
|
|
22
|
+
score: number;
|
|
23
|
+
}>;
|
|
24
|
+
} | null;
|
|
25
|
+
/** Absolute path to the docset root used to build this index */
|
|
26
|
+
rootPath: string;
|
|
27
|
+
/** Map from numeric doc id → absolute file path */
|
|
28
|
+
_idToPath: Map<number, string>;
|
|
29
|
+
}
|
|
30
|
+
/**
|
|
31
|
+
* Build an in-memory full-text index over all text files in `rootPath`.
|
|
32
|
+
* Returns a {@link DocsetIndex} regardless of whether MiniSearch is available;
|
|
33
|
+
* when it is not, the index is a no-op stub that causes the caller to fall
|
|
34
|
+
* back to a full streaming search.
|
|
35
|
+
*/
|
|
36
|
+
export declare function buildFileIndex(rootPath: string): Promise<DocsetIndex>;
|
|
37
|
+
/**
|
|
38
|
+
* Search `rootPath` for lines matching `pattern` (a regex string).
|
|
39
|
+
*
|
|
40
|
+
* @param rootPath Absolute path to the docset directory.
|
|
41
|
+
* @param pattern Primary search pattern. Supports full JS regex syntax
|
|
42
|
+
* (e.g. `"auth|login"`, `"function\\s+\\w+"`, `"TODO.*fix"`).
|
|
43
|
+
* The match is always case-insensitive.
|
|
44
|
+
* @param options Optional tuning parameters.
|
|
45
|
+
* @param index Pre-built index for the docset. Pass one to avoid re-walking
|
|
46
|
+
* on repeated calls. Omit to build ad-hoc (no caching).
|
|
47
|
+
*/
|
|
48
|
+
export declare function searchDocset(rootPath: string, pattern: string, options?: SearchOptions, index?: DocsetIndex): Promise<SearchDocsResult>;
|
|
49
|
+
/**
|
|
50
|
+
* Format a {@link SearchDocsResult} as a human-readable, grep-style text block
|
|
51
|
+
* suitable for returning as MCP tool content.
|
|
52
|
+
*/
|
|
53
|
+
export declare function formatSearchResult(result: SearchDocsResult): string;
|
|
@@ -0,0 +1,371 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* File-content search for docsets.
|
|
3
|
+
*
|
|
4
|
+
* Strategy (ADR-001 Option C + optional MiniSearch pre-filter):
|
|
5
|
+
* 1. Walk the docset directory, skip binary files and ignored paths.
|
|
6
|
+
* 2. If MiniSearch is available AND the pattern looks like a plain term (no regex
|
|
7
|
+
* metacharacters), build/retrieve a lightweight in-memory index and use it to
|
|
8
|
+
* rank the most relevant files first — this keeps the hot path fast on large
|
|
9
|
+
* docsets without requiring any extra dependency.
|
|
10
|
+
* 3. Stream each candidate file line by line; test against the compiled RegExp.
|
|
11
|
+
* 4. Collect up to `maxMatches` results with surrounding context lines.
|
|
12
|
+
* 5. If 0 matches and a fallbackPattern is provided, repeat with that pattern.
|
|
13
|
+
* 6. If still 0, re-run without pre-filtering (safety net for exotic regex).
|
|
14
|
+
*/
|
|
15
|
+
import { createReadStream } from "node:fs";
|
|
16
|
+
import { readdir, stat } from "node:fs/promises";
|
|
17
|
+
import { join, relative } from "node:path";
|
|
18
|
+
import { createInterface } from "node:readline";
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
// Constants
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
const DEFAULT_CONTEXT_LINES = 0;
|
|
23
|
+
const DEFAULT_MAX_MATCHES = 50;
|
|
24
|
+
/** Directories / files that are never useful to search inside a docset. */
|
|
25
|
+
const IGNORED_NAMES = new Set([
|
|
26
|
+
"node_modules",
|
|
27
|
+
".git",
|
|
28
|
+
"dist",
|
|
29
|
+
"build",
|
|
30
|
+
".turbo",
|
|
31
|
+
".cache",
|
|
32
|
+
]);
|
|
33
|
+
/** Files that are always skipped regardless of directory. */
|
|
34
|
+
const IGNORED_FILES = new Set([".agentic-metadata.json", ".gitignore"]);
|
|
35
|
+
/**
|
|
36
|
+
* Regex metacharacters that indicate the user supplied a real regex pattern.
|
|
37
|
+
* When present we skip the MiniSearch pre-filter (it would tokenise the raw
|
|
38
|
+
* pattern incorrectly) and go straight to streaming grep.
|
|
39
|
+
*/
|
|
40
|
+
const REGEX_META = /[.+*?^${}()|[\]\\]/;
|
|
41
|
+
// ---------------------------------------------------------------------------
|
|
42
|
+
// MiniSearch integration (optional, best-effort)
|
|
43
|
+
// ---------------------------------------------------------------------------
|
|
44
|
+
/**
 * Best-effort loader for the optional `minisearch` package.
 *
 * The import is attempted lazily so that a missing package never throws at
 * module load time.
 *
 * @returns The module's default export when it is a constructor/function, or
 *          `null` when the package cannot be imported or exposes no usable
 *          default export. Callers treat `null` as "no pre-filter available".
 */
async function tryLoadMiniSearch() {
    try {
        const mod = await import("minisearch");
        const ctor = mod.default;
        // Normalise "module loaded but no usable default export" to null so the
        // documented contract (null on failure) holds in every failure mode.
        return typeof ctor === "function" ? ctor : null;
    }
    catch {
        return null;
    }
}
|
|
59
|
+
/**
 * Index every readable text file below `rootPath` for ranked pre-filtering.
 *
 * A {@link DocsetIndex} is always returned. When MiniSearch cannot be loaded
 * the result carries `_ms: null` and an empty id map, which makes the search
 * code skip the pre-filter and stream over all files instead.
 *
 * @param rootPath Directory to walk.
 * @returns The index handle: MiniSearch instance (or null), the root path and
 *          the numeric-id → absolute-path map.
 */
export async function buildFileIndex(rootPath) {
    const MiniSearchCtor = await tryLoadMiniSearch();
    if (!MiniSearchCtor) {
        return { _ms: null, rootPath, _idToPath: new Map() };
    }
    const engine = new MiniSearchCtor({
        fields: ["content"],
        storeFields: [],
    });
    const pathsById = new Map();
    const documents = [];
    for await (const filePath of walkFiles(rootPath)) {
        const text = await readTextFile(filePath);
        if (text !== null) {
            // Ids are assigned densely in walk order; skipped (binary or
            // unreadable) files do not consume an id.
            const docId = documents.length;
            documents.push({ id: docId, content: text });
            pathsById.set(docId, filePath);
        }
    }
    await engine.addAllAsync(documents);
    return { _ms: engine, rootPath, _idToPath: pathsById };
}
|
|
88
|
+
// ---------------------------------------------------------------------------
|
|
89
|
+
// Main search entry point
|
|
90
|
+
// ---------------------------------------------------------------------------
|
|
91
|
+
/**
 * Search the docset at `rootPath` for lines matching `pattern`, retrying once
 * with `options.fallbackPattern` when the primary pattern finds nothing.
 *
 * @param rootPath Absolute path to the docset directory.
 * @param pattern  Primary pattern, compiled as a case-insensitive JS regex
 *                 (e.g. `"auth|login"`, `"function\\s+\\w+"`, `"TODO.*fix"`).
 * @param options  Optional tuning: `contextLines`, `maxMatches`, `include`,
 *                 `fallbackPattern`.
 * @param index    Optional pre-built index, forwarded to each search pass.
 * @returns The primary result when it has matches or no non-blank fallback
 *          pattern was supplied; otherwise the fallback pass's result.
 */
export async function searchDocset(rootPath, pattern, options = {}, index) {
    const passOptions = {
        contextLines: options.contextLines ?? DEFAULT_CONTEXT_LINES,
        maxMatches: options.maxMatches ?? DEFAULT_MAX_MATCHES,
        include: options.include,
    };
    const primaryResult = await runSearch(rootPath, pattern, passOptions, index);
    if (primaryResult.total_matches > 0) {
        return primaryResult;
    }
    // Only now look at the fallback: it is irrelevant when the primary matched.
    const fallbackPattern = options.fallbackPattern?.trim();
    if (!fallbackPattern) {
        return primaryResult;
    }
    return await runSearch(rootPath, fallbackPattern, passOptions, index);
}
|
|
114
|
+
/**
 * Run a single search pass over the docset with one pattern.
 *
 * Steps:
 *  1. Compile `pattern` as a case-insensitive regex; an invalid regex is
 *     searched as a literal string instead.
 *  2. When a MiniSearch index is available and the pattern contains no regex
 *     metacharacters, grep only the 20 best-ranked files that also pass the
 *     `include` filter.
 *  3. When no pre-filter was used, it produced no candidates, or the
 *     candidates contained no match, grep every file accepted by `include`.
 *
 * @param rootPath Docset root; reported file paths are relative to it.
 * @param pattern  Pattern for this pass (echoed back as `used_pattern`).
 * @param opts     `{ contextLines, maxMatches, include }`.
 * @param index    Optional index from `buildFileIndex`.
 * @returns Matches, match count, number of files inspected in the pass that
 *          produced the result, the pattern and the truncation flag.
 */
async function runSearch(rootPath, pattern, opts, index) {
    let regex;
    try {
        regex = new RegExp(pattern, "i");
    }
    catch {
        // Invalid regex: treat as literal string
        regex = new RegExp(escapeRegex(pattern), "i");
    }
    // The include filter must apply to index-ranked files as well; otherwise a
    // ranked hit outside the requested glob would leak into the results.
    const isIncluded = (absPath) => !opts.include || matchGlob(absPath, opts.include);
    let rankedFiles = [];
    if (index?._ms != null && !REGEX_META.test(pattern)) {
        rankedFiles = index._ms
            .search(pattern, { prefix: true, fuzzy: 0.2, combineWith: "OR" })
            .map((hit) => index._idToPath.get(hit.id))
            .filter((absPath) => absPath !== undefined && isIncluded(absPath))
            .slice(0, 20);
    }
    let outcome = rankedFiles.length > 0
        ? await grepCandidates(rootPath, rankedFiles, regex, opts)
        : null;
    if (outcome === null || outcome.matches.length === 0) {
        // Safety net: ranking is fuzzy and capped, so "no match among the
        // ranked files" does not prove the docset has no match.
        const allFiles = await collectFiles(rootPath, opts.include);
        outcome = await grepCandidates(rootPath, allFiles, regex, opts);
    }
    return {
        matches: outcome.matches,
        total_matches: outcome.matches.length,
        searched_files: outcome.searchedFiles,
        used_pattern: pattern,
        truncated: outcome.truncated,
    };
}
/** Grep `files` in order, stopping once `opts.maxMatches` matches are collected. */
async function grepCandidates(rootPath, files, regex, opts) {
    const matches = [];
    let searchedFiles = 0;
    let truncated = false;
    for (const absPath of files) {
        if (truncated)
            break;
        // Report forward-slash relative paths regardless of platform.
        const relPath = relative(rootPath, absPath).replace(/\\/g, "/");
        searchedFiles++;
        const found = await grepFile(absPath, relPath, regex, opts.contextLines, opts.maxMatches - matches.length);
        matches.push(...found);
        if (matches.length >= opts.maxMatches) {
            truncated = true;
        }
    }
    return { matches, searchedFiles, truncated };
}
|
|
173
|
+
// ---------------------------------------------------------------------------
|
|
174
|
+
// File walking
|
|
175
|
+
// ---------------------------------------------------------------------------
|
|
176
|
+
/**
 * Recursively yield absolute paths of all non-ignored files under `dir`.
 *
 * Symlinks are followed (their target type is resolved with `stat`). To keep
 * a symlink that points back at one of its own ancestors from being walked
 * over and over, the canonical path of every directory currently being
 * walked is tracked and a directory already on that chain is skipped.
 * Directories reached through different, non-cyclic links are still walked
 * each time.
 *
 * @param dir        Directory to walk; unreadable directories yield nothing.
 * @param activeDirs Internal: canonical paths of the directories on the
 *                   current recursion chain. Callers should omit it.
 */
async function* walkFiles(dir, activeDirs = new Set()) {
    const { realpath } = await import("node:fs/promises");
    let canonicalDir;
    try {
        canonicalDir = await realpath(dir);
    }
    catch {
        canonicalDir = dir; // fall back to the given path; readdir decides below
    }
    if (activeDirs.has(canonicalDir)) {
        return; // symlink cycle — this directory is already being walked
    }
    let entries;
    try {
        entries = await readdir(dir, { withFileTypes: true });
    }
    catch {
        return;
    }
    activeDirs.add(canonicalDir);
    try {
        for (const entry of entries) {
            const absPath = join(dir, entry.name);
            // For symlinks, stat() follows the link to get the real type.
            // entry.isDirectory() / entry.isFile() return false for symlinks.
            let isDir = entry.isDirectory();
            let isFile = entry.isFile();
            if (entry.isSymbolicLink()) {
                try {
                    const target = await stat(absPath);
                    isDir = target.isDirectory();
                    isFile = target.isFile();
                }
                catch {
                    continue; // broken symlink — skip
                }
            }
            if (isDir) {
                if (!IGNORED_NAMES.has(entry.name)) {
                    yield* walkFiles(absPath, activeDirs);
                }
            }
            else if (isFile) {
                if (!IGNORED_FILES.has(entry.name)) {
                    yield absPath;
                }
            }
        }
    }
    finally {
        // Also runs when the consumer stops iterating early.
        activeDirs.delete(canonicalDir);
    }
}
|
|
213
|
+
/**
 * Gather every path produced by `walkFiles(rootPath)` into an array.
 *
 * @param rootPath Directory to walk.
 * @param include  Optional glob; when truthy, only paths for which
 *                 `matchGlob(path, include)` is true are kept.
 * @returns Absolute file paths in walk order.
 */
async function collectFiles(rootPath, include) {
    const keep = include
        ? (candidate) => matchGlob(candidate, include)
        : () => true;
    const collected = [];
    for await (const candidate of walkFiles(rootPath)) {
        if (keep(candidate)) {
            collected.push(candidate);
        }
    }
    return collected;
}
|
|
223
|
+
// ---------------------------------------------------------------------------
|
|
224
|
+
// Per-file grep
|
|
225
|
+
// ---------------------------------------------------------------------------
|
|
226
|
+
/**
 * Read `absPath` line by line and return up to `limit` matches with context.
 *
 * Reading stops as soon as `limit` matches have been found and the trailing
 * context of the last one has been read, so large files are not scanned to
 * the end once the result can no longer change. The read stream is destroyed
 * explicitly because stopping early would otherwise leave it open.
 *
 * @param absPath      File to scan.
 * @param relPath      Path reported in each match's `file` field.
 * @param regex        Compiled pattern, tested against each line.
 * @param contextLines Number of lines to include before and after each match.
 * @param limit        Maximum number of matches to return; `<= 0` returns [].
 * @returns Match records with 1-based line numbers; an empty array for
 *          binary or unreadable files.
 */
async function grepFile(absPath, relPath, regex, contextLines, limit) {
    if (limit <= 0)
        return [];
    // Binary detection: read first 8 KB and check for null bytes
    if (await isBinaryFile(absPath))
        return [];
    const lines = [];
    const matchIndices = []; // 0-based indices into `lines`
    let input;
    try {
        input = createReadStream(absPath, { encoding: "utf8" });
        const rl = createInterface({ input, crlfDelay: Infinity });
        for await (const line of rl) {
            lines.push(line);
            // Matches beyond `limit` would be discarded below, so stop testing.
            if (!(matchIndices.length >= limit) && regex.test(line)) {
                matchIndices.push(lines.length - 1);
            }
            if (matchIndices.length >= limit &&
                lines.length - 1 >= matchIndices[matchIndices.length - 1] + contextLines) {
                break; // last match's trailing context is complete
            }
        }
    }
    catch {
        // Unreadable file (permissions, encoding errors) — skip silently
        return [];
    }
    finally {
        input?.destroy();
    }
    const results = [];
    for (const idx of matchIndices) {
        if (results.length >= limit)
            break;
        const before = lines
            .slice(Math.max(0, idx - contextLines), idx)
            .map((l) => l.trimEnd());
        const after = lines
            .slice(idx + 1, idx + 1 + contextLines)
            .map((l) => l.trimEnd());
        results.push({
            file: relPath,
            line: idx + 1, // convert to 1-based
            content: lines[idx].trimEnd(),
            context_before: before,
            context_after: after,
        });
    }
    return results;
}
|
|
274
|
+
// ---------------------------------------------------------------------------
|
|
275
|
+
// Helpers
|
|
276
|
+
// ---------------------------------------------------------------------------
|
|
277
|
+
/**
 * Load a file's full contents as UTF-8 text.
 *
 * @param absPath File to read.
 * @returns The decoded text, or `null` when `isBinaryFile` reports the file
 *          as binary or the read fails.
 */
async function readTextFile(absPath) {
    const looksBinary = await isBinaryFile(absPath);
    if (looksBinary) {
        return null;
    }
    try {
        const fsPromises = await import("node:fs/promises");
        const text = await fsPromises.readFile(absPath, "utf8");
        return text;
    }
    catch {
        return null;
    }
}
|
|
289
|
+
/**
 * Heuristic binary check: a file counts as binary when a NUL byte occurs in
 * its first 8 KB.
 *
 * @param absPath File to inspect.
 * @returns `false` for empty files and files without a NUL byte in the
 *          sniffed prefix; `true` when a NUL byte is found or when the file
 *          cannot be stat'ed, opened or read (so callers skip it).
 */
async function isBinaryFile(absPath) {
    const SNIFF_BYTES = 8192;
    try {
        const { size } = await stat(absPath);
        if (size === 0) {
            return false;
        }
        const fsPromises = await import("node:fs/promises");
        const handle = await fsPromises.open(absPath, "r");
        try {
            const chunk = Buffer.alloc(Math.min(SNIFF_BYTES, size));
            const { bytesRead } = await handle.read(chunk, 0, chunk.length, 0);
            // Only the bytes actually read are meaningful.
            return chunk.subarray(0, bytesRead).includes(0);
        }
        finally {
            await handle.close();
        }
    }
    catch {
        return true; // treat unreadable as binary → skip
    }
}
|
|
317
|
+
/**
 * Backslash-escape every regex metacharacter in `s` so the result can be
 * compiled into a RegExp that matches `s` literally.
 */
function escapeRegex(s) {
    const prefixWithBackslash = (metaChar) => "\\" + metaChar;
    return s.replace(/[.+*?^${}()|[\]\\]/g, prefixWithBackslash);
}
|
|
321
|
+
/** Characters that must be backslash-escaped when copied into a regex source. */
const GLOB_LITERAL_META = /[.+^${}()|[\]\\]/;
/**
 * Very lightweight glob matching supporting `*`, `**`, `?` and brace
 * alternation (`{a,b}`). Only used for the `include` file-filter option; not
 * a full glob engine.
 *
 * - `*` matches any run of characters except `/`; `?` matches one such
 *   character; `**` matches any run of characters including `/`.
 * - `{ts,js}` matches either alternative. A brace group without a top-level
 *   comma, or an unbalanced brace, is matched literally.
 * - Matching is case-insensitive and anchored at the END of the path only, so
 *   `*.md` matches a file in any directory.
 * - Backslashes in `filePath` are treated as `/` so Windows paths behave the
 *   same as POSIX ones.
 *
 * @param filePath Path to test (absolute or relative).
 * @param pattern  Glob pattern.
 * @returns Whether the path matches the pattern.
 */
function matchGlob(filePath, pattern) {
    const normalizedPath = filePath.replace(/\\/g, "/");
    return new RegExp(globToRegexSource(pattern) + "$", "i").test(normalizedPath);
}
/** Translate a glob fragment into regex source; nested brace groups recurse. */
function globToRegexSource(glob) {
    let source = "";
    let i = 0;
    while (i < glob.length) {
        const ch = glob[i];
        if (ch === "*") {
            if (glob[i + 1] === "*") {
                source += ".*"; // ** → any chars including /
                i += 2;
            }
            else {
                source += "[^/]*"; // * → any chars except /
                i += 1;
            }
            continue;
        }
        if (ch === "?") {
            source += "[^/]"; // ? → single char except /
            i += 1;
            continue;
        }
        if (ch === "{") {
            const group = splitBraceAlternatives(glob, i);
            if (group !== null) {
                source += "(?:" + group.parts.map(globToRegexSource).join("|") + ")";
                i = group.end + 1;
                continue;
            }
            // Not a real alternation: fall through and match the brace literally.
        }
        source += GLOB_LITERAL_META.test(ch) ? "\\" + ch : ch;
        i += 1;
    }
    return source;
}
/**
 * Split the brace group opening at `glob[open]` on its top-level commas.
 * Returns `{ parts, end }` (`end` = index of the matching `}`), or `null`
 * when the group is unbalanced or contains no top-level comma.
 */
function splitBraceAlternatives(glob, open) {
    const parts = [];
    let depth = 0;
    let partStart = open + 1;
    for (let j = open; j < glob.length; j++) {
        const c = glob[j];
        if (c === "{") {
            depth++;
        }
        else if (c === "}") {
            depth--;
            if (depth === 0) {
                if (parts.length === 0)
                    return null; // "{abc}" — no alternation
                parts.push(glob.slice(partStart, j));
                return { parts, end: j };
            }
        }
        else if (c === "," && depth === 1) {
            parts.push(glob.slice(partStart, j));
            partStart = j + 1;
        }
    }
    return null; // never closed
}
|
|
338
|
+
// ---------------------------------------------------------------------------
|
|
339
|
+
// Formatting helpers (used by the MCP server layer)
|
|
340
|
+
// ---------------------------------------------------------------------------
|
|
341
|
+
/**
 * Format a {@link SearchDocsResult} as a human-readable, grep-style text block
 * suitable for returning as MCP tool content.
 *
 * Layout: one `==> file <==` header per run of matches from the same file
 * (blank line between files), each match as `line: content` surrounded by
 * its context lines, then a blank line and a one-line summary. When the
 * result is truncated the summary reports the number of matches actually
 * returned, which is the cap that was applied for this search.
 *
 * @param result Search result to render.
 * @returns The formatted text; a short "No matches found" message when the
 *          result has no matches.
 */
export function formatSearchResult(result) {
    const fileWord = `file${result.searched_files === 1 ? "" : "s"}`;
    if (result.matches.length === 0) {
        return `No matches found for pattern: ${result.used_pattern}\n(searched ${result.searched_files} ${fileWord})`;
    }
    const lines = [];
    let currentFile = "";
    for (const match of result.matches) {
        if (match.file !== currentFile) {
            if (currentFile !== "")
                lines.push(""); // blank separator between files
            lines.push(`==> ${match.file} <==`);
            currentFile = match.file;
        }
        for (const ctx of match.context_before) {
            lines.push(` ${ctx}`);
        }
        lines.push(`${match.line}: ${match.content}`);
        for (const ctx of match.context_after) {
            lines.push(` ${ctx}`);
        }
    }
    const matchWord = `match${result.total_matches === 1 ? "" : "es"}`;
    // Report the cap that was really applied instead of the module default.
    const truncationNote = result.truncated
        ? ` [truncated at ${result.matches.length}]`
        : "";
    lines.push("", `--- ${result.total_matches} ${matchWord} in ${result.searched_files} ${fileWord} (pattern: "${result.used_pattern}")${truncationNote}`);
    return lines.join("\n");
}
|
|
@@ -85,6 +85,7 @@ export interface SearchDocsParams {
|
|
|
85
85
|
}
|
|
86
86
|
/**
|
|
87
87
|
* Response from the search_docs tool
|
|
88
|
+
* @deprecated Use SearchDocsResult for actual search results
|
|
88
89
|
*/
|
|
89
90
|
export interface SearchDocsResponse {
|
|
90
91
|
/** Instructions for the agent on how to search */
|
|
@@ -96,6 +97,55 @@ export interface SearchDocsResponse {
|
|
|
96
97
|
/** The calculated local path for searching */
|
|
97
98
|
path: string;
|
|
98
99
|
}
|
|
100
|
+
/**
|
|
101
|
+
* A single line match from a file search
|
|
102
|
+
*/
|
|
103
|
+
export interface SearchMatch {
|
|
104
|
+
/** Path to the file, relative to the docset root */
|
|
105
|
+
file: string;
|
|
106
|
+
/** 1-based line number of the match */
|
|
107
|
+
line: number;
|
|
108
|
+
/** The full content of the matched line (trimmed) */
|
|
109
|
+
content: string;
|
|
110
|
+
/** Lines immediately before the match (up to contextLines lines) */
|
|
111
|
+
context_before: string[];
|
|
112
|
+
/** Lines immediately after the match (up to contextLines lines) */
|
|
113
|
+
context_after: string[];
|
|
114
|
+
}
|
|
115
|
+
/**
|
|
116
|
+
* Result returned by the search_docs tool when performing an actual search
|
|
117
|
+
*/
|
|
118
|
+
export interface SearchDocsResult {
|
|
119
|
+
/** All matched lines across all searched files */
|
|
120
|
+
matches: SearchMatch[];
|
|
121
|
+
/** Total number of matches found (may be higher than matches.length if truncated) */
|
|
122
|
+
total_matches: number;
|
|
123
|
+
/** Number of files inspected during the search */
|
|
124
|
+
searched_files: number;
|
|
125
|
+
/** The pattern that was actually used (may differ from input if fallback was triggered) */
|
|
126
|
+
used_pattern: string;
|
|
127
|
+
/** True when results were capped at the maximum match limit */
|
|
128
|
+
truncated: boolean;
|
|
129
|
+
}
|
|
130
|
+
/**
|
|
131
|
+
* Options controlling search behaviour
|
|
132
|
+
*/
|
|
133
|
+
export interface SearchOptions {
|
|
134
|
+
/**
|
|
135
|
+
* Fallback pattern used when the primary pattern yields no results.
|
|
136
|
+
* Typically the value of the `generalized_keywords` tool parameter.
|
|
137
|
+
*/
|
|
138
|
+
fallbackPattern?: string;
|
|
139
|
+
/** Number of context lines to include before and after each match (default: 0) */
|
|
140
|
+
contextLines?: number;
|
|
141
|
+
/** Maximum number of matches to return before truncating (default: 50) */
|
|
142
|
+
maxMatches?: number;
|
|
143
|
+
/**
|
|
144
|
+
* Glob-style pattern to restrict which files are searched (e.g. "*.md", "*.{ts,js}").
|
|
145
|
+
* When omitted all non-binary files are searched.
|
|
146
|
+
*/
|
|
147
|
+
include?: string;
|
|
148
|
+
}
|
|
99
149
|
/**
|
|
100
150
|
* Response from the list_docsets tool
|
|
101
151
|
*/
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@codemcp/knowledge-core",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.6.1",
|
|
4
4
|
"description": "Core functionality for agentic knowledge guidance system",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -29,7 +29,8 @@
|
|
|
29
29
|
"typecheck": "tsc --noEmit"
|
|
30
30
|
},
|
|
31
31
|
"dependencies": {
|
|
32
|
-
"js-yaml": "^4.1.0"
|
|
32
|
+
"js-yaml": "^4.1.0",
|
|
33
|
+
"minisearch": "^7.1.2"
|
|
33
34
|
},
|
|
34
35
|
"devDependencies": {
|
|
35
36
|
"@eslint/js": "^9.34.0",
|
|
@@ -4,10 +4,18 @@
|
|
|
4
4
|
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
|
|
5
5
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
6
6
|
import { CallToolRequestSchema, ListToolsRequestSchema, } from "@modelcontextprotocol/sdk/types.js";
|
|
7
|
-
import { loadConfig, findConfigPath, calculateLocalPath,
|
|
7
|
+
import { loadConfig, findConfigPath, calculateLocalPath, ConfigManager, ensureKnowledgeGitignoreSync, buildFileIndex, searchDocset, formatSearchResult, } from "@codemcp/knowledge-core";
|
|
8
8
|
import { initDocset } from "@codemcp/knowledge-content-loader";
|
|
9
9
|
import { existsSync } from "node:fs";
|
|
10
10
|
import { resolve, dirname } from "node:path";
|
|
11
|
+
/** Shared keywords parameter description advertised to agents */
|
|
12
|
+
const KEYWORDS_DESCRIPTION = "Primary search terms or concepts you're looking for. " +
|
|
13
|
+
'Supports full regex syntax (e.g. "log.*Error", "function\\s+\\w+", "auth|login"). ' +
|
|
14
|
+
"Returns file path, line number, matched line, and surrounding context lines. " +
|
|
15
|
+
'Be specific: "authentication middleware", "useData hook", "sidebar.items".';
|
|
16
|
+
const GENERALIZED_KEYWORDS_DESCRIPTION = "Broader synonyms or related terms used as a fallback when the primary keywords " +
|
|
17
|
+
'return no results (e.g. for "authentication" you might include "login|signin|oauth"). ' +
|
|
18
|
+
"Also supports regex syntax.";
|
|
11
19
|
/**
|
|
12
20
|
* Create an agentic knowledge MCP server
|
|
13
21
|
* @returns MCP server instance
|
|
@@ -25,6 +33,8 @@ export function createAgenticKnowledgeServer() {
|
|
|
25
33
|
let configCache = null;
|
|
26
34
|
let configLoadTime = 0;
|
|
27
35
|
const CONFIG_CACHE_TTL = 60000; // 1 minute cache
|
|
36
|
+
// Per-docset search index cache (keyed by docset id)
|
|
37
|
+
const indexCache = new Map();
|
|
28
38
|
/**
|
|
29
39
|
* Load configuration with caching (returns null if no config found)
|
|
30
40
|
*/
|
|
@@ -55,6 +65,35 @@ export function createAgenticKnowledgeServer() {
|
|
|
55
65
|
return null;
|
|
56
66
|
}
|
|
57
67
|
}
|
|
68
|
+
/**
 * Work out the absolute directory that holds a docset's files.
 *
 * - `local_folder` sources live in `<configDir>/docsets/<docset.id>`.
 * - `git_repo` and `archive` sources live at the path returned by
 *   `calculateLocalPath`, resolved against the parent of the config directory.
 * - Any other (or missing) source type uses the same path as git/archive but
 *   is returned without an initialization check.
 *
 * For the first two kinds the directory must contain
 * `.agentic-metadata.json`.
 *
 * @throws Error when that metadata file does not exist, i.e. the docset has
 *         not been initialized yet.
 */
function resolveDocsetPath(docset, configPath) {
    const configDir = dirname(configPath);
    const sourceType = docset.sources?.[0]?.type;
    // Returns `dir` unchanged once its metadata marker is confirmed to exist.
    const requireInitialized = (dir) => {
        if (!existsSync(resolve(dir, ".agentic-metadata.json"))) {
            throw new Error(`Docset '${docset.id}' hasn't been initialized yet.`);
        }
        return dir;
    };
    switch (sourceType) {
        case "local_folder":
            return requireInitialized(resolve(configDir, "docsets", docset.id));
        case "git_repo":
        case "archive":
            return requireInitialized(resolve(dirname(configDir), calculateLocalPath(docset, configPath)));
        default:
            // Fallback — unknown source type, no initialization check
            return resolve(dirname(configDir), calculateLocalPath(docset, configPath));
    }
}
|
|
58
97
|
// Register tool handlers
|
|
59
98
|
server.setRequestHandler(ListToolsRequestSchema, async () => {
|
|
60
99
|
// Load configuration to get available docsets
|
|
@@ -65,7 +104,7 @@ export function createAgenticKnowledgeServer() {
|
|
|
65
104
|
tools: [
|
|
66
105
|
{
|
|
67
106
|
name: "search_docs",
|
|
68
|
-
description: `Search for documentation in configured docsets. Returns
|
|
107
|
+
description: `Search for documentation in configured docsets. Returns file path, line number, matched content, and surrounding context.
|
|
69
108
|
|
|
70
109
|
⚠️ **NO DOCSETS CONFIGURED**
|
|
71
110
|
|
|
@@ -109,11 +148,11 @@ After configuring, the tool will show available docsets here.`,
|
|
|
109
148
|
},
|
|
110
149
|
keywords: {
|
|
111
150
|
type: "string",
|
|
112
|
-
description:
|
|
151
|
+
description: KEYWORDS_DESCRIPTION,
|
|
113
152
|
},
|
|
114
153
|
generalized_keywords: {
|
|
115
154
|
type: "string",
|
|
116
|
-
description:
|
|
155
|
+
description: GENERALIZED_KEYWORDS_DESCRIPTION,
|
|
117
156
|
},
|
|
118
157
|
},
|
|
119
158
|
required: ["docset_id", "keywords"],
|
|
@@ -161,11 +200,8 @@ After configuring, the tool will show available docsets here.`,
|
|
|
161
200
|
return `• **${docset.id}** (${docset.name})${description}`;
|
|
162
201
|
})
|
|
163
202
|
.join("\n");
|
|
164
|
-
const searchDocsDescription = `Search for documentation in available docsets. Returns
|
|
165
|
-
|
|
166
|
-
📚 **AVAILABLE DOCSETS:**
|
|
167
|
-
${docsetInfo}
|
|
168
|
-
`;
|
|
203
|
+
const searchDocsDescription = `Search for documentation in available docsets. Returns file path, line number, matched content, and surrounding context lines.\n\n` +
|
|
204
|
+
`📚 **AVAILABLE DOCSETS:**\n${docsetInfo}`;
|
|
169
205
|
return {
|
|
170
206
|
tools: [
|
|
171
207
|
{
|
|
@@ -181,11 +217,16 @@ ${docsetInfo}
|
|
|
181
217
|
},
|
|
182
218
|
keywords: {
|
|
183
219
|
type: "string",
|
|
184
|
-
description:
|
|
220
|
+
description: KEYWORDS_DESCRIPTION,
|
|
185
221
|
},
|
|
186
222
|
generalized_keywords: {
|
|
187
223
|
type: "string",
|
|
188
|
-
description:
|
|
224
|
+
description: GENERALIZED_KEYWORDS_DESCRIPTION,
|
|
225
|
+
},
|
|
226
|
+
context_lines: {
|
|
227
|
+
type: "number",
|
|
228
|
+
description: "Number of lines to show before and after each matching line (default: 0). " +
|
|
229
|
+
"Increase to 1–3 when you need surrounding context to understand a match.",
|
|
189
230
|
},
|
|
190
231
|
},
|
|
191
232
|
required: ["docset_id", "keywords"],
|
|
@@ -232,7 +273,7 @@ ${config.docsets.map((d) => `• **${d.id}** (${d.name})`).join("\n")}`,
|
|
|
232
273
|
try {
|
|
233
274
|
switch (name) {
|
|
234
275
|
case "search_docs": {
|
|
235
|
-
const { docset_id, keywords, generalized_keywords } = args;
|
|
276
|
+
const { docset_id, keywords, generalized_keywords, context_lines } = args;
|
|
236
277
|
// Validate required parameters
|
|
237
278
|
if (!docset_id || typeof docset_id !== "string") {
|
|
238
279
|
throw new Error("docset_id is required and must be a string");
|
|
@@ -257,52 +298,27 @@ ${config.docsets.map((d) => `• **${d.id}** (${d.name})`).join("\n")}`,
|
|
|
257
298
|
const docset = config.docsets.find((d) => d.id === docset_id);
|
|
258
299
|
if (!docset) {
|
|
259
300
|
const availableIds = config.docsets.map((d) => d.id).join(", ");
|
|
260
|
-
throw new Error(`Docset '${docset_id}' not found.\n\
|
|
261
|
-
`Available docsets: ${availableIds}\n\n`);
|
|
262
|
-
}
|
|
263
|
-
// Determine path calculation method and validate initialization
|
|
264
|
-
const primarySource = docset.sources?.[0];
|
|
265
|
-
let localPath;
|
|
266
|
-
if (primarySource?.type === "local_folder") {
|
|
267
|
-
// For local folders, use symlinked path
|
|
268
|
-
localPath = calculateLocalPath(docset, configPath);
|
|
269
|
-
// Check if initialized by verifying .agentic-metadata.json exists
|
|
270
|
-
const configDir = dirname(configPath);
|
|
271
|
-
const symlinkDir = resolve(configDir, "docsets", docset.id);
|
|
272
|
-
const metadataPath = resolve(symlinkDir, ".agentic-metadata.json");
|
|
273
|
-
if (!existsSync(metadataPath)) {
|
|
274
|
-
throw new Error(`Docset '${docset_id}' hasn't been initialized yet.`);
|
|
275
|
-
}
|
|
276
|
-
// Return the symlinked path for consistency
|
|
277
|
-
localPath = resolve(configDir, "docsets", docset.id);
|
|
278
|
-
const projectRoot2 = dirname(configDir);
|
|
279
|
-
localPath = resolve(projectRoot2, localPath).replace(projectRoot2 + "/", "");
|
|
280
|
-
}
|
|
281
|
-
else if (primarySource?.type === "git_repo") {
|
|
282
|
-
// For git repos, use standard path calculation
|
|
283
|
-
localPath = calculateLocalPath(docset, configPath);
|
|
284
|
-
// Check if .agentic-metadata.json exists
|
|
285
|
-
const configDir = dirname(configPath);
|
|
286
|
-
const projectRoot = dirname(configDir);
|
|
287
|
-
const absolutePath = resolve(projectRoot, localPath);
|
|
288
|
-
const metadataPath = resolve(absolutePath, ".agentic-metadata.json");
|
|
289
|
-
if (!existsSync(metadataPath)) {
|
|
290
|
-
throw new Error(`Docset '${docset_id}' hasn't been initialized yet.\n\n`);
|
|
291
|
-
}
|
|
301
|
+
throw new Error(`Docset '${docset_id}' not found.\n\nAvailable docsets: ${availableIds}`);
|
|
292
302
|
}
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
303
|
+
// Resolve the absolute local path (also validates initialization)
|
|
304
|
+
const absoluteLocalPath = resolveDocsetPath(docset, configPath);
|
|
305
|
+
// Get or build the search index for this docset
|
|
306
|
+
let index = indexCache.get(docset_id);
|
|
307
|
+
if (!index) {
|
|
308
|
+
index = await buildFileIndex(absoluteLocalPath);
|
|
309
|
+
indexCache.set(docset_id, index);
|
|
296
310
|
}
|
|
297
|
-
//
|
|
298
|
-
const
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
311
|
+
// Perform the search
|
|
312
|
+
const fallbackPattern = generalized_keywords?.trim();
|
|
313
|
+
const searchOptions = {};
|
|
314
|
+
if (fallbackPattern)
|
|
315
|
+
searchOptions.fallbackPattern = fallbackPattern;
|
|
316
|
+
if (typeof context_lines === "number")
|
|
317
|
+
searchOptions.contextLines = context_lines;
|
|
318
|
+
const result = await searchDocset(absoluteLocalPath, keywords.trim(), searchOptions, index);
|
|
319
|
+
const text = formatSearchResult(result);
|
|
304
320
|
return {
|
|
305
|
-
|
|
321
|
+
content: [{ type: "text", text }],
|
|
306
322
|
};
|
|
307
323
|
}
|
|
308
324
|
case "list_docsets": {
|
|
@@ -380,9 +396,11 @@ ${config.docsets.map((d) => `• **${d.id}** (${d.name})`).join("\n")}`,
|
|
|
380
396
|
}
|
|
381
397
|
const configManager = new ConfigManager();
|
|
382
398
|
const { config, configPath } = await configManager.loadConfig(process.cwd());
|
|
383
|
-
// Invalidate cache
|
|
399
|
+
// Invalidate config cache and search index cache so the next
|
|
400
|
+
// search_docs call sees the newly initialized content
|
|
384
401
|
configCache = null;
|
|
385
402
|
configLoadTime = 0;
|
|
403
|
+
indexCache.delete(docset_id);
|
|
386
404
|
ensureKnowledgeGitignoreSync(configPath);
|
|
387
405
|
const docset = config.docsets.find((d) => d.id === docset_id);
|
|
388
406
|
if (!docset) {
|