ralph-hero-knowledge-index 0.1.21 → 0.1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/.claude-plugin/plugin.json +1 -1
  2. package/.mcp.json +1 -1
  3. package/README.md +109 -0
  4. package/dist/config.d.ts +32 -0
  5. package/dist/config.js +75 -0
  6. package/dist/config.js.map +1 -0
  7. package/dist/db.d.ts +7 -0
  8. package/dist/db.js +17 -0
  9. package/dist/db.js.map +1 -1
  10. package/dist/file-scanner.d.ts +13 -1
  11. package/dist/file-scanner.js +30 -3
  12. package/dist/file-scanner.js.map +1 -1
  13. package/dist/hybrid-search.d.ts +12 -0
  14. package/dist/hybrid-search.js +74 -5
  15. package/dist/hybrid-search.js.map +1 -1
  16. package/dist/ignore.d.ts +29 -0
  17. package/dist/ignore.js +65 -0
  18. package/dist/ignore.js.map +1 -0
  19. package/dist/index.d.ts +9 -1
  20. package/dist/index.js +166 -6
  21. package/dist/index.js.map +1 -1
  22. package/dist/llm-client.d.ts +41 -0
  23. package/dist/llm-client.js +98 -0
  24. package/dist/llm-client.js.map +1 -0
  25. package/dist/reindex.d.ts +22 -3
  26. package/dist/reindex.js +60 -8
  27. package/dist/reindex.js.map +1 -1
  28. package/dist/search.d.ts +12 -0
  29. package/dist/search.js +15 -1
  30. package/dist/search.js.map +1 -1
  31. package/package.json +2 -1
  32. package/src/__tests__/config.test.ts +173 -0
  33. package/src/__tests__/file-scanner.test.ts +88 -0
  34. package/src/__tests__/hybrid-search.test.ts +107 -0
  35. package/src/__tests__/ignore.test.ts +86 -0
  36. package/src/__tests__/index.test.ts +450 -0
  37. package/src/__tests__/llm-client.test.ts +349 -0
  38. package/src/__tests__/memory-stats.test.ts +204 -0
  39. package/src/__tests__/reindex.test.ts +148 -2
  40. package/src/__tests__/search.test.ts +37 -0
  41. package/src/config.ts +105 -0
  42. package/src/db.ts +17 -0
  43. package/src/file-scanner.ts +28 -3
  44. package/src/hybrid-search.ts +88 -5
  45. package/src/ignore.ts +82 -0
  46. package/src/index.ts +202 -7
  47. package/src/llm-client.ts +136 -0
  48. package/src/reindex.ts +80 -9
  49. package/src/search.ts +27 -1
@@ -1,4 +1,84 @@
1
1
  import { describe, it, expect } from "vitest";
2
+ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
3
+ import type { KnowledgeDB } from "../db.js";
4
+
5
/**
 * Deterministic mock embedding. Avoids the 16MB ONNX model download in tests.
 *
 * Component `i` is `sin(seed * (i + 1) * 0.1)`, and the vector is then scaled
 * to unit length. A seed that produces an all-zero vector (seed 0) falls back
 * to the first basis vector so the result is always unit length for
 * `dims > 0`.
 *
 * @param seed - Numeric seed selecting the vector.
 * @param dims - Number of components; defaults to 384.
 * @returns A `Float32Array` of length `dims`.
 */
function mockEmbed(seed: number, dims = 384): Float32Array {
  const vec = new Float32Array(dims);
  let sumSquares = 0;
  for (let i = 0; i < dims; i++) {
    // Round to float32 first so the norm is computed from the stored values.
    const component = Math.fround(Math.sin(seed * (i + 1) * 0.1));
    vec[i] = component;
    sumSquares += component * component;
  }

  const norm = Math.sqrt(sumSquares);
  if (norm > 0) {
    for (let i = 0; i < dims; i++) vec[i] /= norm;
  } else if (dims > 0) {
    // Degenerate all-zero vector: substitute a unit basis vector.
    vec[0] = 1;
  }
  return vec;
}
19
+
20
/**
 * Map a string to a small non-negative integer seed.
 *
 * Folds the UTF-16 code units with a multiply-by-31 rolling hash kept in
 * 32-bit signed range, then reduces the magnitude modulo 1000.
 *
 * @param s - Text to hash.
 * @returns An integer in `[0, 999]`.
 */
function hashSeed(s: string): number {
  let acc = 0;
  let pos = 0;
  while (pos < s.length) {
    acc = (acc * 31 + s.charCodeAt(pos)) | 0;
    pos += 1;
  }
  const magnitude = acc < 0 ? -acc : acc;
  return magnitude % 1000;
}
25
+
26
/** Async embedding function for tests: hashes the text to a seed and returns its mock embedding. */
const mockEmbedFn = async (text: string): Promise<Float32Array> => {
  const seed = hashSeed(text);
  return mockEmbed(seed);
};
27
+
28
/**
 * Invoke a registered MCP tool by name (mirrors the pattern in
 * graph-tools.test.ts). McpServer stores handlers at `_registeredTools`.
 *
 * @param toolServer - Server whose tool registry is looked up.
 * @param name - Name of the tool to call.
 * @param args - Arguments forwarded to the tool handler; defaults to `{}`.
 * @returns Whatever the handler resolves to, typed as an MCP tool result.
 * @throws Error when no tool is registered under `name`.
 */
async function callTool(
  toolServer: McpServer,
  name: string,
  args: Record<string, unknown> = {},
): Promise<{ content: Array<{ type: string; text: string }>; isError?: boolean }> {
  type ToolEntry = {
    handler: (args: Record<string, unknown>, extra: unknown) => Promise<unknown>;
  };

  // `_registeredTools` is not on the typed surface of McpServer, hence the double cast.
  const { _registeredTools: registry } = toolServer as unknown as {
    _registeredTools: Record<string, ToolEntry>;
  };

  const entry = registry?.[name];
  if (entry) {
    // The handler receives an empty object as its `extra` argument.
    return entry.handler(args, {}) as Promise<{
      content: Array<{ type: string; text: string }>;
      isError?: boolean;
    }>;
  }
  throw new Error(`Tool "${name}" not registered`);
}
51
+
52
/**
 * Ensure the v3 schema extensions (memory_tier column on documents, chunks
 * table) exist on the test DB. Phase 1 (GH-762) owns the production schema
 * migration; here we add them in test fixtures so Phase 8's features can be
 * exercised independently of Phase 1's merge order.
 *
 * The column is added only when `PRAGMA table_info` does not already list it;
 * the table and index use `IF NOT EXISTS`, so repeated calls are harmless.
 *
 * @param db - Knowledge DB wrapper whose underlying handle is exposed as `db.db`.
 */
function ensureV3Schema(db: KnowledgeDB): void {
  const columns = db.db.prepare("PRAGMA table_info(documents)").all() as Array<{ name: string }>;
  const tierColumnMissing = columns.every((column) => column.name !== "memory_tier");
  if (tierColumnMissing) {
    db.db.exec(
      "ALTER TABLE documents ADD COLUMN memory_tier TEXT NOT NULL DEFAULT 'doc' CHECK(memory_tier IN ('doc','raw','reflection'))",
    );
  }

  const createChunksTable = `CREATE TABLE IF NOT EXISTS chunks (
id TEXT PRIMARY KEY,
document_id TEXT NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
chunk_index INTEGER NOT NULL,
content TEXT NOT NULL,
char_start INTEGER NOT NULL,
char_end INTEGER NOT NULL,
context_prefix TEXT NOT NULL DEFAULT '',
UNIQUE(document_id, chunk_index)
)`;
  const createChunksIndex =
    "CREATE INDEX IF NOT EXISTS idx_chunks_document_id ON chunks(document_id)";

  // Table first, then the index that references it.
  for (const statement of [createChunksTable, createChunksIndex]) {
    db.db.exec(statement);
  }
}
2
82
 
3
83
  describe("knowledge-index server", () => {
4
84
  it("exports createServer function", async () => {
@@ -11,4 +91,374 @@ describe("knowledge-index server", () => {
11
91
  const { server } = mod.createServer(":memory:");
12
92
  expect(server).toBeTruthy();
13
93
  });
94
+
95
// The stats tool must be registered without displacing the existing search/traverse tools.
it("registers knowledge_memory_stats tool alongside search/traverse", async () => {
  const indexModule = await import("../index.js");
  const { server } = indexModule.createServer(":memory:");
  const tools = (server as unknown as { _registeredTools: Record<string, unknown> })
    ._registeredTools;

  const expectedTools = ["knowledge_memory_stats", "knowledge_search", "knowledge_traverse"];
  for (const toolName of expectedTools) {
    expect(tools).toHaveProperty(toolName);
  }
});
104
+
105
// Exercises the registered input schema directly rather than going through the handler.
it("knowledge_search tool schema accepts memory_tier + return_chunk_meta", async () => {
  type SchemaHolder = { inputSchema?: { parse: (v: unknown) => unknown } };

  const indexModule = await import("../index.js");
  const { server } = indexModule.createServer(":memory:");
  const tools = (server as unknown as { _registeredTools: Record<string, SchemaHolder> })
    ._registeredTools;

  const searchSchema = tools.knowledge_search?.inputSchema;
  expect(searchSchema).toBeDefined();
  const parsing = (input: unknown) => () => searchSchema!.parse(input);

  // Valid inputs should pass zod validation without throwing
  expect(
    parsing({ query: "hello", memory_tier: "reflection", return_chunk_meta: true }),
  ).not.toThrow();
  expect(parsing({ query: "hello", memory_tier: "any" })).not.toThrow();

  // Invalid tier value must be rejected
  expect(parsing({ query: "hello", memory_tier: "garbage" })).toThrow();
});
120
+
121
// A known tier must parse; an unknown tier must be rejected by the schema.
it("knowledge_traverse schema accepts memory_tier", async () => {
  type SchemaHolder = { inputSchema?: { parse: (v: unknown) => unknown } };

  const indexModule = await import("../index.js");
  const { server } = indexModule.createServer(":memory:");
  const tools = (server as unknown as { _registeredTools: Record<string, SchemaHolder> })
    ._registeredTools;

  const traverseSchema = tools.knowledge_traverse?.inputSchema;
  expect(traverseSchema).toBeDefined();
  const parsing = (input: unknown) => () => traverseSchema!.parse(input);

  expect(parsing({ from: "doc-1", memory_tier: "reflection" })).not.toThrow();
  expect(parsing({ from: "doc-1", memory_tier: "bad" })).toThrow();
});
133
+ });
134
+
135
/**
 * End-to-end checks of the `knowledge_search` tool: the `memory_tier` filter
 * and the optional chunk metadata enabled by `return_chunk_meta`.
 *
 * Each test builds its own in-memory server with the mock embedder, so tests
 * share no state. Fixture seeding is factored into the local helpers below.
 */
describe("knowledge_search memory_tier + chunk_meta", () => {
  type Tier = "doc" | "raw" | "reflection";
  type DocInput = Parameters<KnowledgeDB["upsertDocument"]>[0];
  type SeedDoc = Pick<DocInput, "id" | "path" | "title" | "content"> & Partial<DocInput>;
  type ChunkSeed = {
    id: string;
    documentId: string;
    chunkIndex: number;
    content: string;
    charStart: number;
    charEnd: number;
    contextPrefix: string;
  };

  /** Create a fresh in-memory server using the mock embedder, with the v3 schema applied. */
  async function createFixture() {
    const mod = await import("../index.js");
    const fixture = mod.createServer(":memory:", { embedFn: mockEmbedFn });
    ensureV3Schema(fixture.db);
    return fixture;
  }

  /** Upsert a document (unspecified nullable fields default to null) and optionally set its tier. */
  function seedDocument(db: KnowledgeDB, doc: SeedDoc, tier?: Tier): void {
    db.upsertDocument({ date: null, type: null, status: null, githubIssue: null, ...doc });
    if (tier !== undefined) {
      db.db.prepare("UPDATE documents SET memory_tier = ? WHERE id = ?").run(tier, doc.id);
    }
  }

  /** Insert one row into the chunks table. */
  function seedChunk(db: KnowledgeDB, chunk: ChunkSeed): void {
    db.db
      .prepare(
        `INSERT INTO chunks (id, document_id, chunk_index, content, char_start, char_end, context_prefix)
VALUES (?, ?, ?, ?, ?, ?, ?)`,
      )
      .run(
        chunk.id,
        chunk.documentId,
        chunk.chunkIndex,
        chunk.content,
        chunk.charStart,
        chunk.charEnd,
        chunk.contextPrefix,
      );
  }

  /** Call `knowledge_search`, assert it did not error, and return the parsed JSON rows. */
  async function runSearch(
    server: McpServer,
    args: Record<string, unknown>,
  ): Promise<Array<Record<string, unknown>>> {
    const result = await callTool(server, "knowledge_search", args);
    expect(result.isError).not.toBe(true);
    return JSON.parse(result.content[0].text) as Array<Record<string, unknown>>;
  }

  it("filters to reflection when memory_tier=reflection", async () => {
    const { server, db, fts, vec } = await createFixture();

    // Seed 3 docs in doc/raw/reflection tiers with distinct content
    seedDocument(
      db,
      {
        id: "s-doc",
        path: "s-doc.md",
        title: "Curated Research",
        date: "2026-03-01",
        type: "research",
        status: "draft",
        content: "Curated research about chunking retrieval strategies.",
      },
      "doc",
    );
    seedDocument(
      db,
      {
        id: "s-raw",
        path: "s-raw.md",
        title: "Raw Memory",
        date: "2026-03-02",
        content: "Raw ingested note about chunking retrieval.",
      },
      "raw",
    );
    seedDocument(
      db,
      {
        id: "s-reflection",
        path: "s-reflection.md",
        title: "Reflection Synthesis",
        date: "2026-03-03",
        content: "Reflection about chunking retrieval patterns.",
      },
      "reflection",
    );
    fts.rebuildIndex();

    // Ensure vec index exists so the query path runs — empty is fine.
    vec.createIndex();

    const rows = await runSearch(server, {
      query: "chunking retrieval",
      memory_tier: "reflection",
      limit: 10,
    });
    const ids = rows.map((row) => row.id);
    expect(ids).toContain("s-reflection");
    expect(ids).not.toContain("s-doc");
    expect(ids).not.toContain("s-raw");
  });

  it("returns all tiers when memory_tier='any' (default)", async () => {
    const { server, db, fts, vec } = await createFixture();

    seedDocument(
      db,
      {
        id: "a-doc",
        path: "a-doc.md",
        title: "Doc",
        date: "2026-03-04",
        content: "Content about retrieval pipelines.",
      },
      "doc",
    );
    seedDocument(
      db,
      {
        id: "a-raw",
        path: "a-raw.md",
        title: "Raw",
        date: "2026-03-04",
        content: "Raw content about retrieval pipelines.",
      },
      "raw",
    );
    seedDocument(
      db,
      {
        id: "a-reflect",
        path: "a-reflect.md",
        title: "Reflect",
        date: "2026-03-04",
        content: "Reflection content about retrieval pipelines.",
      },
      "reflection",
    );
    fts.rebuildIndex();
    vec.createIndex();

    // memory_tier is intentionally omitted to exercise the default.
    const rows = await runSearch(server, { query: "retrieval pipelines", limit: 10 });
    const ids = rows.map((row) => row.id);
    expect(ids).toContain("a-doc");
    expect(ids).toContain("a-raw");
    expect(ids).toContain("a-reflect");
  });

  it("populates chunk_index when return_chunk_meta=true", async () => {
    const { server, db, fts, vec } = await createFixture();
    const body = "The first portion of a long research document discussing retrieval.";
    const query = "retrieval research chunking";

    seedDocument(db, {
      id: "c-doc",
      path: "c-doc.md",
      title: "Chunked Doc",
      date: "2026-03-05",
      content: body,
    });
    fts.rebuildIndex();

    // Seed a chunk + a chunk-level vec row
    seedChunk(db, {
      id: "c-doc#c0",
      documentId: "c-doc",
      chunkIndex: 0,
      content: body,
      charStart: 0,
      charEnd: 68,
      contextPrefix: "Research context.",
    });

    vec.createIndex();
    // Embed the chunk with the query text itself.
    vec.upsertEmbedding("c-doc#c0", await mockEmbedFn(query));

    const rows = await runSearch(server, { query, limit: 5, return_chunk_meta: true });
    const hit = rows.find((row) => row.id === "c-doc");
    expect(hit).toBeDefined();
    expect(hit!.chunk_index).toBe(0);
    expect(hit!.char_start).toBe(0);
    expect(hit!.char_end).toBe(68);
    expect(hit!.context_prefix).toBe("Research context.");
    expect(hit!.best_chunk_id).toBe("c-doc#c0");
  });

  it("omits chunk_index when return_chunk_meta is false (default)", async () => {
    const { server, db, fts, vec } = await createFixture();
    const body = "Chunked document body about retrieval.";
    const query = "retrieval document";

    seedDocument(db, {
      id: "d-doc",
      path: "d-doc.md",
      title: "Chunked Doc",
      date: "2026-03-05",
      content: body,
    });
    fts.rebuildIndex();
    seedChunk(db, {
      id: "d-doc#c0",
      documentId: "d-doc",
      chunkIndex: 0,
      content: body,
      charStart: 0,
      charEnd: 40,
      contextPrefix: "",
    });
    vec.createIndex();
    vec.upsertEmbedding("d-doc#c0", await mockEmbedFn(query));

    // return_chunk_meta omitted (defaults to false)
    const rows = await runSearch(server, { query, limit: 5 });
    const hit = rows.find((row) => row.id === "d-doc");
    expect(hit).toBeDefined();
    expect(hit!.chunk_index).toBeUndefined();
    expect(hit!.best_chunk_id).toBeUndefined();
  });
});
343
+
344
/**
 * Checks of the `knowledge_traverse` tool's `memory_tier` filter.
 *
 * Each test seeds a root document with `builds_on` edges to children in
 * different tiers, then inspects which target ids the traversal returns.
 */
describe("knowledge_traverse memory_tier filter", () => {
  type Tier = "doc" | "raw" | "reflection";
  type NodeSeed = { id: string; path: string; title: string; tier?: Tier };

  /** Create a fresh in-memory server (default embedder) with the v3 schema applied. */
  async function createFixture() {
    const mod = await import("../index.js");
    const fixture = mod.createServer(":memory:");
    ensureV3Schema(fixture.db);
    return fixture;
  }

  /** Upsert an empty-content document and, when a tier is given, set it explicitly. */
  function seedNode(db: KnowledgeDB, node: NodeSeed): void {
    db.upsertDocument({
      id: node.id,
      path: node.path,
      title: node.title,
      date: null,
      type: null,
      status: null,
      githubIssue: null,
      content: "",
    });
    if (node.tier !== undefined) {
      db.db.prepare("UPDATE documents SET memory_tier = ? WHERE id = ?").run(node.tier, node.id);
    }
  }

  /** Seed `root` and `children`, then add a `builds_on` edge from the root to every child. */
  function seedStar(db: KnowledgeDB, root: NodeSeed, children: NodeSeed[]): void {
    seedNode(db, root);
    for (const child of children) seedNode(db, child);
    // Edges are added only after every document exists.
    for (const child of children) db.addRelationship(root.id, child.id, "builds_on");
  }

  /** Call `knowledge_traverse`, assert it did not error, and return the target ids. */
  async function traverseTargets(
    server: McpServer,
    args: Record<string, unknown>,
  ): Promise<string[]> {
    const result = await callTool(server, "knowledge_traverse", args);
    expect(result.isError).not.toBe(true);
    const rows = JSON.parse(result.content[0].text) as Array<{ targetId: string }>;
    return rows.map((row) => row.targetId);
  }

  it("drops non-reflection nodes when memory_tier=reflection", async () => {
    const { server, db } = await createFixture();

    seedStar(db, { id: "t-root", path: "t-root.md", title: "Root" }, [
      { id: "t-doc-child", path: "c1.md", title: "Doc Child", tier: "doc" },
      { id: "t-reflect-child", path: "c2.md", title: "Reflect Child", tier: "reflection" },
    ]);

    const targetIds = await traverseTargets(server, {
      from: "t-root",
      memory_tier: "reflection",
    });
    expect(targetIds).toContain("t-reflect-child");
    expect(targetIds).not.toContain("t-doc-child");
  });

  it("returns all tiers when memory_tier='any' (default)", async () => {
    const { server, db } = await createFixture();

    seedStar(db, { id: "any-root", path: "r.md", title: "R" }, [
      { id: "any-doc", path: "a.md", title: "A", tier: "doc" },
      { id: "any-raw", path: "b.md", title: "B", tier: "raw" },
      { id: "any-reflect", path: "c.md", title: "C", tier: "reflection" },
    ]);

    // memory_tier is intentionally omitted to exercise the default.
    const targetIds = await traverseTargets(server, { from: "any-root" });
    expect(targetIds).toContain("any-doc");
    expect(targetIds).toContain("any-raw");
    expect(targetIds).toContain("any-reflect");
  });
});