ralph-hero-knowledge-index 0.1.21 → 0.1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/.claude-plugin/plugin.json +1 -1
  2. package/.mcp.json +1 -1
  3. package/README.md +109 -0
  4. package/dist/config.d.ts +32 -0
  5. package/dist/config.js +75 -0
  6. package/dist/config.js.map +1 -0
  7. package/dist/db.d.ts +7 -0
  8. package/dist/db.js +17 -0
  9. package/dist/db.js.map +1 -1
  10. package/dist/file-scanner.d.ts +13 -1
  11. package/dist/file-scanner.js +30 -3
  12. package/dist/file-scanner.js.map +1 -1
  13. package/dist/hybrid-search.d.ts +12 -0
  14. package/dist/hybrid-search.js +74 -5
  15. package/dist/hybrid-search.js.map +1 -1
  16. package/dist/ignore.d.ts +29 -0
  17. package/dist/ignore.js +65 -0
  18. package/dist/ignore.js.map +1 -0
  19. package/dist/index.d.ts +9 -1
  20. package/dist/index.js +166 -6
  21. package/dist/index.js.map +1 -1
  22. package/dist/llm-client.d.ts +41 -0
  23. package/dist/llm-client.js +98 -0
  24. package/dist/llm-client.js.map +1 -0
  25. package/dist/reindex.d.ts +22 -3
  26. package/dist/reindex.js +60 -8
  27. package/dist/reindex.js.map +1 -1
  28. package/dist/search.d.ts +12 -0
  29. package/dist/search.js +15 -1
  30. package/dist/search.js.map +1 -1
  31. package/package.json +2 -1
  32. package/src/__tests__/config.test.ts +173 -0
  33. package/src/__tests__/file-scanner.test.ts +88 -0
  34. package/src/__tests__/hybrid-search.test.ts +107 -0
  35. package/src/__tests__/ignore.test.ts +86 -0
  36. package/src/__tests__/index.test.ts +450 -0
  37. package/src/__tests__/llm-client.test.ts +349 -0
  38. package/src/__tests__/memory-stats.test.ts +204 -0
  39. package/src/__tests__/reindex.test.ts +148 -2
  40. package/src/__tests__/search.test.ts +37 -0
  41. package/src/config.ts +105 -0
  42. package/src/db.ts +17 -0
  43. package/src/file-scanner.ts +28 -3
  44. package/src/hybrid-search.ts +88 -5
  45. package/src/ignore.ts +82 -0
  46. package/src/index.ts +202 -7
  47. package/src/llm-client.ts +136 -0
  48. package/src/reindex.ts +80 -9
  49. package/src/search.ts +27 -1
@@ -1,4 +1,4 @@
1
- import { describe, it, expect, vi, beforeEach } from "vitest";
1
+ import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
2
2
  import { mkdtempSync, writeFileSync, mkdirSync, unlinkSync, utimesSync } from "node:fs";
3
3
  import { join, resolve } from "node:path";
4
4
  import { tmpdir } from "node:os";
@@ -15,7 +15,7 @@ vi.mock("../embedder.js", () => ({
15
15
  }));
16
16
 
17
17
  import { embed } from "../embedder.js";
18
- import { reindex } from "../reindex.js";
18
+ import { reindex, resolveDirs } from "../reindex.js";
19
19
  import { KnowledgeDB } from "../db.js";
20
20
 
21
21
  const mockedEmbed = vi.mocked(embed);
@@ -353,4 +353,150 @@ describe("incremental reindex", () => {
353
353
  expect(results.some(r => r.id === "fresh-doc")).toBe(true);
354
354
  db1.close();
355
355
  });
356
+
357
+ it("scenario 13: reindex honors .ralphignore for file discovery", async () => {
358
+ writeFileSync(join(dir, "kept.md"), makeDoc("Kept"));
359
+ writeFileSync(join(dir, "skipped.md"), makeDoc("Skipped"));
360
+ writeFileSync(join(dir, ".ralphignore"), "skipped.md\n");
361
+
362
+ await reindex([dir], dbPath);
363
+ expect(mockedEmbed).toHaveBeenCalledTimes(1);
364
+
365
+ const db = new KnowledgeDB(dbPath);
366
+ expect(db.getDocument("kept")).toBeTruthy();
367
+ expect(db.getDocument("skipped")).toBeUndefined();
368
+ db.close();
369
+ });
370
+
371
+ it("scenario 14: reindex honors caller-supplied ignorePatterns arg", async () => {
372
+ writeFileSync(join(dir, "kept.md"), makeDoc("Kept"));
373
+ mkdirSync(join(dir, "drafts"));
374
+ writeFileSync(join(dir, "drafts", "wip.md"), makeDoc("WIP"));
375
+
376
+ await reindex([dir], dbPath, false, ["drafts/**"]);
377
+ // Only kept.md should have been embedded.
378
+ expect(mockedEmbed).toHaveBeenCalledTimes(1);
379
+
380
+ const db = new KnowledgeDB(dbPath);
381
+ expect(db.getDocument("kept")).toBeTruthy();
382
+ expect(db.getDocument("wip")).toBeUndefined();
383
+ db.close();
384
+ });
385
+ });
386
+
387
+ describe("resolveDirs precedence", () => {
388
+ const ORIGINAL_ARGV = process.argv;
389
+ const ORIGINAL_ENV = {
390
+ RALPH_KNOWLEDGE_DIRS: process.env.RALPH_KNOWLEDGE_DIRS,
391
+ RALPH_KNOWLEDGE_DB: process.env.RALPH_KNOWLEDGE_DB,
392
+ RALPH_KNOWLEDGE_CONFIG: process.env.RALPH_KNOWLEDGE_CONFIG,
393
+ };
394
+ let tmpHome: string;
395
+ let configDir: string;
396
+
397
+ beforeEach(() => {
398
+ process.argv = ["node", "reindex.js"];
399
+ delete process.env.RALPH_KNOWLEDGE_DIRS;
400
+ delete process.env.RALPH_KNOWLEDGE_DB;
401
+ configDir = mkdtempSync(join(tmpdir(), "resolve-dirs-"));
402
+ tmpHome = configDir;
403
+ process.env.RALPH_KNOWLEDGE_CONFIG = join(configDir, "knowledge.config.json");
404
+ });
405
+
406
+ afterEach(() => {
407
+ process.argv = ORIGINAL_ARGV;
408
+ for (const key of Object.keys(ORIGINAL_ENV) as (keyof typeof ORIGINAL_ENV)[]) {
409
+ const orig = ORIGINAL_ENV[key];
410
+ if (orig === undefined) {
411
+ delete process.env[key];
412
+ } else {
413
+ process.env[key] = orig;
414
+ }
415
+ }
416
+ });
417
+
418
+ it("CLI positional args beat env var even when both are set", () => {
419
+ writeFileSync(
420
+ process.env.RALPH_KNOWLEDGE_CONFIG!,
421
+ JSON.stringify({ roots: ["/from/config"] }),
422
+ );
423
+ process.argv = ["node", "reindex.js", "/from/cli"];
424
+ process.env.RALPH_KNOWLEDGE_DIRS = "/from/env";
425
+ const r = resolveDirs();
426
+ expect(r.source).toBe("cli");
427
+ expect(r.dirs).toEqual(["/from/cli"]);
428
+ });
429
+
430
+ it("env var beats config file roots when CLI is empty", () => {
431
+ writeFileSync(
432
+ process.env.RALPH_KNOWLEDGE_CONFIG!,
433
+ JSON.stringify({ roots: ["/from/config"] }),
434
+ );
435
+ process.env.RALPH_KNOWLEDGE_DIRS = "/from/env-a,/from/env-b";
436
+ const r = resolveDirs();
437
+ expect(r.source).toBe("env");
438
+ expect(r.dirs).toEqual(["/from/env-a", "/from/env-b"]);
439
+ });
440
+
441
+ it("config file roots beat fallback when CLI and env are absent", () => {
442
+ writeFileSync(
443
+ process.env.RALPH_KNOWLEDGE_CONFIG!,
444
+ JSON.stringify({ roots: ["/from/config-a", "/from/config-b"] }),
445
+ );
446
+ const r = resolveDirs();
447
+ expect(r.source).toBe("config");
448
+ expect(r.dirs).toEqual(["/from/config-a", "/from/config-b"]);
449
+ });
450
+
451
+ it("falls back to ../../thoughts when no source is configured", () => {
452
+ // Point env var at a nonexistent config path so loadConfig returns {}.
453
+ process.env.RALPH_KNOWLEDGE_CONFIG = join(configDir, "missing.json");
454
+ const r = resolveDirs();
455
+ expect(r.source).toBe("fallback");
456
+ expect(r.dirs).toEqual(["../../thoughts"]);
457
+ });
458
+
459
+ it("dbPath precedence: CLI arg > env var > config > default", () => {
460
+ writeFileSync(
461
+ process.env.RALPH_KNOWLEDGE_CONFIG!,
462
+ JSON.stringify({ roots: ["/x"], dbPath: "/from/config.db" }),
463
+ );
464
+ // CLI wins
465
+ process.argv = ["node", "reindex.js", "/cli/root", "/cli/override.db"];
466
+ process.env.RALPH_KNOWLEDGE_DB = "/from/env.db";
467
+ expect(resolveDirs().dbPath).toBe("/cli/override.db");
468
+
469
+ // Env wins over config when CLI is absent
470
+ process.argv = ["node", "reindex.js"];
471
+ process.env.RALPH_KNOWLEDGE_DIRS = "/env/root";
472
+ process.env.RALPH_KNOWLEDGE_DB = "/from/env.db";
473
+ expect(resolveDirs().dbPath).toBe("/from/env.db");
474
+
475
+ // Config wins when neither CLI nor env set dbPath
476
+ delete process.env.RALPH_KNOWLEDGE_DB;
477
+ expect(resolveDirs().dbPath).toBe("/from/config.db");
478
+ });
479
+
480
+ it("forwards config.ignorePatterns on the returned config object", () => {
481
+ writeFileSync(
482
+ process.env.RALPH_KNOWLEDGE_CONFIG!,
483
+ JSON.stringify({
484
+ roots: ["/r1"],
485
+ ignorePatterns: ["draft/**", "*.bak"],
486
+ }),
487
+ );
488
+ const r = resolveDirs();
489
+ expect(r.config.ignorePatterns).toEqual(["draft/**", "*.bak"]);
490
+ });
491
+
492
+ it("treats an empty RALPH_KNOWLEDGE_DIRS as unset and falls through", () => {
493
+ writeFileSync(
494
+ process.env.RALPH_KNOWLEDGE_CONFIG!,
495
+ JSON.stringify({ roots: ["/from/config"] }),
496
+ );
497
+ process.env.RALPH_KNOWLEDGE_DIRS = " , ";
498
+ const r = resolveDirs();
499
+ expect(r.source).toBe("config");
500
+ expect(r.dirs).toEqual(["/from/config"]);
501
+ });
356
502
  });
@@ -205,6 +205,43 @@ describe("FtsSearch", () => {
205
205
  });
206
206
  });
207
207
 
208
+ describe("memory_tier filter", () => {
209
+ it("filters by memory_tier when schema has the column", () => {
210
+ db.db
211
+ .prepare("UPDATE documents SET memory_tier = ? WHERE id = ?")
212
+ .run("reflection", "auth-doc");
213
+ fts.rebuildIndex();
214
+
215
+ // auth-doc is "reflection", so search for terms in auth-doc should hit.
216
+ const reflectionHits = fts.search("authentication", { memoryTier: "reflection" });
217
+ const ids = reflectionHits.map((r) => r.id);
218
+ expect(ids).toContain("auth-doc");
219
+
220
+ // A "doc" filter should omit the reflection-tagged doc.
221
+ const docHits = fts.search("authentication", { memoryTier: "doc" });
222
+ expect(docHits.some((r) => r.id === "auth-doc")).toBe(false);
223
+ });
224
+
225
+ it("ignores memory_tier silently when column is absent (v2 schema)", () => {
226
+ // beforeEach gives us a v2 schema — column does not exist.
227
+ const results = fts.search("cache", { memoryTier: "reflection" });
228
+ // Filter is a no-op on v2; regular FTS results come through.
229
+ expect(Array.isArray(results)).toBe(true);
230
+ });
231
+
232
+ it("returns all tiers when memoryTier='any'", () => {
233
+ db.db
234
+ .prepare("UPDATE documents SET memory_tier = ? WHERE id = ?")
235
+ .run("reflection", "auth-doc");
236
+ fts.rebuildIndex();
237
+
238
+ const authHits = fts.search("authentication", { memoryTier: "any" });
239
+ expect(authHits.some((r) => r.id === "auth-doc")).toBe(true);
240
+ const cacheHits = fts.search("cache", { memoryTier: "any" });
241
+ expect(cacheHits.some((r) => r.id === "cache-doc")).toBe(true);
242
+ });
243
+ });
244
+
208
245
  describe("ensureTable", () => {
209
246
  it("creates FTS table if it does not exist", () => {
210
247
  // Create a fresh DB without FTS table
package/src/config.ts ADDED
@@ -0,0 +1,105 @@
1
+ import { readFileSync, existsSync } from "node:fs";
2
+ import { join } from "node:path";
3
+ import { homedir } from "node:os";
4
+
/**
 * Shape of the optional `~/.ralph/knowledge.config.json` file as returned by
 * `loadConfig`.
 *
 * Every field is optional. Only the fields declared here are copied out of
 * the parsed JSON; any other keys present in the file are dropped by
 * `loadConfig`, so the file may carry extra keys without breaking readers.
 */
export interface KnowledgeConfig {
  /** Directories to index. A leading `~` is expanded by `loadConfig`. */
  roots?: string[];
  /** Extra gitignore-syntax patterns layered on top of per-root `.ralphignore`. */
  ignorePatterns?: string[];
  /** Override for the SQLite database path. A leading `~` is expanded by `loadConfig`. */
  dbPath?: string;
}
20
+
/**
 * Replace a leading `~` path segment with the current user's home directory.
 *
 * Handles the bare `~` as well as `~/…` and `~\…`. Anything else — including
 * the empty string and `~user/…` forms — is handed back untouched.
 *
 * @param p path that may start with a tilde segment
 * @returns the expanded path, or `p` itself when there is nothing to expand
 */
export function expandHome(p: string): string {
  if (p === "~") {
    return homedir();
  }
  // A tilde only counts when it is immediately followed by a path separator.
  const startsWithTildeSegment = /^~[\\/]/.test(p);
  return startsWithTildeSegment ? join(homedir(), p.slice(2)) : p;
}
33
+
/**
 * Resolve the knowledge config file path. Precedence:
 *   1. `process.env.RALPH_KNOWLEDGE_CONFIG` (when not blank)
 *   2. `~/.ralph/knowledge.config.json`
 *
 * The environment value is trimmed before use, so surrounding whitespace
 * neither defeats `~` expansion nor ends up inside the returned path.
 *
 * @returns the config file path; the file itself is not checked for existence
 */
export function resolveConfigPath(): string {
  const envPath = process.env.RALPH_KNOWLEDGE_CONFIG?.trim();
  if (envPath) {
    return expandHome(envPath);
  }
  return join(homedir(), ".ralph", "knowledge.config.json");
}
46
+
/** Best-effort human-readable text for a caught value of unknown type. */
function describeError(e: unknown): string {
  return e instanceof Error ? e.message : String(e);
}

/**
 * Keep only the non-empty string entries of `value`.
 * Returns `undefined` when `value` is not an array at all.
 */
function nonEmptyStrings(value: unknown): string[] | undefined {
  if (!Array.isArray(value)) return undefined;
  return value.filter(
    (entry): entry is string => typeof entry === "string" && entry.length > 0,
  );
}

/**
 * Load the optional knowledge config file located via `resolveConfigPath()`.
 *
 * Returns an empty object when the file is missing, unreadable, not valid
 * JSON, or not a JSON object. Every case except "missing" also emits a
 * `console.warn`. A leading UTF-8 byte-order mark is tolerated.
 *
 * Only `roots`, `ignorePatterns` and `dbPath` are copied to the result;
 * non-string and empty entries are discarded. A leading `~` in `roots`
 * entries and in `dbPath` is expanded eagerly via `expandHome`.
 *
 * @returns the sanitized config, possibly empty
 */
export function loadConfig(): KnowledgeConfig {
  const configPath = resolveConfigPath();
  if (!existsSync(configPath)) {
    return {};
  }

  let raw: string;
  try {
    raw = readFileSync(configPath, "utf-8");
  } catch (e: unknown) {
    console.warn(
      `Failed to read knowledge config at ${configPath}: ${describeError(e)}`,
    );
    return {};
  }

  let parsed: unknown;
  try {
    // Some editors prepend a UTF-8 BOM; JSON.parse rejects it, so strip it.
    parsed = JSON.parse(raw.replace(/^\uFEFF/, ""));
  } catch (e: unknown) {
    console.warn(
      `Malformed JSON in knowledge config at ${configPath}: ${describeError(e)}`,
    );
    return {};
  }

  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    console.warn(
      `Knowledge config at ${configPath} is not a JSON object; ignoring.`,
    );
    return {};
  }

  const obj = parsed as Record<string, unknown>;
  const out: KnowledgeConfig = {};

  const roots = nonEmptyStrings(obj.roots);
  if (roots) {
    out.roots = roots.map((root) => expandHome(root));
  }

  const ignorePatterns = nonEmptyStrings(obj.ignorePatterns);
  if (ignorePatterns) {
    out.ignorePatterns = ignorePatterns;
  }

  if (typeof obj.dbPath === "string" && obj.dbPath.length > 0) {
    out.dbPath = expandHome(obj.dbPath);
  }

  return out;
}
package/src/db.ts CHANGED
@@ -468,6 +468,23 @@ export class KnowledgeDB {
468
468
  return row !== undefined;
469
469
  }
470
470
 
471
+ /**
472
+ * Returns the `memory_tier` for the given document id. Returns `undefined`
473
+ * when the document does not exist OR when the `memory_tier` column is
474
+ * absent from the schema (pre-v3 databases). Used by MCP `knowledge_*`
475
+ * tools that need to post-filter result sets by tier.
476
+ */
477
+ getMemoryTier(id: string): string | undefined {
478
+ const columns = this.db
479
+ .prepare("PRAGMA table_info(documents)")
480
+ .all() as Array<{ name: string }>;
481
+ if (!columns.some((c) => c.name === "memory_tier")) return undefined;
482
+ const row = this.db
483
+ .prepare("SELECT memory_tier AS memoryTier FROM documents WHERE id = ?")
484
+ .get(id) as { memoryTier: string } | undefined;
485
+ return row?.memoryTier;
486
+ }
487
+
471
488
  deleteDocument(id: string): void {
472
489
  this.db.prepare("DELETE FROM documents WHERE id = ?").run(id);
473
490
  }
@@ -1,14 +1,39 @@
1
1
  import { readdirSync } from "node:fs";
2
- import { join } from "node:path";
2
+ import { join, relative } from "node:path";
3
+ import type { IgnoreMatcher } from "./ignore.js";
3
4
 
4
- export function findMarkdownFiles(dir: string): string[] {
5
+ /**
6
+ * Recursively find all `.md` files under `dir`.
7
+ *
8
+ * Directory names beginning with `.` or `_` and file names beginning with `_`
9
+ * are always skipped (fast-path). When an {@link IgnoreMatcher} is supplied,
10
+ * each remaining path is additionally tested against it via its root-relative
11
+ * form; matches are skipped.
12
+ *
13
+ * @param dir root directory to walk
14
+ * @param matcher optional matcher built via `loadIgnoreForRoot(dir, …)`
15
+ */
16
+ export function findMarkdownFiles(dir: string, matcher?: IgnoreMatcher): string[] {
5
17
  const results: string[] = [];
6
18
  function walk(d: string) {
7
19
  for (const entry of readdirSync(d, { withFileTypes: true })) {
8
20
  const fullPath = join(d, entry.name);
9
- if (entry.isDirectory() && !entry.name.startsWith(".") && !entry.name.startsWith("_")) {
21
+ if (entry.isDirectory()) {
22
+ // Fast-path: hidden/underscored directories are always skipped.
23
+ if (entry.name.startsWith(".") || entry.name.startsWith("_")) continue;
24
+ if (matcher) {
25
+ // Test both bare and trailing-slash forms so gitignore-style
26
+ // directory-only patterns (e.g., `dist/`) match even when the
27
+ // directory itself has not yet been descended.
28
+ const rel = relative(dir, fullPath);
29
+ if (matcher.isIgnored(rel) || matcher.isIgnored(`${rel}/`)) continue;
30
+ }
10
31
  walk(fullPath);
11
32
  } else if (entry.isFile() && entry.name.endsWith(".md") && !entry.name.startsWith("_")) {
33
+ if (matcher) {
34
+ const rel = relative(dir, fullPath);
35
+ if (matcher.isIgnored(rel)) continue;
36
+ }
12
37
  results.push(fullPath);
13
38
  }
14
39
  }
@@ -4,6 +4,16 @@ import type { VectorSearch } from "./vector-search.js";
4
4
 
5
5
  export type EmbedFn = (text: string) => Promise<Float32Array>;
6
6
 
7
+ interface ChunkRow {
8
+ id: string;
9
+ document_id: string;
10
+ chunk_index: number;
11
+ char_start: number;
12
+ char_end: number;
13
+ context_prefix: string;
14
+ content: string;
15
+ }
16
+
7
17
  export class HybridSearch {
8
18
  private static readonly RRF_K = 60;
9
19
 
@@ -14,23 +24,64 @@ export class HybridSearch {
14
24
  private readonly embedFn: EmbedFn,
15
25
  ) {}
16
26
 
27
+ /**
28
+ * Returns true when the `chunks` table exists (schema v3+). When absent we
29
+ * behave as if all vector ids are doc ids (pre-chunking behavior).
30
+ */
31
+ private chunksTableExists(): boolean {
32
+ const row = this.db.db
33
+ .prepare(
34
+ "SELECT name FROM sqlite_master WHERE type='table' AND name='chunks'",
35
+ )
36
+ .get();
37
+ return row !== undefined;
38
+ }
39
+
40
+ /**
41
+ * Given a vector-search id, return the `document_id` portion. Chunk ids
42
+ * follow the pattern `{doc_id}#c{index}` per Shared Constraint #6 of the
43
+ * GH-0761 plan. Legacy non-chunk ids pass through unchanged.
44
+ */
45
+ private docIdFromVecId(vecId: string): string {
46
+ const marker = vecId.lastIndexOf("#c");
47
+ if (marker === -1) return vecId;
48
+ const suffix = vecId.slice(marker + 2);
49
+ if (suffix.length === 0 || !/^\d+$/.test(suffix)) return vecId;
50
+ return vecId.slice(0, marker);
51
+ }
52
+
53
+ private fetchChunk(chunkId: string): ChunkRow | undefined {
54
+ if (!this.chunksTableExists()) return undefined;
55
+ return this.db.db
56
+ .prepare(
57
+ `SELECT id, document_id, chunk_index, char_start, char_end, context_prefix, content
58
+ FROM chunks WHERE id = ?`,
59
+ )
60
+ .get(chunkId) as ChunkRow | undefined;
61
+ }
62
+
17
63
  async search(
18
64
  query: string,
19
65
  options: SearchOptions = {},
20
66
  ): Promise<SearchResult[]> {
21
- const { type, tags, includeSuperseded = false, limit = 20 } = options;
67
+ const { type, tags, includeSuperseded = false, limit = 20, memoryTier } = options;
22
68
 
23
- // Run FTS and vector search
69
+ // Run FTS and vector search (FTS already applies memoryTier filter in SQL
70
+ // when the schema supports it).
24
71
  const ftsResults = this.fts.search(query, {
25
72
  includeSuperseded: true,
26
73
  limit: limit * 2,
74
+ memoryTier,
27
75
  });
28
76
 
29
77
  const queryEmbedding = await this.embedFn(query);
30
78
  const vecResults = this.vec.search(queryEmbedding, limit * 2);
31
79
 
32
- // Build RRF score map
80
+ // Build RRF score map, keyed by document_id. When vec ids are chunk ids
81
+ // like `{doc}#c{n}`, we collapse to the parent doc for scoring but
82
+ // remember the best-scoring chunk id per doc for later meta enrichment.
33
83
  const scores = new Map<string, number>();
84
+ const bestChunkByDoc = new Map<string, { chunkId: string; rank: number }>();
34
85
 
35
86
  for (let i = 0; i < ftsResults.length; i++) {
36
87
  const id = ftsResults[i].id;
@@ -39,9 +90,16 @@ export class HybridSearch {
39
90
  }
40
91
 
41
92
  for (let i = 0; i < vecResults.length; i++) {
42
- const id = vecResults[i].id;
93
+ const vecId = vecResults[i].id;
94
+ const docId = this.docIdFromVecId(vecId);
43
95
  const rrfScore = 1 / (HybridSearch.RRF_K + i + 1);
44
- scores.set(id, (scores.get(id) ?? 0) + rrfScore);
96
+ scores.set(docId, (scores.get(docId) ?? 0) + rrfScore);
97
+ if (vecId !== docId) {
98
+ const existing = bestChunkByDoc.get(docId);
99
+ if (!existing || i < existing.rank) {
100
+ bestChunkByDoc.set(docId, { chunkId: vecId, rank: i });
101
+ }
102
+ }
45
103
  }
46
104
 
47
105
  // Build a lookup of FTS results by id for quick access
@@ -98,6 +156,31 @@ export class HybridSearch {
98
156
  });
99
157
  }
100
158
 
159
+ // Post-filter: memory_tier for vector-only hits that bypassed the FTS
160
+ // SQL filter. Also covers the case where the FTS stage returned 0 rows
161
+ // but vec returned chunks from a doc in another tier.
162
+ if (memoryTier && memoryTier !== "any") {
163
+ filtered = filtered.filter((r) => {
164
+ const tier = this.db.getMemoryTier(r.id);
165
+ // When column absent (v2 schema) treat as "doc"
166
+ return (tier ?? "doc") === memoryTier;
167
+ });
168
+ }
169
+
170
+ // Enrich with chunk meta when chunk data is available (best-scoring
171
+ // chunk per doc).
172
+ for (const r of filtered) {
173
+ const best = bestChunkByDoc.get(r.id);
174
+ if (!best) continue;
175
+ const chunk = this.fetchChunk(best.chunkId);
176
+ if (!chunk) continue;
177
+ r.bestChunkId = chunk.id;
178
+ r.chunkIndex = chunk.chunk_index;
179
+ r.charStart = chunk.char_start;
180
+ r.charEnd = chunk.char_end;
181
+ r.contextPrefix = chunk.context_prefix;
182
+ }
183
+
101
184
  return filtered.slice(0, limit);
102
185
  }
103
186
  }
package/src/ignore.ts ADDED
@@ -0,0 +1,82 @@
1
+ import { readFileSync, existsSync } from "node:fs";
2
+ import { join } from "node:path";
3
+ import ignorePkg, { type Ignore } from "ignore";
4
+
// `ignore` is a CJS module: `module.exports = factory` with
// `factory.default = factory` attached. Under `NodeNext` + ESM the default
// import may therefore arrive either as the factory itself or as a namespace
// object wrapping it, depending on the interop mode. Pick whichever form is
// actually callable.
type IgnoreFactory = (options?: { ignorecase?: boolean }) => Ignore;
const ignoreExport: unknown = ignorePkg;
const ignore: IgnoreFactory =
  typeof ignoreExport === "function"
    ? (ignoreExport as IgnoreFactory)
    : (ignoreExport as { default: IgnoreFactory }).default;
14
+
/**
 * Baseline gitignore-syntax patterns that `loadIgnoreForRoot` adds for every
 * root, before any caller-supplied patterns or `.ralphignore` contents.
 */
export const DEFAULT_IGNORE_PATTERNS: string[] = [
  // Directories (trailing slash = directory-only pattern).
  ".claude/",
  "node_modules/",
  "dist/",
  ".git/",
  // Files.
  "*.log",
];
27
+
/**
 * Matcher handed out by `loadIgnoreForRoot`.
 *
 * Callers pass a path expressed relative to the root the matcher was built
 * for and get back whether the scanner should skip that path.
 */
export interface IgnoreMatcher {
  /** `true` when `relativePath` matches one of the matcher's ignore patterns. */
  isIgnored(relativePath: string): boolean;
}
36
+
/**
 * Build an {@link IgnoreMatcher} for a given root directory. The matcher
 * combines (in order):
 *   1. {@link DEFAULT_IGNORE_PATTERNS} — always applied.
 *   2. `globalPatterns` — caller-supplied patterns (typically from
 *      `knowledge.config.json`'s `ignorePatterns`).
 *   3. Contents of `<rootDir>/.ralphignore`, if present. A read failure is
 *      reported via `console.warn` and otherwise ignored.
 *
 * All patterns follow gitignore syntax via the `ignore` package.
 *
 * The returned `isIgnored` never throws on odd input: empty paths, `.`, and
 * paths that climb out of the root (`..`, `../x`) are reported as not
 * ignored, and a leading `/` or `./` is stripped before matching.
 *
 * @param rootDir path of the root being scanned
 * @param globalPatterns optional extra patterns applied before the per-root
 *   `.ralphignore` file
 * @returns a matcher for paths relative to `rootDir`
 */
export function loadIgnoreForRoot(
  rootDir: string,
  globalPatterns?: string[],
): IgnoreMatcher {
  const ign: Ignore = ignore();
  ign.add(DEFAULT_IGNORE_PATTERNS);
  if (globalPatterns && globalPatterns.length > 0) {
    ign.add(globalPatterns);
  }

  const ralphIgnorePath = join(rootDir, ".ralphignore");
  if (existsSync(ralphIgnorePath)) {
    try {
      ign.add(readFileSync(ralphIgnorePath, "utf-8"));
    } catch (e: unknown) {
      const reason = e instanceof Error ? e.message : String(e);
      console.warn(
        `Failed to read .ralphignore at ${ralphIgnorePath}: ${reason}`,
      );
    }
  }

  return {
    isIgnored(relativePath: string): boolean {
      if (!relativePath) return false;
      // `ignore` requires forward-slash, `path.relative()`-style paths: no
      // leading slash and no leading `./`.
      const normalized = relativePath
        .replace(/\\/g, "/")
        .replace(/^\/+/, "")
        .replace(/^(?:\.\/+)+/, "");
      if (!normalized) return false;
      // A first segment made only of dots (`.`, `..`, `../x`) is rejected by
      // `ignore` with a RangeError. Such paths are the root itself or lie
      // outside it, so no root-relative pattern can apply to them.
      if (/^\.+(?:\/|$)/.test(normalized)) return false;
      return ign.ignores(normalized);
    },
  };
}