ralph-hero-knowledge-index 0.1.21 → 0.1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/.claude-plugin/plugin.json +1 -1
  2. package/.mcp.json +1 -1
  3. package/README.md +109 -0
  4. package/dist/config.d.ts +32 -0
  5. package/dist/config.js +75 -0
  6. package/dist/config.js.map +1 -0
  7. package/dist/db.d.ts +7 -0
  8. package/dist/db.js +17 -0
  9. package/dist/db.js.map +1 -1
  10. package/dist/file-scanner.d.ts +13 -1
  11. package/dist/file-scanner.js +30 -3
  12. package/dist/file-scanner.js.map +1 -1
  13. package/dist/hybrid-search.d.ts +12 -0
  14. package/dist/hybrid-search.js +74 -5
  15. package/dist/hybrid-search.js.map +1 -1
  16. package/dist/ignore.d.ts +29 -0
  17. package/dist/ignore.js +65 -0
  18. package/dist/ignore.js.map +1 -0
  19. package/dist/index.d.ts +9 -1
  20. package/dist/index.js +166 -6
  21. package/dist/index.js.map +1 -1
  22. package/dist/llm-client.d.ts +41 -0
  23. package/dist/llm-client.js +98 -0
  24. package/dist/llm-client.js.map +1 -0
  25. package/dist/reindex.d.ts +22 -3
  26. package/dist/reindex.js +60 -8
  27. package/dist/reindex.js.map +1 -1
  28. package/dist/search.d.ts +12 -0
  29. package/dist/search.js +15 -1
  30. package/dist/search.js.map +1 -1
  31. package/package.json +2 -1
  32. package/src/__tests__/config.test.ts +173 -0
  33. package/src/__tests__/file-scanner.test.ts +88 -0
  34. package/src/__tests__/hybrid-search.test.ts +107 -0
  35. package/src/__tests__/ignore.test.ts +86 -0
  36. package/src/__tests__/index.test.ts +450 -0
  37. package/src/__tests__/llm-client.test.ts +349 -0
  38. package/src/__tests__/memory-stats.test.ts +204 -0
  39. package/src/__tests__/reindex.test.ts +148 -2
  40. package/src/__tests__/search.test.ts +37 -0
  41. package/src/config.ts +105 -0
  42. package/src/db.ts +17 -0
  43. package/src/file-scanner.ts +28 -3
  44. package/src/hybrid-search.ts +88 -5
  45. package/src/ignore.ts +82 -0
  46. package/src/index.ts +202 -7
  47. package/src/llm-client.ts +136 -0
  48. package/src/reindex.ts +80 -9
  49. package/src/search.ts +27 -1
package/src/index.ts CHANGED
@@ -22,12 +22,60 @@ function resolveEnv(name: string): string | undefined {
22
22
  return val;
23
23
  }
24
24
 
25
- export function createServer(dbPath: string) {
25
+ /**
26
+ * True when the `chunks` table exists in the schema (v3+). When absent,
27
+ * `knowledge_memory_stats` reports 0 chunks-per-doc percentiles.
28
+ */
29
+ function chunksTableExists(db: KnowledgeDB): boolean {
30
+ const row = db.db
31
+ .prepare(
32
+ "SELECT name FROM sqlite_master WHERE type='table' AND name='chunks'",
33
+ )
34
+ .get();
35
+ return row !== undefined;
36
+ }
37
+
38
+ /**
39
+ * True when the `documents.memory_tier` column exists (v3+). Used to decide
40
+ * whether tier-level stats can be produced from the schema at all.
41
+ */
42
+ function memoryTierColumnExists(db: KnowledgeDB): boolean {
43
+ const rows = db.db
44
+ .prepare("PRAGMA table_info(documents)")
45
+ .all() as Array<{ name: string }>;
46
+ return rows.some((r) => r.name === "memory_tier");
47
+ }
48
+
49
+ /**
50
+ * Percentile helper using nearest-rank. For n sorted values returns the value
51
+ * at index `floor(n * p)` clamped to [0, n-1]. Returns 0 on empty input.
52
+ * Matches the spec in Phase 8 Task 8.4: "pick index at floor(n*0.5)".
53
+ */
54
+ function percentile(sortedValues: number[], p: number): number {
55
+ if (sortedValues.length === 0) return 0;
56
+ const idx = Math.min(
57
+ sortedValues.length - 1,
58
+ Math.max(0, Math.floor(sortedValues.length * p)),
59
+ );
60
+ return sortedValues[idx];
61
+ }
62
+
63
+ /**
64
+ * Options for `createServer`. When `embedFn` is provided it replaces the
65
+ * production `embed` import, allowing tests to bypass the HuggingFace model
66
+ * download.
67
+ */
68
+ export interface CreateServerOptions {
69
+ embedFn?: (text: string) => Promise<Float32Array>;
70
+ }
71
+
72
+ export function createServer(dbPath: string, opts: CreateServerOptions = {}) {
26
73
  const server = new McpServer({ name: "ralph-hero-knowledge", version: "0.1.0" });
27
74
  const db = new KnowledgeDB(dbPath);
28
75
  const fts = new FtsSearch(db);
29
76
  const vec = new VectorSearch(db);
30
- const hybrid = new HybridSearch(db, fts, vec, embed);
77
+ const embedImpl = opts.embedFn ?? embed;
78
+ const hybrid = new HybridSearch(db, fts, vec, embedImpl);
31
79
  const traverser = new Traverser(db);
32
80
 
33
81
  server.tool(
@@ -40,6 +88,16 @@ export function createServer(dbPath: string) {
40
88
  limit: z.number().optional().describe("Max results (default: 10)"),
41
89
  includeSuperseded: z.boolean().optional().describe("Include superseded documents (default: false)"),
42
90
  brief: z.boolean().optional().describe("Return minimal metadata only (default: false)"),
91
+ memory_tier: z
92
+ .enum(["doc", "raw", "reflection", "any"])
93
+ .optional()
94
+ .default("any")
95
+ .describe("Filter by memory tier: 'doc' (curated), 'raw' (dream-loop ingest), 'reflection' (synthesized), 'any' (default)"),
96
+ return_chunk_meta: z
97
+ .boolean()
98
+ .optional()
99
+ .default(false)
100
+ .describe("Include chunk_index/char_start/char_end/context_prefix in each hit when chunk data is available"),
43
101
  },
44
102
  async (args) => {
45
103
  try {
@@ -48,18 +106,33 @@ export function createServer(dbPath: string) {
48
106
  type: args.type,
49
107
  limit: args.limit ?? 10,
50
108
  includeSuperseded: args.includeSuperseded,
109
+ memoryTier: args.memory_tier,
51
110
  });
52
- const enriched = results.map(r => {
53
- const base = { ...r, tags: db.getTags(r.id) };
111
+ const enriched = results.map((r) => {
112
+ // Start with the camelCase SearchResult shape so existing callers
113
+ // keep working, then optionally add snake_case aliases for new
114
+ // chunk fields and strip them when callers didn't opt in.
115
+ const { chunkIndex, charStart, charEnd, contextPrefix, bestChunkId, ...rest } = r;
116
+ const base: Record<string, unknown> = { ...rest, tags: db.getTags(r.id) };
117
+ if (args.return_chunk_meta) {
118
+ if (chunkIndex !== undefined) base.chunk_index = chunkIndex;
119
+ if (charStart !== undefined) base.char_start = charStart;
120
+ if (charEnd !== undefined) base.char_end = charEnd;
121
+ if (contextPrefix !== undefined) base.context_prefix = contextPrefix;
122
+ if (bestChunkId !== undefined) base.best_chunk_id = bestChunkId;
123
+ }
54
124
  // SearchResult does not carry githubIssue — fetch from documents table
55
125
  const doc = db.getDocument(r.id);
56
126
  if (doc?.githubIssue) {
57
127
  const outcomes = db.getOutcomeSummary(doc.githubIssue);
58
- if (outcomes) return { ...base, outcomes_summary: outcomes };
128
+ if (outcomes) base.outcomes_summary = outcomes;
59
129
  }
60
130
  return base;
61
131
  });
62
- const formatted = formatSearchResults(enriched, args.brief ?? false);
132
+ const formatted = formatSearchResults(
133
+ enriched as unknown as Parameters<typeof formatSearchResults>[0],
134
+ args.brief ?? false,
135
+ );
63
136
  return { content: [{ type: "text" as const, text: JSON.stringify(formatted, null, 2) }] };
64
137
  } catch (e) {
65
138
  return { content: [{ type: "text" as const, text: `Error: ${(e as Error).message}` }], isError: true };
@@ -76,13 +149,26 @@ export function createServer(dbPath: string) {
76
149
  depth: z.number().optional().describe("Max traversal depth (default: 3)"),
77
150
  direction: z.enum(["outgoing", "incoming"]).optional().describe("Edge direction (default: outgoing)"),
78
151
  brief: z.boolean().optional().describe("Return minimal metadata only (default: false)"),
152
+ memory_tier: z
153
+ .enum(["doc", "raw", "reflection", "any"])
154
+ .optional()
155
+ .default("any")
156
+ .describe("Filter traversed nodes by memory tier (default: 'any')"),
79
157
  },
80
158
  async (args) => {
81
159
  try {
82
160
  const opts = { type: args.type, depth: args.depth ?? 3 };
83
- const results = args.direction === "incoming"
161
+ let results = args.direction === "incoming"
84
162
  ? traverser.traverseIncoming(args.from, opts)
85
163
  : traverser.traverse(args.from, opts);
164
+ if (args.memory_tier && args.memory_tier !== "any") {
165
+ const wantedTier = args.memory_tier;
166
+ results = results.filter((r) => {
167
+ const tier = db.getMemoryTier(r.targetId);
168
+ // When memory_tier column is absent (pre-v3 DB) treat as "doc"
169
+ return (tier ?? "doc") === wantedTier;
170
+ });
171
+ }
86
172
  const formatted = formatTraverseResults(results, (id) => db.getTags(id), args.brief ?? false);
87
173
  return { content: [{ type: "text" as const, text: JSON.stringify(formatted, null, 2) }] };
88
174
  } catch (e) {
@@ -91,6 +177,115 @@ export function createServer(dbPath: string) {
91
177
  },
92
178
  );
93
179
 
180
+ server.tool(
181
+ "knowledge_memory_stats",
182
+ "Return counts of documents by memory_tier plus chunk percentiles and last-reflection timestamp. Used by the dream-loop to confirm ingest/reflection completion.",
183
+ {
184
+ since: z
185
+ .string()
186
+ .optional()
187
+ .describe("ISO timestamp — counts for 'new_since' are computed against this. Defaults to 24 hours ago."),
188
+ },
189
+ async (args) => {
190
+ try {
191
+ const since = args.since ?? new Date(Date.now() - 24 * 3600 * 1000).toISOString();
192
+ const hasTier = memoryTierColumnExists(db);
193
+ const hasChunks = chunksTableExists(db);
194
+
195
+ const totalRow = db.db
196
+ .prepare("SELECT COUNT(*) AS c FROM documents")
197
+ .get() as { c: number };
198
+ const totalDocuments = totalRow.c;
199
+
200
+ const byTier: Record<"doc" | "raw" | "reflection", number> = {
201
+ doc: 0,
202
+ raw: 0,
203
+ reflection: 0,
204
+ };
205
+ const newSince: Record<"doc" | "raw" | "reflection", number> = {
206
+ doc: 0,
207
+ raw: 0,
208
+ reflection: 0,
209
+ };
210
+
211
+ if (hasTier) {
212
+ const rows = db.db
213
+ .prepare(
214
+ `SELECT memory_tier AS tier, COUNT(*) AS c
215
+ FROM documents GROUP BY memory_tier`,
216
+ )
217
+ .all() as Array<{ tier: string; c: number }>;
218
+ for (const r of rows) {
219
+ if (r.tier === "doc" || r.tier === "raw" || r.tier === "reflection") {
220
+ byTier[r.tier] = r.c;
221
+ }
222
+ }
223
+ const newRows = db.db
224
+ .prepare(
225
+ `SELECT memory_tier AS tier, COUNT(*) AS c
226
+ FROM documents
227
+ WHERE date IS NOT NULL AND date >= @since
228
+ GROUP BY memory_tier`,
229
+ )
230
+ .all({ since }) as Array<{ tier: string; c: number }>;
231
+ for (const r of newRows) {
232
+ if (r.tier === "doc" || r.tier === "raw" || r.tier === "reflection") {
233
+ newSince[r.tier] = r.c;
234
+ }
235
+ }
236
+ } else {
237
+ // v2 schema — everything treated as "doc"
238
+ byTier.doc = totalDocuments;
239
+ const newDocRow = db.db
240
+ .prepare(
241
+ "SELECT COUNT(*) AS c FROM documents WHERE date IS NOT NULL AND date >= ?",
242
+ )
243
+ .get(since) as { c: number };
244
+ newSince.doc = newDocRow.c;
245
+ }
246
+
247
+ let chunksPerDocP50 = 0;
248
+ let chunksPerDocP90 = 0;
249
+ if (hasChunks) {
250
+ const perDoc = db.db
251
+ .prepare(
252
+ `SELECT COUNT(*) AS c FROM chunks GROUP BY document_id`,
253
+ )
254
+ .all() as Array<{ c: number }>;
255
+ const counts = perDoc.map((r) => r.c).sort((a, b) => a - b);
256
+ chunksPerDocP50 = percentile(counts, 0.5);
257
+ chunksPerDocP90 = percentile(counts, 0.9);
258
+ }
259
+
260
+ let lastReflectionAt: string | null = null;
261
+ if (hasTier) {
262
+ const row = db.db
263
+ .prepare(
264
+ `SELECT date FROM documents
265
+ WHERE memory_tier = 'reflection' AND date IS NOT NULL
266
+ ORDER BY date DESC LIMIT 1`,
267
+ )
268
+ .get() as { date: string } | undefined;
269
+ lastReflectionAt = row?.date ?? null;
270
+ }
271
+
272
+ const payload = {
273
+ total_documents: totalDocuments,
274
+ by_tier: byTier,
275
+ new_since: newSince,
276
+ chunks_per_doc_p50: chunksPerDocP50,
277
+ chunks_per_doc_p90: chunksPerDocP90,
278
+ last_reflection_at: lastReflectionAt,
279
+ since,
280
+ };
281
+
282
+ return { content: [{ type: "text" as const, text: JSON.stringify(payload, null, 2) }] };
283
+ } catch (e) {
284
+ return { content: [{ type: "text" as const, text: `Error: ${(e as Error).message}` }], isError: true };
285
+ }
286
+ },
287
+ );
288
+
94
289
  server.tool(
95
290
  "knowledge_record_outcome",
96
291
  "Record a pipeline outcome event (research, plan, phase, validation, etc.)",
@@ -0,0 +1,136 @@
1
+ /**
2
+ * Minimal OpenAI-compatible LLM client for Contextual Retrieval.
3
+ *
4
+ * Probes `${baseUrl}/v1/models` for availability and calls
5
+ * `${baseUrl}/v1/chat/completions` to generate short context prefixes for
6
+ * document chunks. Uses native `fetch` + `AbortController` — no SDK dependency.
7
+ *
8
+ * Fail-open semantics: network errors, timeouts, non-200 responses, and
9
+ * malformed JSON all resolve without throwing. `available()` returns `false`;
10
+ * `contextualize()` returns an empty string. The caller is expected to treat
11
+ * an empty context prefix as "no context available" and continue.
12
+ *
13
+ * Defaults target `gemma-lab` at `http://localhost:8000` with the Gemma 4 26B
14
+ * MXFP8 model. Override via `RALPH_LLM_URL` / `RALPH_LLM_MODEL` env vars or
15
+ * explicit options.
16
+ */
17
+
18
+ export interface LlmClientOptions {
19
+ /** Base URL for the OpenAI-compatible endpoint. Default: RALPH_LLM_URL env or http://localhost:8000. */
20
+ baseUrl?: string;
21
+ /** Model identifier sent in chat completion requests. Default: RALPH_LLM_MODEL env or mlx-community/gemma-4-26b-a4b-it-mxfp8. */
22
+ model?: string;
23
+ /** Timeout for contextualize() in milliseconds. Default: 30000. */
24
+ timeoutMs?: number;
25
+ }
26
+
27
+ export interface LlmClient {
28
+ /**
29
+ * Probe the endpoint for availability.
30
+ * Returns `true` iff `${baseUrl}/v1/models` responds with HTTP 200 within 2000ms.
31
+ * Returns `false` on timeout, connection refused, non-200, or any thrown exception.
32
+ */
33
+ available(): Promise<boolean>;
34
+
35
+ /**
36
+ * Generate a short (≤100 token) context prefix situating `chunkContent`
37
+ * within `fullDocument`, using the Anthropic Contextual Retrieval prompt.
38
+ *
39
+ * Returns the trimmed content string on success, or `""` on any error
40
+ * (network failure, timeout, non-2xx response, missing choices, malformed JSON).
41
+ */
42
+ contextualize(fullDocument: string, chunkContent: string): Promise<string>;
43
+ }
44
+
45
+ const DEFAULT_BASE_URL = "http://localhost:8000";
46
+ const DEFAULT_MODEL = "mlx-community/gemma-4-26b-a4b-it-mxfp8";
47
+ const DEFAULT_TIMEOUT_MS = 30000;
48
+ const AVAILABLE_PROBE_TIMEOUT_MS = 2000;
49
+ const MAX_CONTEXT_TOKENS = 120;
50
+
51
+ /**
52
+ * Anthropic Contextual Retrieval prompt, verbatim from the parent plan Phase 2.
53
+ * Placeholders `{fullDocument}` and `{chunkContent}` are filled at call time.
54
+ */
55
+ function buildContextualizePrompt(fullDocument: string, chunkContent: string): string {
56
+ return `<document>
57
+ ${fullDocument}
58
+ </document>
59
+
60
+ Here is the chunk we want to situate within the whole document:
61
+
62
+ <chunk>
63
+ ${chunkContent}
64
+ </chunk>
65
+
66
+ Please give a short succinct context to situate this chunk within the overall
67
+ document for the purposes of improving search retrieval of the chunk. Answer only
68
+ with the succinct context and nothing else.`;
69
+ }
70
+
71
+ interface ChatCompletionResponse {
72
+ choices?: Array<{
73
+ message?: {
74
+ content?: string;
75
+ };
76
+ }>;
77
+ }
78
+
79
+ export function createLlmClient(opts: LlmClientOptions = {}): LlmClient {
80
+ const baseUrl = opts.baseUrl ?? process.env.RALPH_LLM_URL ?? DEFAULT_BASE_URL;
81
+ const model = opts.model ?? process.env.RALPH_LLM_MODEL ?? DEFAULT_MODEL;
82
+ const timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
83
+
84
+ async function available(): Promise<boolean> {
85
+ const controller = new AbortController();
86
+ const timer = setTimeout(() => controller.abort(), AVAILABLE_PROBE_TIMEOUT_MS);
87
+ try {
88
+ const response = await fetch(`${baseUrl}/v1/models`, {
89
+ method: "GET",
90
+ signal: controller.signal,
91
+ });
92
+ return response.status === 200;
93
+ } catch {
94
+ return false;
95
+ } finally {
96
+ clearTimeout(timer);
97
+ }
98
+ }
99
+
100
+ async function contextualize(fullDocument: string, chunkContent: string): Promise<string> {
101
+ const controller = new AbortController();
102
+ const timer = setTimeout(() => controller.abort(), timeoutMs);
103
+ try {
104
+ const prompt = buildContextualizePrompt(fullDocument, chunkContent);
105
+ const response = await fetch(`${baseUrl}/v1/chat/completions`, {
106
+ method: "POST",
107
+ headers: {
108
+ "Content-Type": "application/json",
109
+ },
110
+ body: JSON.stringify({
111
+ model,
112
+ messages: [{ role: "user", content: prompt }],
113
+ max_tokens: MAX_CONTEXT_TOKENS,
114
+ }),
115
+ signal: controller.signal,
116
+ });
117
+
118
+ if (!response.ok) {
119
+ return "";
120
+ }
121
+
122
+ const data = (await response.json()) as ChatCompletionResponse;
123
+ const content = data?.choices?.[0]?.message?.content;
124
+ if (typeof content !== "string") {
125
+ return "";
126
+ }
127
+ return content.trim();
128
+ } catch {
129
+ return "";
130
+ } finally {
131
+ clearTimeout(timer);
132
+ }
133
+ }
134
+
135
+ return { available, contextualize };
136
+ }
package/src/reindex.ts CHANGED
@@ -8,8 +8,15 @@ import { embed, prepareTextForEmbedding } from "./embedder.js";
8
8
  import { parseDocument, type ParsedDocument } from "./parser.js";
9
9
  import { findMarkdownFiles } from "./file-scanner.js";
10
10
  import { generateIndexes } from "./generate-indexes.js";
11
+ import { loadConfig, type KnowledgeConfig } from "./config.js";
12
+ import { loadIgnoreForRoot } from "./ignore.js";
11
13
 
12
- export async function reindex(dirs: string[], dbPath: string, generate: boolean = false): Promise<void> {
14
+ export async function reindex(
15
+ dirs: string[],
16
+ dbPath: string,
17
+ generate: boolean = false,
18
+ ignorePatterns?: string[],
19
+ ): Promise<void> {
13
20
  console.log(`Indexing ${dirs.join(", ")} -> ${dbPath}`);
14
21
 
15
22
  const db = new KnowledgeDB(dbPath);
@@ -32,7 +39,8 @@ export async function reindex(dirs: string[], dbPath: string, generate: boolean
32
39
  // Phase 1: Discover files on disk
33
40
  const filesOnDisk: string[] = [];
34
41
  for (const dir of dirs) {
35
- const found = findMarkdownFiles(dir);
42
+ const matcher = loadIgnoreForRoot(dir, ignorePatterns);
43
+ const found = findMarkdownFiles(dir, matcher);
36
44
  console.log(` ${dir}: ${found.length} files`);
37
45
  filesOnDisk.push(...found);
38
46
  }
@@ -182,31 +190,94 @@ export async function reindex(dirs: string[], dbPath: string, generate: boolean
182
190
 
183
191
  const DEFAULT_DB_PATH = join(homedir(), ".ralph-hero", "knowledge.db");
184
192
 
185
- export function resolveDirs(): { dirs: string[]; dbPath: string; generate: boolean } {
193
+ export type ResolvedDirsSource = "cli" | "env" | "config" | "fallback";
194
+
195
+ export interface ResolvedDirs {
196
+ dirs: string[];
197
+ dbPath: string;
198
+ generate: boolean;
199
+ source: ResolvedDirsSource;
200
+ config: KnowledgeConfig;
201
+ }
202
+
203
+ /**
204
+ * Resolve the set of roots, database path, and generate flag for a reindex
205
+ * run. Precedence (highest to lowest):
206
+ * 1. CLI positional args
207
+ * 2. `RALPH_KNOWLEDGE_DIRS` environment variable
208
+ * 3. `config.roots` from `~/.ralph/knowledge.config.json`
209
+ * 4. `"../../thoughts"` fallback
210
+ *
211
+ * `dbPath` precedence is independent: CLI `.db` positional > `RALPH_KNOWLEDGE_DB`
212
+ * env var > `config.dbPath` > {@link DEFAULT_DB_PATH}.
213
+ *
214
+ * The returned `config` is forwarded to the caller so `ignorePatterns` can be
215
+ * threaded into {@link reindex}.
216
+ */
217
+ export function resolveDirs(): ResolvedDirs {
186
218
  const cliArgs = process.argv.slice(2);
187
219
  const noGenerate = cliArgs.includes("--no-generate");
188
220
  const positional = cliArgs.filter(a => !a.startsWith("--"));
189
221
  const cliDb = positional.find(a => a.endsWith(".db"));
190
222
  const cliDirs = positional.filter(a => !a.endsWith(".db"));
191
223
 
224
+ const config = loadConfig();
225
+
226
+ const resolveDbPath = (): string =>
227
+ cliDb ??
228
+ process.env.RALPH_KNOWLEDGE_DB ??
229
+ config.dbPath ??
230
+ DEFAULT_DB_PATH;
231
+
192
232
  if (cliDirs.length > 0) {
193
- return { dirs: cliDirs, dbPath: cliDb ?? DEFAULT_DB_PATH, generate: !noGenerate };
233
+ console.log("Using roots from: CLI");
234
+ return {
235
+ dirs: cliDirs,
236
+ dbPath: resolveDbPath(),
237
+ generate: !noGenerate,
238
+ source: "cli",
239
+ config,
240
+ };
194
241
  }
195
242
 
196
243
  const envDirs = process.env.RALPH_KNOWLEDGE_DIRS;
197
244
  if (envDirs) {
245
+ const parsed = envDirs.split(",").map(d => d.trim()).filter(Boolean);
246
+ if (parsed.length > 0) {
247
+ console.log("Using roots from: env");
248
+ return {
249
+ dirs: parsed,
250
+ dbPath: resolveDbPath(),
251
+ generate: !noGenerate,
252
+ source: "env",
253
+ config,
254
+ };
255
+ }
256
+ }
257
+
258
+ if (config.roots && config.roots.length > 0) {
259
+ console.log("Using roots from: config");
198
260
  return {
199
- dirs: envDirs.split(",").map(d => d.trim()).filter(Boolean),
200
- dbPath: cliDb ?? process.env.RALPH_KNOWLEDGE_DB ?? DEFAULT_DB_PATH,
261
+ dirs: config.roots,
262
+ dbPath: resolveDbPath(),
201
263
  generate: !noGenerate,
264
+ source: "config",
265
+ config,
202
266
  };
203
267
  }
204
268
 
205
- return { dirs: ["../../thoughts"], dbPath: cliDb ?? DEFAULT_DB_PATH, generate: !noGenerate };
269
+ console.log("Using roots from: fallback");
270
+ return {
271
+ dirs: ["../../thoughts"],
272
+ dbPath: resolveDbPath(),
273
+ generate: !noGenerate,
274
+ source: "fallback",
275
+ config,
276
+ };
206
277
  }
207
278
 
208
279
  const isMain = process.argv[1]?.endsWith("reindex.js");
209
280
  if (isMain) {
210
- const { dirs, dbPath, generate } = resolveDirs();
211
- reindex(dirs, dbPath, generate).catch(console.error);
281
+ const { dirs, dbPath, generate, config } = resolveDirs();
282
+ reindex(dirs, dbPath, generate, config.ignorePatterns).catch(console.error);
212
283
  }
package/src/search.ts CHANGED
@@ -1,10 +1,13 @@
1
1
  import type { KnowledgeDB } from "./db.js";
2
2
 
3
+ export type MemoryTier = "doc" | "raw" | "reflection" | "any";
4
+
3
5
  export interface SearchOptions {
4
6
  type?: string;
5
7
  tags?: string[];
6
8
  includeSuperseded?: boolean;
7
9
  limit?: number;
10
+ memoryTier?: MemoryTier;
8
11
  }
9
12
 
10
13
  export interface SearchResult {
@@ -16,6 +19,13 @@ export interface SearchResult {
16
19
  date: string | null;
17
20
  score: number;
18
21
  snippet: string;
22
+ // Optional chunk-level metadata. Populated when chunk data is available
23
+ // for the best-scoring chunk of this document.
24
+ chunkIndex?: number;
25
+ charStart?: number;
26
+ charEnd?: number;
27
+ contextPrefix?: string;
28
+ bestChunkId?: string;
19
29
  }
20
30
 
21
31
  export class FtsSearch {
@@ -99,8 +109,19 @@ export class FtsSearch {
99
109
  return tokens.map(t => '"' + t.replace(/"/g, '""') + '"').join(" ");
100
110
  }
101
111
 
112
+ /**
113
+ * Returns true when the `documents.memory_tier` column exists (schema v3+).
114
+ * On v2 schemas this is false and the memoryTier filter is silently ignored.
115
+ */
116
+ private memoryTierColumnExists(): boolean {
117
+ const rows = this.db.db
118
+ .prepare("PRAGMA table_info(documents)")
119
+ .all() as Array<{ name: string }>;
120
+ return rows.some((r) => r.name === "memory_tier");
121
+ }
122
+
102
123
  search(query: string, options: SearchOptions = {}): SearchResult[] {
103
- const { type, tags, includeSuperseded = false, limit = 20 } = options;
124
+ const { type, tags, includeSuperseded = false, limit = 20, memoryTier } = options;
104
125
 
105
126
  const conditions: string[] = ["documents_fts MATCH @query"];
106
127
  const params: Record<string, unknown> = { query: this.escapeFts5Query(query), limit };
@@ -114,6 +135,11 @@ export class FtsSearch {
114
135
  params.type = type;
115
136
  }
116
137
 
138
+ if (memoryTier && memoryTier !== "any" && this.memoryTierColumnExists()) {
139
+ conditions.push("d.memory_tier = @memoryTier");
140
+ params.memoryTier = memoryTier;
141
+ }
142
+
117
143
  let joinClause = "";
118
144
  if (tags && tags.length > 0) {
119
145
  joinClause = "JOIN tags t ON t.doc_id = d.id";