@winci/local-rag 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/.claude-plugin/plugin.json +24 -0
  2. package/.mcp.json +11 -0
  3. package/LICENSE +21 -0
  4. package/README.md +567 -0
  5. package/hooks/hooks.json +25 -0
  6. package/hooks/scripts/reindex-file.sh +19 -0
  7. package/hooks/scripts/session-start.sh +11 -0
  8. package/package.json +52 -0
  9. package/skills/local-rag/SKILL.md +42 -0
  10. package/src/cli/commands/analytics.ts +58 -0
  11. package/src/cli/commands/benchmark.ts +30 -0
  12. package/src/cli/commands/checkpoint.ts +85 -0
  13. package/src/cli/commands/conversation.ts +102 -0
  14. package/src/cli/commands/demo.ts +119 -0
  15. package/src/cli/commands/eval.ts +31 -0
  16. package/src/cli/commands/index-cmd.ts +26 -0
  17. package/src/cli/commands/init.ts +35 -0
  18. package/src/cli/commands/map.ts +21 -0
  19. package/src/cli/commands/remove.ts +15 -0
  20. package/src/cli/commands/search-cmd.ts +59 -0
  21. package/src/cli/commands/serve.ts +5 -0
  22. package/src/cli/commands/status.ts +13 -0
  23. package/src/cli/index.ts +117 -0
  24. package/src/cli/progress.ts +21 -0
  25. package/src/cli/setup.ts +192 -0
  26. package/src/config/index.ts +101 -0
  27. package/src/conversation/indexer.ts +147 -0
  28. package/src/conversation/parser.ts +323 -0
  29. package/src/db/analytics.ts +116 -0
  30. package/src/db/annotations.ts +161 -0
  31. package/src/db/checkpoints.ts +166 -0
  32. package/src/db/conversation.ts +241 -0
  33. package/src/db/files.ts +146 -0
  34. package/src/db/graph.ts +250 -0
  35. package/src/db/index.ts +468 -0
  36. package/src/db/search.ts +244 -0
  37. package/src/db/types.ts +85 -0
  38. package/src/embeddings/embed.ts +73 -0
  39. package/src/graph/resolver.ts +305 -0
  40. package/src/indexing/chunker.ts +523 -0
  41. package/src/indexing/indexer.ts +263 -0
  42. package/src/indexing/parse.ts +99 -0
  43. package/src/indexing/watcher.ts +84 -0
  44. package/src/main.ts +8 -0
  45. package/src/search/benchmark.ts +139 -0
  46. package/src/search/eval.ts +171 -0
  47. package/src/search/hybrid.ts +194 -0
  48. package/src/search/reranker.ts +99 -0
  49. package/src/search/usages.ts +27 -0
  50. package/src/server/index.ts +126 -0
  51. package/src/tools/analytics-tools.ts +58 -0
  52. package/src/tools/annotation-tools.ts +89 -0
  53. package/src/tools/checkpoint-tools.ts +147 -0
  54. package/src/tools/conversation-tools.ts +86 -0
  55. package/src/tools/git-tools.ts +103 -0
  56. package/src/tools/graph-tools.ts +163 -0
  57. package/src/tools/index-tools.ts +91 -0
  58. package/src/tools/index.ts +33 -0
  59. package/src/tools/search.ts +238 -0
  60. package/src/types.ts +9 -0
  61. package/src/utils/log.ts +39 -0
@@ -0,0 +1,59 @@
1
+ import { resolve } from "path";
2
+ import { RagDB } from "../../db";
3
+ import { loadConfig } from "../../config";
4
+ import { search, searchChunks } from "../../search/hybrid";
5
+
6
/**
 * CLI handler for `local-rag search`: runs a file-level search and prints one
 * scored path (plus a short snippet preview) per result.
 *
 * @param args    CLI arguments with the sub-command at index 0 and the query at index 1.
 * @param getFlag Looks up the value following a `--flag`, or `undefined` when absent.
 *
 * Exits the process with code 1 when no query is given. The database handle is
 * always closed, including when config loading or the search itself throws.
 */
export async function searchCommand(args: string[], getFlag: (flag: string) => string | undefined) {
  const query = args[1];
  if (!query) {
    console.error("Usage: local-rag search <query> [--top N] [--dir D]");
    process.exit(1);
  }

  const dir = resolve(getFlag("--dir") || ".");
  const db = new RagDB(dir);
  try {
    const config = await loadConfig(dir);

    // A missing or non-numeric --top falls back to the configured default
    // instead of handing NaN to the search layer.
    const requestedTop = parseInt(getFlag("--top") || "", 10);
    const top = Number.isInteger(requestedTop) && requestedTop > 0 ? requestedTop : config.searchTopK;

    const results = await search(query, db, top, 0, config.hybridWeight, config.enableReranking);

    if (results.length === 0) {
      console.log("No results found. Has the directory been indexed?");
      return;
    }

    for (const r of results) {
      console.log(`${r.score.toFixed(4)} ${r.path}`);
      // Only print a preview when the result actually carries a snippet;
      // otherwise the line would read "undefined...".
      const snippet = r.snippets[0];
      if (snippet !== undefined) {
        const preview = snippet.slice(0, 120).replace(/\n/g, " ");
        console.log(` ${preview}...`);
      }
      console.log();
    }
  } finally {
    db.close();
  }
}
32
+
33
/**
 * CLI handler for `local-rag read`: retrieves the most relevant chunks for a
 * query and prints each chunk's full content under a scored header.
 *
 * @param args    CLI arguments with the sub-command at index 0 and the query at index 1.
 * @param getFlag Looks up the value following a `--flag`, or `undefined` when absent.
 *
 * `--top` defaults to 8 and `--threshold` to 0.3; values that do not parse as
 * numbers fall back to those defaults. Exits with code 1 when no query is given.
 * The database handle is always closed, even when the search throws.
 */
export async function readCommand(args: string[], getFlag: (flag: string) => string | undefined) {
  const query = args[1];
  if (!query) {
    console.error("Usage: local-rag read <query> [--top N] [--threshold T] [--dir D]");
    process.exit(1);
  }

  const dir = resolve(getFlag("--dir") || ".");
  const db = new RagDB(dir);
  try {
    const config = await loadConfig(dir);

    const requestedTop = parseInt(getFlag("--top") || "", 10);
    const top = Number.isInteger(requestedTop) && requestedTop > 0 ? requestedTop : 8;

    const requestedThreshold = parseFloat(getFlag("--threshold") || "");
    const threshold = Number.isFinite(requestedThreshold) ? requestedThreshold : 0.3;

    const results = await searchChunks(query, db, top, threshold, config.hybridWeight, config.enableReranking);

    if (results.length === 0) {
      console.log("No relevant chunks found. Has the directory been indexed?");
      return;
    }

    for (const r of results) {
      const entity = r.entityName ? ` • ${r.entityName}` : "";
      console.log(`[${r.score.toFixed(2)}] ${r.path}${entity}`);
      console.log(r.content);
      console.log("\n---\n");
    }
  } finally {
    db.close();
  }
}
@@ -0,0 +1,5 @@
1
+ import { startServer } from "../../server";
2
+
3
/**
 * CLI handler for `local-rag serve`: delegates to `startServer()` and settles
 * once that call has settled. Takes no arguments and yields no value.
 */
export async function serveCommand() {
  const serverReady = startServer();
  await serverReady;
}
@@ -0,0 +1,13 @@
1
+ import { resolve } from "path";
2
+ import { RagDB } from "../../db";
3
+
4
/**
 * CLI handler for `local-rag status`: prints file/chunk counts and the last
 * indexing time for a project directory.
 *
 * @param args CLI arguments; `args[1]` is used as the directory unless it is
 *             missing or looks like a flag (starts with `--`), in which case
 *             the current directory is used.
 *
 * The database handle is closed even when reading the status throws.
 */
export async function statusCommand(args: string[]) {
  const dirArg = args[1];
  const dir = resolve(dirArg && !dirArg.startsWith("--") ? dirArg : ".");
  const db = new RagDB(dir);
  try {
    const status = db.getStatus();
    console.log(`Index status for ${dir}:`);
    console.log(` Files: ${status.totalFiles}`);
    console.log(` Chunks: ${status.totalChunks}`);
    console.log(` Last indexed: ${status.lastIndexed || "never"}`);
  } finally {
    db.close();
  }
}
@@ -0,0 +1,117 @@
1
+ import { initCommand } from "./commands/init";
2
+ import { indexCommand } from "./commands/index-cmd";
3
+ import { searchCommand, readCommand } from "./commands/search-cmd";
4
+ import { statusCommand } from "./commands/status";
5
+ import { removeCommand } from "./commands/remove";
6
+ import { analyticsCommand } from "./commands/analytics";
7
+ import { mapCommand } from "./commands/map";
8
+ import { benchmarkCommand } from "./commands/benchmark";
9
+ import { evalCommand } from "./commands/eval";
10
+ import { conversationCommand } from "./commands/conversation";
11
+ import { checkpointCommand } from "./commands/checkpoint";
12
+ import { serveCommand } from "./commands/serve";
13
+ import { demoCommand } from "./commands/demo";
14
+
15
/** Command-line arguments with the first two `process.argv` entries dropped. */
const args = process.argv.slice(2);

/** First CLI argument, used to select the sub-command; `undefined` when none was given. */
const [command] = args;
17
+
18
/**
 * Print the CLI help text to stdout in a single `console.log` call.
 *
 * The text is built from (invocation, description) rows so the description
 * column stays aligned without hand-counted spaces. Rows with an empty
 * description are continuation lines listing further flags of the command
 * above them.
 */
function usage() {
  const commands: Array<[string, string]> = [
    ["local-rag serve", "Start MCP server (stdio)"],
    ["local-rag init [dir]", "Create default .rag/config.json"],
    ["local-rag index [dir] [--patterns ...]", "Index files in directory"],
    ["local-rag search <query> [--top N]", "Search indexed files"],
    ["    [--dir D]", ""],
    ["local-rag read <query> [--top N]", "Read relevant chunks (full content)"],
    ["    [--threshold T] [--dir D]", ""],
    ["local-rag status [dir]", "Show index stats"],
    ["local-rag remove <file> [dir]", "Remove file from index"],
    ["local-rag analytics [dir] [--days N]", "Show search usage analytics"],
    ["local-rag benchmark <file> [--dir D]", "Run search quality benchmark"],
    ["    [--top N]", ""],
    ["local-rag eval <file> [--dir D]", "Run A/B eval (with/without RAG)"],
    ["    [--top N] [--out F]", ""],
    ["local-rag map [dir] [--focus F]", "Generate project dependency graph"],
    ["    [--zoom file|directory]", "(Mermaid format)"],
    ["    [--max N]", ""],
    ["local-rag conversation search <query>", "Search conversation history"],
    ["    [--dir D] [--top N]", ""],
    ["local-rag conversation sessions", "List indexed sessions"],
    ["    [--dir D]", ""],
    ["local-rag conversation index [--dir D]", "Index all sessions for a project"],
    ["local-rag checkpoint create <type>", "Create a checkpoint"],
    ["    <title> <summary>", ""],
    ["    [--dir D] [--files f1,f2] [--tags t1,t2]", ""],
    ["local-rag checkpoint list [--dir D]", "List checkpoints"],
    ["    [--type T] [--top N]", ""],
    ["local-rag checkpoint search <query>", "Search checkpoints"],
    ["    [--dir D] [--type T] [--top N]", ""],
    ["local-rag demo [dir]", "Run interactive feature demo"],
  ];

  const options: Array<[string, string]> = [
    ["dir", "Project directory (default: current directory)"],
    ["--dir D", "Project directory, for commands that take it as a flag"],
    ["--top N", "Number of results (search: config searchTopK, default 5; read: 8)"],
    ["--patterns", "Comma-separated glob patterns to include"],
  ];

  // Column width is derived from the rows that actually have a description,
  // so long flag-only continuation lines do not push the column out.
  const render = (rows: Array<[string, string]>): string[] => {
    const described = rows.filter(([, description]) => description !== "");
    const width = Math.max(...described.map(([left]) => left.length)) + 2;
    return rows.map(([left, description]) =>
      description !== "" ? `  ${left.padEnd(width)}${description}` : `  ${left}`,
    );
  };

  console.log(
    [
      "local-rag — Local RAG for semantic file search",
      "",
      "Usage:",
      ...render(commands),
      "",
      "Options:",
      ...render(options),
    ].join("\n"),
  );
}
57
+
58
/**
 * Return the value that follows `flag` in the CLI arguments.
 *
 * @param flag Flag name including its dashes, e.g. `"--top"`.
 * @returns The argument right after the first occurrence of `flag`, or
 *          `undefined` when the flag is absent, is the last argument, or is
 *          immediately followed by another `--flag` (i.e. has no value).
 */
function getFlag(flag: string): string | undefined {
  const idx = args.indexOf(flag);
  if (idx === -1) return undefined;

  const value = args[idx + 1];
  // `--top --dir foo` must not report "--dir" as the value of --top.
  if (value === undefined || value.startsWith("--")) return undefined;
  return value;
}
62
+
63
/**
 * CLI entry point: dispatches the first command-line argument to its handler.
 *
 * With no command, `--help` or `-h`, prints the usage text and exits with
 * code 0. An unrecognised command is reported on stderr, followed by the usage
 * text and exit code 1.
 */
export async function main() {
  if (!command || command === "--help" || command === "-h") {
    usage();
    process.exit(0);
  }

  // A Map (rather than a plain object) so names such as "constructor" can
  // never resolve to an inherited property.
  const handlers = new Map<string, () => unknown>([
    ["serve", () => serveCommand()],
    ["init", () => initCommand(args, getFlag)],
    ["index", () => indexCommand(args, getFlag)],
    ["search", () => searchCommand(args, getFlag)],
    ["read", () => readCommand(args, getFlag)],
    ["status", () => statusCommand(args)],
    ["remove", () => removeCommand(args)],
    ["analytics", () => analyticsCommand(args, getFlag)],
    ["map", () => mapCommand(args, getFlag)],
    ["benchmark", () => benchmarkCommand(args, getFlag)],
    ["eval", () => evalCommand(args, getFlag)],
    ["conversation", () => conversationCommand(args, getFlag)],
    ["checkpoint", () => checkpointCommand(args, getFlag)],
    ["demo", () => demoCommand(args)],
  ]);

  const handler = handlers.get(command);
  if (handler) {
    await handler();
  } else {
    console.error(`Unknown command: ${command}`);
    usage();
    process.exit(1);
  }
}
@@ -0,0 +1,21 @@
1
/**
 * CLI progress callback for indexDirectory.
 * Transient messages (e.g. batch progress) overwrite the current line.
 * Persistent messages print on a new line.
 */
let lastWasTransient = false;

/**
 * Write a progress message to the terminal.
 *
 * @param msg  Text to display.
 * @param opts Pass `{ transient: true }` to redraw the current line in place
 *             (carriage return, no newline); otherwise the message is printed
 *             with `console.log` after clearing any transient line.
 *
 * Lines are limited to one column less than the terminal width
 * (`process.stdout.columns`, 80 when unavailable). Over-long transient messages
 * are cut and end in "..."; on terminals too narrow for the ellipsis the
 * message is simply cut to fit.
 */
export function cliProgress(msg: string, opts?: { transient?: boolean }): void {
  const width = Math.max((process.stdout.columns || 80) - 1, 0);

  if (opts?.transient) {
    let line = msg;
    if (msg.length > width) {
      // Below three columns there is no room for "..."; a negative slice
      // index would otherwise keep most of the message and overflow the line.
      line = width >= 3 ? msg.slice(0, width - 3) + "..." : msg.slice(0, width);
    }
    process.stdout.write(`\r${line.padEnd(width)}`);
    lastWasTransient = true;
    return;
  }

  if (lastWasTransient) {
    // Blank out the transient line before printing the persistent message.
    process.stdout.write("\r" + " ".repeat(width) + "\r");
    lastWasTransient = false;
  }
  console.log(msg);
}
@@ -0,0 +1,192 @@
1
+ import { existsSync } from "fs";
2
+ import { readFile, writeFile, mkdir } from "fs/promises";
3
+ import { join, resolve } from "path";
4
+ import { createInterface } from "readline";
5
+ import { writeDefaultConfig } from "../config";
6
+
7
/** HTML comment embedded in generated instruction files; its presence means the block was already injected. */
const MARKER = "<!-- local-rag -->";

/** Markdown instructions describing the local-rag MCP tools, shared by every generated instruction file. */
const INSTRUCTIONS_BLOCK = `## Using local-rag tools

This project has a local RAG index (local-rag). Use these MCP tools:

- **\`search\`**: Discover which files are relevant to a topic. Returns file paths
with snippet previews — use this when you need to know *where* something is.
- **\`read_relevant\`**: Get the actual content of relevant semantic chunks —
individual functions, classes, or markdown sections — ranked by relevance.
Results include exact line ranges (\`src/db.ts:42-67\`) so you can navigate
directly to the edit location. Use this instead of \`search\` + \`Read\` when
you need the content itself. Two chunks from the same file can both appear
(no file deduplication).
- **\`project_map\`**: When you need to understand how files relate to each other,
generate a dependency graph. Use \`focus\` to zoom into a specific file's
neighborhood. This is faster than reading import statements across many files.
- **\`search_conversation\`**: Search past conversation history to recall previous
decisions, discussions, and tool outputs. Use this before re-investigating
something that may have been discussed in an earlier session.
- **\`create_checkpoint\`**: Mark important moments — decisions, milestones,
blockers, direction changes. Do this liberally: after completing any feature
or task, after adding/modifying tools, after key technical decisions, before
and after large refactors, or when changing direction. If in doubt, create one.
- **\`list_checkpoints\`** / **\`search_checkpoints\`**: Review or search past
checkpoints to understand project history and prior decisions.
- **\`index_files\`**: If you've created or modified files and want them searchable,
re-index the project directory.
- **\`search_analytics\`**: Check what queries return no results or low-relevance
results — this reveals documentation gaps.
- **\`search_symbols\`**: When you know a symbol name (function, class, type, etc.),
find it directly by name instead of using semantic search.
- **\`find_usages\`**: Before changing a function or type, find all its call sites.
Use this to understand the blast radius of a rename or API change. Faster and
more reliable than semantic search for finding usages.
- **\`git_context\`**: At the start of a session (or any time you need orientation),
call this to see what files have already been modified, recent commits, and
which changed files are in the index. Avoids redundant searches and conflicting
edits on already-modified files.
- **\`annotate\`**: Attach a persistent note to a file or symbol — "known race
condition", "don't refactor until auth rewrite lands", etc. Notes appear as
\`[NOTE]\` blocks inline in \`read_relevant\` results automatically.
- **\`get_annotations\`**: Retrieve all notes for a file, or search semantically
across all annotations to find relevant caveats before editing.
- **\`write_relevant\`**: Before adding new code or docs, find the best insertion
point — returns the most semantically appropriate file and anchor.`;

/**
 * Content for `.mdc` rule files. The YAML frontmatter has to be the very first
 * thing in the file to be recognised as frontmatter, so the marker comment is
 * placed after it rather than before. The marker is only ever looked up with a
 * substring check, so its position does not affect detection.
 */
const MDC_BLOCK = `---
description: local-rag tool usage instructions
alwaysApply: true
---

${MARKER}
${INSTRUCTIONS_BLOCK}`;

/** Content for plain markdown instruction files: the marker followed by the instructions. */
const MARKDOWN_BLOCK = `${MARKER}
${INSTRUCTIONS_BLOCK}`;
64
+
65
/** Outcome of a setup run, as returned by `runSetup`. */
export interface SetupResult {
  /** Human-readable descriptions of the changes that were made, in the order they happened. */
  actions: Array<string>;
}
68
+
69
/**
 * Make sure `<projectDir>/.rag/config.json` exists.
 *
 * @param projectDir Project root directory.
 * @returns A description of the action taken when the default config had to be
 *          written, or `null` when a config file was already present.
 */
export async function ensureConfig(projectDir: string): Promise<string | null> {
  const alreadyPresent = existsSync(join(projectDir, ".rag", "config.json"));
  if (!alreadyPresent) {
    await writeDefaultConfig(projectDir);
    return "Created .rag/config.json";
  }
  return null;
}
75
+
76
/**
 * Make sure the project's `.gitignore` ignores the `.rag/` index directory.
 *
 * @param projectDir Project root directory.
 * @returns A description of the change, or `null` when `.rag` is already ignored.
 *
 * An existing entry is recognised in any of its common spellings (`.rag`,
 * `.rag/`, `/.rag`, `/.rag/`, `.rag/*`, `.rag/**`), so a second entry is not
 * appended next to an equivalent one. Negated patterns (`!.rag/`) do not count.
 */
export async function ensureGitignore(projectDir: string): Promise<string | null> {
  const gitignorePath = join(projectDir, ".gitignore");
  const entry = "# local-rag index\n.rag/\n";

  if (!existsSync(gitignorePath)) {
    await writeFile(gitignorePath, entry);
    return "Created .gitignore with .rag/";
  }

  const content = await readFile(gitignorePath, "utf-8");
  const ignoresRagDir = content.split("\n").some((line) => {
    // Drop a leading "/" anchor and a trailing "/", "/*" or "/**".
    const pattern = line.trim().replace(/^\//, "").replace(/\/\*{0,2}$/, "");
    return pattern === ".rag";
  });
  if (ignoresRagDir) return null;

  // An empty (or whitespace-only) file gets just the entry, without leading blank lines.
  const existing = content.trimEnd();
  await writeFile(gitignorePath, existing ? `${existing}\n\n${entry}` : entry);
  return "Added .rag/ to .gitignore";
}
89
+
90
/**
 * Write an instruction block into a markdown file, creating the file if needed.
 *
 * @param filePath Target markdown file.
 * @param block    Text to write; a trailing newline is added.
 * @returns `Created <path>` for a new file, `Updated <path>` when the block was
 *          appended to an existing file, or `null` when the existing file
 *          already contains the marker and was left untouched.
 */
async function injectMarkdown(filePath: string, block: string): Promise<string | null> {
  if (!existsSync(filePath)) {
    await writeFile(filePath, block + "\n");
    return `Created ${filePath}`;
  }

  const existing = await readFile(filePath, "utf-8");
  if (existing.includes(MARKER)) {
    return null;
  }

  // Separate the appended block from prior content by exactly one blank line.
  const updated = `${existing.trimEnd()}\n\n${block}\n`;
  await writeFile(filePath, updated);
  return `Updated ${filePath}`;
}
100
+
101
/**
 * Write the `.mdc` rule file for an agent, but only when that agent's folder
 * already exists in the project.
 *
 * @param filePath Full path of the rule file to write.
 * @param dir      The agent's root folder (e.g. `.cursor`); nothing is created
 *                 when it is missing.
 * @returns `Created <path>` when the rule file was written, or `null` when the
 *          agent folder is absent or the rule file already contains the marker.
 *
 * An existing rule file without the marker is overwritten.
 */
async function injectMdc(filePath: string, dir: string): Promise<string | null> {
  if (!existsSync(dir)) return null;
  if (existsSync(filePath)) {
    const content = await readFile(filePath, "utf-8");
    if (content.includes(MARKER)) return null;
  }

  // `dir` is known to exist at this point; the folder that may be missing is
  // the rule file's own parent (e.g. `.cursor/rules`), so that is what has to
  // be created before writing.
  await mkdir(join(filePath, ".."), { recursive: true });
  await writeFile(filePath, MDC_BLOCK + "\n");
  return `Created ${filePath}`;
}
111
+
112
/**
 * Install the local-rag usage instructions for each supported coding agent.
 *
 * - Claude Code: `CLAUDE.md` is always created or updated.
 * - Cursor / Windsurf: a `rules/local-rag.mdc` file, handled by `injectMdc`
 *   with the agent's folder (`.cursor` / `.windsurf`) as its directory argument.
 * - GitHub Copilot: `.github/copilot-instructions.md`, only when `.github/` exists.
 *
 * @param projectDir Project root directory.
 * @returns Descriptions of the changes made, in the order listed above.
 */
export async function ensureAgentInstructions(projectDir: string): Promise<string[]> {
  const performed: string[] = [];
  const record = (outcome: string | null): void => {
    if (outcome) performed.push(outcome);
  };

  record(await injectMarkdown(join(projectDir, "CLAUDE.md"), MARKDOWN_BLOCK));

  // Cursor first, then Windsurf — processed one after the other.
  for (const agentFolder of [".cursor", ".windsurf"]) {
    const agentRoot = join(projectDir, agentFolder);
    record(await injectMdc(join(agentRoot, "rules", "local-rag.mdc"), agentRoot));
  }

  const githubDir = join(projectDir, ".github");
  if (existsSync(githubDir)) {
    record(await injectMarkdown(join(githubDir, "copilot-instructions.md"), MARKDOWN_BLOCK));
  }

  return performed;
}
144
+
145
/**
 * Build the JSON snippet that registers local-rag as an MCP server.
 *
 * @param projectDir Project directory; resolved to an absolute path and passed
 *                   to the server through the `RAG_PROJECT_DIR` environment variable.
 * @returns Pretty-printed JSON (2-space indent) with a single `"local-rag"` entry.
 */
export function mcpConfigSnippet(projectDir: string): string {
  const serverEntry = {
    command: "bunx",
    args: ["@winci/local-rag@latest"],
    env: { RAG_PROJECT_DIR: resolve(projectDir) },
  };
  return JSON.stringify({ "local-rag": serverEntry }, null, 2);
}
155
+
156
/**
 * Suggest where the MCP server entry should be added, based on which agent
 * files or folders exist in the project.
 *
 * @param projectDir Project root directory.
 * @returns One hint per detected agent (Claude Code, Cursor, Windsurf, in that
 *          order), or a single generic hint when none is detected.
 */
export function detectAgentHints(projectDir: string): string[] {
  const knownAgents: Array<[string, string]> = [
    [".mcp.json", "Claude Code: add to .mcp.json → mcpServers"],
    [".cursor", "Cursor: add to .cursor/mcp.json → mcpServers"],
    [".windsurf", "Windsurf: add to .windsurf/mcp.json → mcpServers"],
  ];

  const detected = knownAgents
    .filter(([entry]) => existsSync(join(projectDir, entry)))
    .map(([, hint]) => hint);

  return detected.length > 0 ? detected : ["Add to your agent's MCP config under mcpServers:"];
}
168
+
169
/**
 * Ask a yes/no question on the terminal.
 *
 * @param question Prompt text shown to the user.
 * @returns `false` when the answer is "n" or "no" (any case, surrounding
 *          whitespace ignored); `true` for anything else, including an empty
 *          answer — so the default is "yes".
 */
export function confirm(question: string): Promise<boolean> {
  const rl = createInterface({ input: process.stdin, output: process.stdout });
  return new Promise((res) => {
    rl.question(question, (answer) => {
      rl.close();
      const normalized = answer.trim().toLowerCase();
      // Spelling out "no" must decline just like "n" does.
      res(normalized !== "n" && normalized !== "no");
    });
  });
}
178
+
179
/**
 * Run every setup step for a project: config file, agent instruction files,
 * then `.gitignore` — strictly in that order.
 *
 * @param projectDir Project root directory.
 * @returns The descriptions of all changes made; steps that changed nothing
 *          contribute no entry.
 */
export async function runSetup(projectDir: string): Promise<SetupResult> {
  const configAction = await ensureConfig(projectDir);
  const instructionActions = await ensureAgentInstructions(projectDir);
  const gitignoreAction = await ensureGitignore(projectDir);

  const actions: string[] = [
    ...(configAction ? [configAction] : []),
    ...instructionActions,
    ...(gitignoreAction ? [gitignoreAction] : []),
  ];
  return { actions };
}
@@ -0,0 +1,101 @@
1
+ import { readFile, writeFile, mkdir } from "fs/promises";
2
+ import { join } from "path";
3
+ import { existsSync } from "fs";
4
+ import { z } from "zod";
5
+ import { log } from "../utils/log";
6
+
7
/** List of glob patterns; an absent key yields an empty list. */
const globList = () => z.array(z.string()).default([]);

/** Integer that must be at least `min`. */
const intAtLeast = (min: number) => z.number().int().min(min);

/** Number within the closed interval [0, 1]. */
const fraction = () => z.number().min(0).max(1);

/**
 * Validation schema for `.rag/config.json`. Every key except `indexBatchSize`
 * and `indexThreads` carries a default, so those two are the only ones that may
 * be absent from a parsed config.
 */
const RagConfigSchema = z.object({
  include: globList(),
  exclude: globList(),
  chunkSize: intAtLeast(64).default(512),
  chunkOverlap: intAtLeast(0).default(50),
  hybridWeight: fraction().default(0.7),
  searchTopK: intAtLeast(1).default(5),
  indexBatchSize: intAtLeast(1).optional(),
  indexThreads: intAtLeast(1).optional(),
  enableReranking: z.boolean().default(true),
  benchmarkTopK: intAtLeast(1).default(5),
  benchmarkMinRecall: fraction().default(0.8),
  benchmarkMinMrr: fraction().default(0.6),
});

/** Fully-parsed configuration, as produced by `RagConfigSchema`. */
export type RagConfig = z.infer<typeof RagConfigSchema>;
23
+
24
/**
 * Built-in configuration, used when a project has no `.rag/config.json` and as
 * the base that a user config is merged over.
 */
const DEFAULT_CONFIG: RagConfig = (() => {
  const byExtension = (...extensions: string[]): string[] => extensions.map((ext) => `**/*.${ext}`);
  const byName = (...names: string[]): string[] => names.map((name) => `**/${name}`);

  const include = [
    // Markdown & plain text
    ...byExtension("md", "txt"),
    // Build / task runners (no extension or prefix-named)
    ...byName("Makefile", "makefile", "GNUmakefile"),
    ...byName("Dockerfile", "Dockerfile.*"),
    ...byName("Jenkinsfile", "Jenkinsfile.*"),
    ...byName("Vagrantfile", "Gemfile", "Rakefile"),
    ...byName("Brewfile", "Procfile"),
    // Structured data & config
    ...byExtension("yaml", "yml", "json", "toml", "xml"),
    // Shell & scripting
    ...byExtension("sh", "bash", "zsh"),
    // Infrastructure / schema languages
    ...byExtension("tf", "proto", "graphql", "gql", "sql", "mod", "bru"),
    ...byExtension("css", "scss", "less"),
  ];

  // Key order is kept stable because this object is serialised verbatim
  // when the default config file is written.
  return {
    include,
    exclude: ["node_modules/**", ".git/**", "dist/**", ".rag/**"],
    chunkSize: 512,
    chunkOverlap: 50,
    hybridWeight: 0.7,
    searchTopK: 5,
    enableReranking: true,
    indexBatchSize: 50,
    benchmarkTopK: 5,
    benchmarkMinRecall: 0.8,
    benchmarkMinMrr: 0.6,
  };
})();
61
+
62
/**
 * Load config from .rag/config.json, merged with defaults.
 * Note: array fields (include, exclude) from user config *replace* the defaults
 * entirely — they are not merged. This lets users fully control which files are indexed.
 *
 * Failure handling (a warning is logged in each case):
 * - unparseable JSON, or JSON that is not an object → defaults are returned;
 * - individual fields failing validation → only those fields fall back to
 *   their defaults (or are dropped when they have none); valid user settings
 *   are kept.
 *
 * @param projectDir Project root directory containing `.rag/config.json`.
 * @returns The effective configuration. Returned defaults never share their
 *          `include`/`exclude` arrays with the built-in default object.
 */
export async function loadConfig(projectDir: string): Promise<RagConfig> {
  // Copies the arrays as well, so a caller mutating the result cannot alter
  // the shared defaults.
  const freshDefaults = (): RagConfig => ({
    ...DEFAULT_CONFIG,
    include: [...DEFAULT_CONFIG.include],
    exclude: [...DEFAULT_CONFIG.exclude],
  });

  const configPath = join(projectDir, ".rag", "config.json");
  if (!existsSync(configPath)) {
    return freshDefaults();
  }

  const raw = await readFile(configPath, "utf-8");
  let userConfig: unknown;
  try {
    userConfig = JSON.parse(raw);
  } catch {
    log.warn(`Invalid JSON in ${configPath}, using defaults`, "config");
    return freshDefaults();
  }

  if (typeof userConfig !== "object" || userConfig === null || Array.isArray(userConfig)) {
    log.warn(`Config in ${configPath} is not a JSON object, using defaults`, "config");
    return freshDefaults();
  }

  const defaults: Record<string, unknown> = { ...freshDefaults() };
  const merged: Record<string, unknown> = { ...defaults, ...(userConfig as Record<string, unknown>) };
  const result = RagConfigSchema.safeParse(merged);
  if (result.success) {
    return result.data;
  }

  const issues = result.error.issues.map((i) => `${i.path.join(".")}: ${i.message}`).join(", ");
  log.warn(`Config validation: ${issues}. Using defaults for invalid fields.`, "config");

  // Reset just the offending top-level fields, then validate once more.
  for (const issue of result.error.issues) {
    const field = issue.path[0];
    if (typeof field !== "string") continue;
    if (field in defaults) {
      merged[field] = defaults[field];
    } else {
      delete merged[field];
    }
  }

  const repaired = RagConfigSchema.safeParse(merged);
  return repaired.success ? repaired.data : freshDefaults();
}
94
+
95
/**
 * Write the built-in default configuration to `<projectDir>/.rag/config.json`,
 * creating the `.rag` directory when necessary. An existing file is overwritten.
 *
 * @param projectDir Project root directory.
 * @returns Path of the config file that was written.
 */
export async function writeDefaultConfig(projectDir: string): Promise<string> {
  const ragDir = join(projectDir, ".rag");
  const target = join(ragDir, "config.json");
  const serialized = `${JSON.stringify(DEFAULT_CONFIG, null, 2)}\n`;

  await mkdir(ragDir, { recursive: true });
  await writeFile(target, serialized);
  return target;
}
@@ -0,0 +1,147 @@
1
+ import { watch, statSync } from "fs";
2
+ import { readJSONL, parseTurns, buildTurnText, type ParsedTurn } from "./parser";
3
+ import { chunkText } from "../indexing/chunker";
4
+ import { embedBatch } from "../embeddings/embed";
5
+ import { type RagDB } from "../db";
6
+ import { type Watcher } from "../indexing/watcher";
7
+
8
/** Delay, in milliseconds, between a change event on the tailed file and the indexing pass it triggers. */
const TAIL_DEBOUNCE_MS = 1_500;
9
+
10
/**
 * Index the turns of a JSONL transcript, starting at a byte offset.
 *
 * @param jsonlPath      Transcript file to read.
 * @param sessionId      Session the turns belong to.
 * @param db             Database receiving turns and session bookkeeping.
 * @param fromOffset     Offset to start reading from (default 0).
 * @param startTurnIndex Index given to the first parsed turn (default 0).
 * @param onProgress     Optional callback, invoked once per newly indexed turn.
 * @returns How many turns were newly indexed, the offset reached, and the
 *          summed token cost of all turns parsed in this call. When nothing
 *          could be read, the offset is returned unchanged and the session
 *          record is not touched.
 */
export async function indexConversation(
  jsonlPath: string,
  sessionId: string,
  db: RagDB,
  fromOffset = 0,
  startTurnIndex = 0,
  onProgress?: (msg: string) => void
): Promise<{ turnsIndexed: number; newOffset: number; totalTokens: number }> {
  const parsed = readJSONL(jsonlPath, fromOffset);
  if (parsed.entries.length === 0) {
    return { turnsIndexed: 0, newOffset: fromOffset, totalTokens: 0 };
  }

  const turns = parseTurns(parsed.entries, sessionId, startTurnIndex);

  let turnsIndexed = 0;
  let totalTokens = 0;
  for (const turn of turns) {
    if (await indexTurn(turn, db)) {
      turnsIndexed += 1;
      onProgress?.(`Indexed turn ${turn.turnIndex} (${turn.toolsUsed.join(", ") || "no tools"})`);
    }
    // Token cost is summed for every parsed turn, newly indexed or not.
    totalTokens += turn.tokenCost;
  }

  // Session bookkeeping. The turn count is cumulative across calls.
  // NOTE(review): totalTokens covers only this call's turns, unlike the turn
  // count — confirm updateSessionStats expects a per-call value.
  const previousTurnCount = db.getSession(sessionId)?.turnCount || 0;
  const { mtimeMs } = statSync(jsonlPath);
  const firstTimestamp = turns[0]?.timestamp || new Date().toISOString();

  db.upsertSession(sessionId, jsonlPath, firstTimestamp, mtimeMs, parsed.newOffset);
  db.updateSessionStats(sessionId, previousTurnCount + turnsIndexed, totalTokens, parsed.newOffset);

  return { turnsIndexed, newOffset: parsed.newOffset, totalTokens };
}
52
+
53
/**
 * Index one parsed turn: build its text, split it into chunks, embed the
 * chunks and store the turn together with its embedded chunks.
 *
 * @returns `false` when the turn has no text or the database reports it as
 *          already indexed (insert id 0); `true` when it was stored.
 */
async function indexTurn(turn: ParsedTurn, db: RagDB): Promise<boolean> {
  const text = buildTurnText(turn);
  if (text.trim().length === 0) {
    return false;
  }

  // ".md" selects paragraph-style splitting in the chunker.
  const pieces = await chunkText(text, ".md", 512, 50);

  // One embedding request for all chunks; vectors come back in chunk order.
  const vectors = await embedBatch(pieces.map((piece) => piece.text));
  const embeddedChunks = pieces.map((piece, position) => ({
    snippet: piece.text,
    embedding: vectors[position],
  }));

  const {
    sessionId,
    turnIndex,
    timestamp,
    userText,
    assistantText,
    toolsUsed,
    filesReferenced,
    tokenCost,
    summary,
  } = turn;

  // insertTurn yields 0 for a turn that was already indexed (duplicate).
  const turnId = db.insertTurn(
    sessionId,
    turnIndex,
    timestamp,
    userText,
    assistantText,
    toolsUsed,
    filesReferenced,
    tokenCost,
    summary,
    embeddedChunks
  );

  return turnId !== 0;
}
86
+
87
/**
 * Start tailing a JSONL file for live conversation indexing.
 * Watches for file changes and indexes new turns as they appear.
 *
 * Reading resumes from the offset and turn count stored for the session, if
 * any. An initial indexing pass starts immediately; later passes run after
 * change events, debounced by `TAIL_DEBOUNCE_MS`.
 *
 * Indexing passes never overlap: a change arriving while a pass is in flight
 * triggers exactly one follow-up pass once the current one finishes, so two
 * passes can never read from the same offset at the same time.
 *
 * @param jsonlPath Transcript file to watch.
 * @param sessionId Session the transcript belongs to.
 * @param db        Database used for session state and indexing.
 * @param onEvent   Optional sink for progress and error messages.
 * @returns A handle whose `close()` stops watching, cancels any scheduled pass
 *          and prevents further passes from starting. Calling it twice is safe.
 */
export function startConversationTail(
  jsonlPath: string,
  sessionId: string,
  db: RagDB,
  onEvent?: (msg: string) => void
): Watcher {
  let currentOffset = 0;
  let currentTurnIndex = 0;
  let pending: NodeJS.Timeout | null = null;
  let running = false; // an indexing pass is in flight
  let rerunRequested = false; // a change arrived during the in-flight pass
  let closed = false;

  // Load existing state
  const session = db.getSession(sessionId);
  if (session) {
    currentOffset = session.readOffset;
    currentTurnIndex = session.turnCount;
  }

  /** Run one indexing pass from the current offset; failures are reported, not thrown. */
  async function indexOnce(): Promise<void> {
    try {
      const result = await indexConversation(
        jsonlPath,
        sessionId,
        db,
        currentOffset,
        currentTurnIndex,
        onEvent
      );

      if (result.turnsIndexed > 0) {
        currentOffset = result.newOffset;
        currentTurnIndex += result.turnsIndexed;
        onEvent?.(`Conversation: ${result.turnsIndexed} new turns indexed (total: ${currentTurnIndex})`);
      }
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      onEvent?.(`Conversation index error: ${message}`);
    }
  }

  /** Serialise passes: coalesce requests made while one is already running. */
  async function processNewData(): Promise<void> {
    if (closed) return;
    if (running) {
      rerunRequested = true;
      return;
    }

    running = true;
    try {
      do {
        rerunRequested = false;
        await indexOnce();
      } while (rerunRequested && !closed);
    } finally {
      running = false;
    }
  }

  const watcher = watch(jsonlPath, () => {
    if (closed) return;
    if (pending) clearTimeout(pending);
    pending = setTimeout(() => {
      pending = null;
      void processNewData();
    }, TAIL_DEBOUNCE_MS);
  });

  // Without a listener, an 'error' emitted by the watcher would be thrown as
  // an uncaught exception.
  watcher.on("error", (err) => {
    onEvent?.(`Conversation watch error: ${err.message}`);
  });

  // Do initial index
  void processNewData();

  onEvent?.(`Tailing conversation: ${jsonlPath}`);
  return {
    close() {
      if (closed) return;
      closed = true;
      if (pending) { clearTimeout(pending); pending = null; }
      watcher.close();
    },
  };
}