@winci/local-rag 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +24 -0
- package/.mcp.json +11 -0
- package/LICENSE +21 -0
- package/README.md +567 -0
- package/hooks/hooks.json +25 -0
- package/hooks/scripts/reindex-file.sh +19 -0
- package/hooks/scripts/session-start.sh +11 -0
- package/package.json +52 -0
- package/skills/local-rag/SKILL.md +42 -0
- package/src/cli/commands/analytics.ts +58 -0
- package/src/cli/commands/benchmark.ts +30 -0
- package/src/cli/commands/checkpoint.ts +85 -0
- package/src/cli/commands/conversation.ts +102 -0
- package/src/cli/commands/demo.ts +119 -0
- package/src/cli/commands/eval.ts +31 -0
- package/src/cli/commands/index-cmd.ts +26 -0
- package/src/cli/commands/init.ts +35 -0
- package/src/cli/commands/map.ts +21 -0
- package/src/cli/commands/remove.ts +15 -0
- package/src/cli/commands/search-cmd.ts +59 -0
- package/src/cli/commands/serve.ts +5 -0
- package/src/cli/commands/status.ts +13 -0
- package/src/cli/index.ts +117 -0
- package/src/cli/progress.ts +21 -0
- package/src/cli/setup.ts +192 -0
- package/src/config/index.ts +101 -0
- package/src/conversation/indexer.ts +147 -0
- package/src/conversation/parser.ts +323 -0
- package/src/db/analytics.ts +116 -0
- package/src/db/annotations.ts +161 -0
- package/src/db/checkpoints.ts +166 -0
- package/src/db/conversation.ts +241 -0
- package/src/db/files.ts +146 -0
- package/src/db/graph.ts +250 -0
- package/src/db/index.ts +468 -0
- package/src/db/search.ts +244 -0
- package/src/db/types.ts +85 -0
- package/src/embeddings/embed.ts +73 -0
- package/src/graph/resolver.ts +305 -0
- package/src/indexing/chunker.ts +523 -0
- package/src/indexing/indexer.ts +263 -0
- package/src/indexing/parse.ts +99 -0
- package/src/indexing/watcher.ts +84 -0
- package/src/main.ts +8 -0
- package/src/search/benchmark.ts +139 -0
- package/src/search/eval.ts +171 -0
- package/src/search/hybrid.ts +194 -0
- package/src/search/reranker.ts +99 -0
- package/src/search/usages.ts +27 -0
- package/src/server/index.ts +126 -0
- package/src/tools/analytics-tools.ts +58 -0
- package/src/tools/annotation-tools.ts +89 -0
- package/src/tools/checkpoint-tools.ts +147 -0
- package/src/tools/conversation-tools.ts +86 -0
- package/src/tools/git-tools.ts +103 -0
- package/src/tools/graph-tools.ts +163 -0
- package/src/tools/index-tools.ts +91 -0
- package/src/tools/index.ts +33 -0
- package/src/tools/search.ts +238 -0
- package/src/types.ts +9 -0
- package/src/utils/log.ts +39 -0
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import { resolve } from "path";
|
|
2
|
+
import { RagDB } from "../../db";
|
|
3
|
+
import { loadConfig } from "../../config";
|
|
4
|
+
import { search, searchChunks } from "../../search/hybrid";
|
|
5
|
+
|
|
6
|
+
/**
 * `local-rag search` — print the best-matching indexed files for a query.
 *
 * Each hit is printed as its score and path followed by a one-line preview
 * taken from the first snippet.
 *
 * @param args    CLI arguments; `args[1]` is the search query.
 * @param getFlag Returns the value following a `--flag`, or undefined.
 */
export async function searchCommand(args: string[], getFlag: (flag: string) => string | undefined) {
  const query = args[1];
  if (!query) {
    console.error("Usage: local-rag search <query> [--top N] [--dir D]");
    process.exit(1);
  }

  const dir = resolve(getFlag("--dir") || ".");
  const db = new RagDB(dir);
  try {
    const config = await loadConfig(dir);

    // A non-numeric or non-positive --top would otherwise be passed on as NaN/0;
    // fall back to the configured default instead.
    const requestedTop = Number.parseInt(getFlag("--top") ?? "", 10);
    const top = Number.isInteger(requestedTop) && requestedTop > 0 ? requestedTop : config.searchTopK;

    const results = await search(query, db, top, 0, config.hybridWeight, config.enableReranking);

    if (results.length === 0) {
      console.log("No results found. Has the directory been indexed?");
      return;
    }

    for (const r of results) {
      console.log(`${r.score.toFixed(4)} ${r.path}`);
      // A hit without snippets prints an empty preview rather than "undefined".
      const preview = (r.snippets[0] ?? "").slice(0, 120).replace(/\n/g, " ");
      console.log(` ${preview}...`);
      console.log();
    }
  } finally {
    // Release the database handle even when config loading or the search throws.
    db.close();
  }
}
|
|
32
|
+
|
|
33
|
+
/**
 * `local-rag read` — print the full content of the most relevant chunks.
 *
 * @param args    CLI arguments; `args[1]` is the query.
 * @param getFlag Returns the value following a `--flag`, or undefined.
 *                Reads `--dir`, `--top` (default 8) and `--threshold` (default 0.3).
 */
export async function readCommand(args: string[], getFlag: (flag: string) => string | undefined) {
  const query = args[1];
  if (!query) {
    console.error("Usage: local-rag read <query> [--top N] [--threshold T] [--dir D]");
    process.exit(1);
  }

  const dir = resolve(getFlag("--dir") || ".");
  const db = new RagDB(dir);
  try {
    const config = await loadConfig(dir);

    // Unparseable numeric flags fall back to the documented defaults instead of NaN.
    const requestedTop = Number.parseInt(getFlag("--top") ?? "", 10);
    const top = Number.isInteger(requestedTop) && requestedTop > 0 ? requestedTop : 8;
    const requestedThreshold = Number.parseFloat(getFlag("--threshold") ?? "");
    const threshold = Number.isFinite(requestedThreshold) ? requestedThreshold : 0.3;

    const results = await searchChunks(query, db, top, threshold, config.hybridWeight, config.enableReranking);

    if (results.length === 0) {
      console.log("No relevant chunks found. Has the directory been indexed?");
      return;
    }

    for (const r of results) {
      const entity = r.entityName ? ` • ${r.entityName}` : "";
      console.log(`[${r.score.toFixed(2)}] ${r.path}${entity}`);
      console.log(r.content);
      console.log("\n---\n");
    }
  } finally {
    // Release the database handle even when config loading or the search throws.
    db.close();
  }
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { resolve } from "path";
|
|
2
|
+
import { RagDB } from "../../db";
|
|
3
|
+
|
|
4
|
+
/**
 * `local-rag status` — print file/chunk counts and the last index time.
 *
 * @param args CLI arguments; `args[1]` is the project directory unless it is
 *             missing or looks like a flag, in which case "." is used.
 */
export async function statusCommand(args: string[]) {
  const dirArg = args[1];
  const dir = resolve(dirArg && !dirArg.startsWith("--") ? dirArg : ".");
  const db = new RagDB(dir);
  try {
    const status = db.getStatus();
    console.log(`Index status for ${dir}:`);
    console.log(` Files: ${status.totalFiles}`);
    console.log(` Chunks: ${status.totalChunks}`);
    console.log(` Last indexed: ${status.lastIndexed || "never"}`);
  } finally {
    // Close the handle even if reading the status throws.
    db.close();
  }
}
|
package/src/cli/index.ts
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import { initCommand } from "./commands/init";
|
|
2
|
+
import { indexCommand } from "./commands/index-cmd";
|
|
3
|
+
import { searchCommand, readCommand } from "./commands/search-cmd";
|
|
4
|
+
import { statusCommand } from "./commands/status";
|
|
5
|
+
import { removeCommand } from "./commands/remove";
|
|
6
|
+
import { analyticsCommand } from "./commands/analytics";
|
|
7
|
+
import { mapCommand } from "./commands/map";
|
|
8
|
+
import { benchmarkCommand } from "./commands/benchmark";
|
|
9
|
+
import { evalCommand } from "./commands/eval";
|
|
10
|
+
import { conversationCommand } from "./commands/conversation";
|
|
11
|
+
import { checkpointCommand } from "./commands/checkpoint";
|
|
12
|
+
import { serveCommand } from "./commands/serve";
|
|
13
|
+
import { demoCommand } from "./commands/demo";
|
|
14
|
+
|
|
15
|
+
// Command-line arguments with the first two process.argv entries dropped.
const args = process.argv.slice(2);
// First remaining argument selects the sub-command; undefined when none was given.
const [command] = args;
|
|
17
|
+
|
|
18
|
+
/**
 * Print the top-level help text: every sub-command with its flags, followed
 * by the options shared between commands.
 */
function usage() {
  console.log(`local-rag — Local RAG for semantic file search

Usage:
  local-rag serve                           Start MCP server (stdio)
  local-rag init [dir]                      Create default .rag/config.json
  local-rag index [dir] [--patterns ...]    Index files in directory
  local-rag search <query> [--top N]        Search indexed files
                   [--dir D]
  local-rag read <query> [--top N]          Read relevant chunks (full content)
                   [--threshold T] [--dir D]
  local-rag status [dir]                    Show index stats
  local-rag remove <file> [dir]             Remove file from index
  local-rag analytics [dir] [--days N]      Show search usage analytics
  local-rag benchmark <file> [--dir D]      Run search quality benchmark
                   [--top N]
  local-rag eval <file> [--dir D]           Run A/B eval (with/without RAG)
                   [--top N] [--out F]
  local-rag map [dir] [--focus F]           Generate project dependency graph
                   [--zoom file|directory]  (Mermaid format)
                   [--max N]
  local-rag conversation search <query>     Search conversation history
                   [--dir D] [--top N]
  local-rag conversation sessions           List indexed sessions
                   [--dir D]
  local-rag conversation index [--dir D]    Index all sessions for a project
  local-rag checkpoint create <type>        Create a checkpoint
                   <title> <summary>
                   [--dir D] [--files f1,f2] [--tags t1,t2]
  local-rag checkpoint list [--dir D]       List checkpoints
                   [--type T] [--top N]
  local-rag checkpoint search <query>       Search checkpoints
                   [--dir D] [--type T] [--top N]
  local-rag demo [dir]                      Run interactive feature demo

Options:
  dir          Project directory (default: current directory)
  --dir D      Project directory for commands that take it as a flag
  --top N      Number of results (default: 5 for search, 8 for read)
  --patterns   Comma-separated glob patterns to include`);
}
|
|
57
|
+
|
|
58
|
+
/**
 * Look up the value that follows `flag` in the CLI arguments.
 *
 * @param flag Flag name including its dashes, e.g. "--top".
 * @returns The argument right after the first occurrence of `flag`, or
 *          undefined when the flag is absent, is the last argument, or is
 *          immediately followed by another `--flag` (i.e. it has no value).
 */
function getFlag(flag: string): string | undefined {
  const idx = args.indexOf(flag);
  if (idx === -1) return undefined;

  const value = args[idx + 1];
  // "--top --dir x" must not yield "--dir" as the value of --top.
  if (value === undefined || value.startsWith("--")) return undefined;
  return value;
}
|
|
62
|
+
|
|
63
|
+
/**
 * CLI entry point: show help when asked (or when no command is given),
 * otherwise run the handler registered for the requested sub-command.
 * Unknown commands print an error plus the help text and exit with status 1.
 */
export async function main() {
  const wantsHelp = !command || command === "--help" || command === "-h";
  if (wantsHelp) {
    usage();
    process.exit(0);
  }

  // A Map (rather than a plain object) so that names such as "constructor"
  // are not accidentally resolved through the prototype chain.
  const handlers = new Map<string, () => unknown>([
    ["serve", () => serveCommand()],
    ["init", () => initCommand(args, getFlag)],
    ["index", () => indexCommand(args, getFlag)],
    ["search", () => searchCommand(args, getFlag)],
    ["read", () => readCommand(args, getFlag)],
    ["status", () => statusCommand(args)],
    ["remove", () => removeCommand(args)],
    ["analytics", () => analyticsCommand(args, getFlag)],
    ["map", () => mapCommand(args, getFlag)],
    ["benchmark", () => benchmarkCommand(args, getFlag)],
    ["eval", () => evalCommand(args, getFlag)],
    ["conversation", () => conversationCommand(args, getFlag)],
    ["checkpoint", () => checkpointCommand(args, getFlag)],
    ["demo", () => demoCommand(args)],
  ]);

  const handler = handlers.get(command);
  if (handler) {
    await handler();
  } else {
    console.error(`Unknown command: ${command}`);
    usage();
    process.exit(1);
  }
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
 * CLI progress callback for indexDirectory.
 * Transient messages (e.g. batch progress) overwrite the current line.
 * Persistent messages print on a new line.
 */
// True while the most recent output was a transient line that still occupies
// the current terminal row and must be wiped before normal output.
let lastWasTransient = false;

/**
 * Report progress on stdout.
 *
 * @param msg  Text to display.
 * @param opts `transient: true` rewrites the current line in place (truncated
 *             with "..." and padded to the terminal width); otherwise the
 *             message is printed with `console.log`.
 */
export function cliProgress(msg: string, opts?: { transient?: boolean }): void {
  // One column is kept free (presumably so the cursor never wraps); 80 columns
  // are assumed when the width is unknown, e.g. when stdout is not a TTY.
  const width = Math.max((process.stdout.columns || 80) - 1, 0);

  if (opts?.transient) {
    let shown = msg;
    if (msg.length > width) {
      // On very narrow terminals there is no room for an ellipsis; a plain cut
      // avoids the negative slice index that would let the text overflow.
      shown = width >= 3 ? msg.slice(0, width - 3) + "..." : msg.slice(0, width);
    }
    process.stdout.write(`\r${shown.padEnd(width)}`);
    lastWasTransient = true;
    return;
  }

  if (lastWasTransient) {
    // Blank out the leftover transient line and return to column 0.
    process.stdout.write("\r" + " ".repeat(width) + "\r");
    lastWasTransient = false;
  }
  console.log(msg);
}
|
package/src/cli/setup.ts
ADDED
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
import { existsSync } from "fs";
|
|
2
|
+
import { readFile, writeFile, mkdir } from "fs/promises";
|
|
3
|
+
import { join, resolve } from "path";
|
|
4
|
+
import { createInterface } from "readline";
|
|
5
|
+
import { writeDefaultConfig } from "../config";
|
|
6
|
+
|
|
7
|
+
// HTML comment embedded in every generated instructions file; its presence is
// how an already-injected file is recognised.
const MARKER = "<!-- local-rag -->";

// Markdown section describing the local-rag MCP tools, shared by all generated
// agent instruction files. Kept as one entry per output line.
const INSTRUCTIONS_BLOCK = [
  "## Using local-rag tools",
  "",
  "This project has a local RAG index (local-rag). Use these MCP tools:",
  "",
  "- **`search`**: Discover which files are relevant to a topic. Returns file paths",
  "with snippet previews — use this when you need to know *where* something is.",
  "- **`read_relevant`**: Get the actual content of relevant semantic chunks —",
  "individual functions, classes, or markdown sections — ranked by relevance.",
  "Results include exact line ranges (`src/db.ts:42-67`) so you can navigate",
  "directly to the edit location. Use this instead of `search` + `Read` when",
  "you need the content itself. Two chunks from the same file can both appear",
  "(no file deduplication).",
  "- **`project_map`**: When you need to understand how files relate to each other,",
  "generate a dependency graph. Use `focus` to zoom into a specific file's",
  "neighborhood. This is faster than reading import statements across many files.",
  "- **`search_conversation`**: Search past conversation history to recall previous",
  "decisions, discussions, and tool outputs. Use this before re-investigating",
  "something that may have been discussed in an earlier session.",
  "- **`create_checkpoint`**: Mark important moments — decisions, milestones,",
  "blockers, direction changes. Do this liberally: after completing any feature",
  "or task, after adding/modifying tools, after key technical decisions, before",
  "and after large refactors, or when changing direction. If in doubt, create one.",
  "- **`list_checkpoints`** / **`search_checkpoints`**: Review or search past",
  "checkpoints to understand project history and prior decisions.",
  "- **`index_files`**: If you've created or modified files and want them searchable,",
  "re-index the project directory.",
  "- **`search_analytics`**: Check what queries return no results or low-relevance",
  "results — this reveals documentation gaps.",
  "- **`search_symbols`**: When you know a symbol name (function, class, type, etc.),",
  "find it directly by name instead of using semantic search.",
  "- **`find_usages`**: Before changing a function or type, find all its call sites.",
  "Use this to understand the blast radius of a rename or API change. Faster and",
  "more reliable than semantic search for finding usages.",
  "- **`git_context`**: At the start of a session (or any time you need orientation),",
  "call this to see what files have already been modified, recent commits, and",
  "which changed files are in the index. Avoids redundant searches and conflicting",
  "edits on already-modified files.",
  "- **`annotate`**: Attach a persistent note to a file or symbol — \"known race",
  "condition\", \"don't refactor until auth rewrite lands\", etc. Notes appear as",
  "`[NOTE]` blocks inline in `read_relevant` results automatically.",
  "- **`get_annotations`**: Retrieve all notes for a file, or search semantically",
  "across all annotations to find relevant caveats before editing.",
  "- **`write_relevant`**: Before adding new code or docs, find the best insertion",
  "point — returns the most semantically appropriate file and anchor.",
].join("\n");
|
|
53
|
+
|
|
54
|
+
// Contents of a generated `.mdc` rule file. The front matter is emitted first
// so the file starts with `---`; front matter preceded by other content is
// generally not recognised as such. The marker follows it and is still found
// by the `includes(MARKER)` checks that detect an already-written file.
const MDC_BLOCK = `---
description: local-rag tool usage instructions
alwaysApply: true
---

${MARKER}
${INSTRUCTIONS_BLOCK}`;

// Section appended to (or written as) plain markdown instruction files: the
// marker line followed directly by the tool instructions.
const MARKDOWN_BLOCK = `${MARKER}
${INSTRUCTIONS_BLOCK}`;
|
|
64
|
+
|
|
65
|
+
/** Outcome of a setup run. */
export interface SetupResult {
  /** One human-readable line per change that setup made, in the order performed. */
  actions: string[];
}
|
|
68
|
+
|
|
69
|
+
/**
 * Write the default `.rag/config.json` unless one already exists.
 *
 * @param projectDir Project root directory.
 * @returns A description of the action taken, or null when the config file
 *          was already present.
 */
export async function ensureConfig(projectDir: string): Promise<string | null> {
  const alreadyConfigured = existsSync(join(projectDir, ".rag", "config.json"));
  if (!alreadyConfigured) {
    await writeDefaultConfig(projectDir);
    return "Created .rag/config.json";
  }
  return null;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Make sure the project's `.gitignore` excludes the `.rag/` index directory.
 *
 * Creates the file when it is missing, appends an entry when none is present,
 * and leaves the file untouched when `.rag` is already ignored — including
 * the root-anchored spellings `/.rag` and `/.rag/`.
 *
 * @param projectDir Project root directory.
 * @returns A description of the change made, or null when nothing changed.
 */
export async function ensureGitignore(projectDir: string): Promise<string | null> {
  const gitignorePath = join(projectDir, ".gitignore");
  const entry = "# local-rag index\n.rag/\n";

  if (!existsSync(gitignorePath)) {
    await writeFile(gitignorePath, entry);
    return "Created .gitignore with .rag/";
  }

  const content = await readFile(gitignorePath, "utf-8");
  const ignoredForms = new Set([".rag", ".rag/", "/.rag", "/.rag/"]);
  // trim() also removes the "\r" left behind on files with CRLF line endings.
  if (content.split("\n").some((line) => ignoredForms.has(line.trim()))) {
    return null;
  }

  // Separate the entry from existing rules with a blank line, but do not start
  // an empty file with blank lines.
  const existing = content.trimEnd();
  await writeFile(gitignorePath, existing ? `${existing}\n\n${entry}` : entry);
  return "Added .rag/ to .gitignore";
}
|
|
89
|
+
|
|
90
|
+
/**
 * Add an instructions block to a markdown file, creating the file if needed.
 *
 * @param filePath Markdown file to create or extend.
 * @param block    Text to write; expected to contain MARKER so that later
 *                 runs can tell the file was already handled.
 * @returns "Created …" / "Updated …" describing the change, or null when the
 *          file already contains MARKER.
 */
async function injectMarkdown(filePath: string, block: string): Promise<string | null> {
  if (!existsSync(filePath)) {
    await writeFile(filePath, block + "\n");
    return `Created ${filePath}`;
  }

  const existing = await readFile(filePath, "utf-8");
  const alreadyInjected = existing.includes(MARKER);
  if (alreadyInjected) {
    return null;
  }

  // Append after the existing content, separated by one blank line.
  const updated = existing.trimEnd() + "\n\n" + block + "\n";
  await writeFile(filePath, updated);
  return `Updated ${filePath}`;
}
|
|
100
|
+
|
|
101
|
+
/**
 * Write the `.mdc` rules file for an editor, but only when that editor's
 * config directory already exists in the project.
 *
 * @param filePath Rules file to write (e.g. `<dir>/rules/local-rag.mdc`).
 * @param dir      Editor config directory whose presence enables the write.
 * @returns "Created …" when the file was written, or null when `dir` is
 *          missing or the file already contains MARKER.
 */
async function injectMdc(filePath: string, dir: string): Promise<string | null> {
  if (!existsSync(dir)) return null;

  if (existsSync(filePath)) {
    const content = await readFile(filePath, "utf-8");
    if (content.includes(MARKER)) return null;
  }

  // Create the directory that will hold the file (e.g. `<dir>/rules`). Creating
  // only `dir`, which is known to exist, would leave writeFile failing with
  // ENOENT whenever the intermediate folder is missing.
  await mkdir(join(filePath, ".."), { recursive: true });
  await writeFile(filePath, MDC_BLOCK + "\n");
  return `Created ${filePath}`;
}
|
|
111
|
+
|
|
112
|
+
/**
 * Install the local-rag usage instructions for every supported agent.
 *
 * `CLAUDE.md` is always created or extended. Cursor and Windsurf rule files
 * and the GitHub Copilot instructions are written only when the matching
 * `.cursor/`, `.windsurf/` or `.github/` directory exists.
 *
 * @param projectDir Project root directory.
 * @returns Descriptions of the changes made, in the order they were applied.
 */
export async function ensureAgentInstructions(projectDir: string): Promise<string[]> {
  const actions: string[] = [];
  const record = (action: string | null): void => {
    if (action) actions.push(action);
  };

  // Claude Code — always create/update
  record(await injectMarkdown(join(projectDir, "CLAUDE.md"), MARKDOWN_BLOCK));

  // Cursor, then Windsurf — injectMdc skips editors whose directory is absent
  for (const editorDir of [".cursor", ".windsurf"]) {
    const rulesFile = join(projectDir, editorDir, "rules", "local-rag.mdc");
    record(await injectMdc(rulesFile, join(projectDir, editorDir)));
  }

  // GitHub Copilot — only if .github/ exists
  const githubDir = join(projectDir, ".github");
  if (existsSync(githubDir)) {
    record(await injectMarkdown(join(projectDir, ".github", "copilot-instructions.md"), MARKDOWN_BLOCK));
  }

  return actions;
}
|
|
144
|
+
|
|
145
|
+
/**
 * Build the JSON snippet a user pastes into an agent's MCP configuration.
 *
 * @param projectDir Project directory; resolved to an absolute path and
 *                   passed to the server through `RAG_PROJECT_DIR`.
 * @returns Pretty-printed (2-space) JSON with a single "local-rag" entry.
 */
export function mcpConfigSnippet(projectDir: string): string {
  const server = {
    command: "bunx",
    args: ["@winci/local-rag@latest"],
    env: { RAG_PROJECT_DIR: resolve(projectDir) },
  };
  return JSON.stringify({ "local-rag": server }, null, 2);
}
|
|
155
|
+
|
|
156
|
+
/**
 * Suggest where the MCP server entry should be added, based on which agent
 * files or directories exist in the project.
 *
 * @param projectDir Project root directory.
 * @returns One hint per detected agent (Claude Code, Cursor, Windsurf, in that
 *          order), or a single generic hint when none is detected.
 */
export function detectAgentHints(projectDir: string): string[] {
  // [path that signals the agent, hint shown to the user]
  const candidates: Array<[string, string]> = [
    [".mcp.json", "Claude Code: add to .mcp.json → mcpServers"],
    [".cursor", "Cursor: add to .cursor/mcp.json → mcpServers"],
    [".windsurf", "Windsurf: add to .windsurf/mcp.json → mcpServers"],
  ];

  const hints = candidates
    .filter(([signal]) => existsSync(join(projectDir, signal)))
    .map(([, hint]) => hint);

  return hints.length > 0 ? hints : ["Add to your agent's MCP config under mcpServers:"];
}
|
|
168
|
+
|
|
169
|
+
/**
 * Ask a yes/no question on the terminal.
 *
 * Yes is the default: an empty reply or any reply other than "n" / "no"
 * (case-insensitive, surrounding whitespace ignored) counts as confirmation.
 *
 * @param question Prompt text written before waiting for a line of input.
 * @returns Resolves to false only when the user declines.
 */
export function confirm(question: string): Promise<boolean> {
  const rl = createInterface({ input: process.stdin, output: process.stdout });
  return new Promise((res) => {
    rl.question(question, (answer) => {
      rl.close();
      const reply = answer.trim().toLowerCase();
      // Accept the spelled-out "no" as well; previously only "n" declined.
      res(reply !== "n" && reply !== "no");
    });
  });
}
|
|
178
|
+
|
|
179
|
+
/**
 * Run every setup step for a project: default config, agent instruction
 * files, then the `.gitignore` entry.
 *
 * @param projectDir Project root directory.
 * @returns The descriptions of all changes made, in the order the steps ran.
 */
export async function runSetup(projectDir: string): Promise<SetupResult> {
  const configAction = await ensureConfig(projectDir);
  const instructionActions = await ensureAgentInstructions(projectDir);
  const gitignoreAction = await ensureGitignore(projectDir);

  // Steps that changed nothing report null and are left out of the summary.
  const actions: string[] = [
    ...(configAction ? [configAction] : []),
    ...instructionActions,
    ...(gitignoreAction ? [gitignoreAction] : []),
  ];
  return { actions };
}
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import { readFile, writeFile, mkdir } from "fs/promises";
|
|
2
|
+
import { join } from "path";
|
|
3
|
+
import { existsSync } from "fs";
|
|
4
|
+
import { z } from "zod";
|
|
5
|
+
import { log } from "../utils/log";
|
|
6
|
+
|
|
7
|
+
// Reusable schema fragments; the .default()/.optional() calls below each
// derive a new schema from these rather than modifying them.
const globList = z.array(z.string());
const positiveInt = z.number().int().min(1);
const unitInterval = z.number().min(0).max(1);

/** Validation schema for `.rag/config.json`. */
const RagConfigSchema = z.object({
  // Glob patterns selecting / excluding files
  include: globList.default([]),
  exclude: globList.default([]),
  // Chunking
  chunkSize: z.number().int().min(64).default(512),
  chunkOverlap: z.number().int().min(0).default(50),
  // Search
  hybridWeight: unitInterval.default(0.7),
  searchTopK: positiveInt.default(5),
  // Indexing (optional; no schema-level default)
  indexBatchSize: positiveInt.optional(),
  indexThreads: positiveInt.optional(),
  enableReranking: z.boolean().default(true),
  // Benchmark settings
  benchmarkTopK: positiveInt.default(5),
  benchmarkMinRecall: unitInterval.default(0.8),
  benchmarkMinMrr: unitInterval.default(0.6),
});

/** Fully-resolved configuration as produced by the schema. */
export type RagConfig = z.infer<typeof RagConfigSchema>;
|
|
23
|
+
|
|
24
|
+
/**
 * Configuration used when `.rag/config.json` is absent or unusable; also the
 * content written by `writeDefaultConfig`.
 */
const DEFAULT_CONFIG: RagConfig = {
  include: [
    // Markdown & plain text
    "**/*.md",
    "**/*.txt",
    // Build / task runners (no extension or prefix-named)
    "**/Makefile",
    "**/makefile",
    "**/GNUmakefile",
    "**/Dockerfile",
    "**/Dockerfile.*",
    "**/Jenkinsfile",
    "**/Jenkinsfile.*",
    "**/Vagrantfile",
    "**/Gemfile",
    "**/Rakefile",
    "**/Brewfile",
    "**/Procfile",
    // Structured data & config
    "**/*.yaml",
    "**/*.yml",
    "**/*.json",
    "**/*.toml",
    "**/*.xml",
    // Shell & scripting
    "**/*.sh",
    "**/*.bash",
    "**/*.zsh",
    // Infrastructure / schema languages
    "**/*.tf",
    "**/*.proto",
    "**/*.graphql",
    "**/*.gql",
    "**/*.sql",
    "**/*.mod",
    "**/*.bru",
    "**/*.css",
    "**/*.scss",
    "**/*.less",
  ],
  exclude: [
    "node_modules/**",
    ".git/**",
    "dist/**",
    ".rag/**",
  ],
  chunkSize: 512,
  chunkOverlap: 50,
  hybridWeight: 0.7,
  searchTopK: 5,
  enableReranking: true,
  indexBatchSize: 50,
  benchmarkTopK: 5,
  benchmarkMinRecall: 0.8,
  benchmarkMinMrr: 0.6,
};
|
|
61
|
+
|
|
62
|
+
/**
 * Load config from .rag/config.json, merged with defaults.
 * Note: array fields (include, exclude) from user config *replace* the defaults
 * entirely — they are not merged. This lets users fully control which files are indexed.
 *
 * Failure handling:
 * - missing file, invalid JSON, or a JSON root that is not an object → defaults;
 * - individual fields that fail validation → a warning is logged and only
 *   those fields fall back to their defaults, the valid ones are kept.
 *
 * @param projectDir Project root directory containing `.rag/`.
 * @returns The effective configuration.
 */
export async function loadConfig(projectDir: string): Promise<RagConfig> {
  const configPath = join(projectDir, ".rag", "config.json");

  if (!existsSync(configPath)) {
    return { ...DEFAULT_CONFIG };
  }

  const raw = await readFile(configPath, "utf-8");
  let userConfig: unknown;
  try {
    userConfig = JSON.parse(raw);
  } catch {
    log.warn(`Invalid JSON in ${configPath}, using defaults`, "config");
    return { ...DEFAULT_CONFIG };
  }

  // Only a plain JSON object can carry settings; arrays, strings, numbers and
  // null contribute nothing.
  if (typeof userConfig !== "object" || userConfig === null || Array.isArray(userConfig)) {
    log.warn(`Expected a JSON object in ${configPath}, using defaults`, "config");
    return { ...DEFAULT_CONFIG };
  }

  const merged: Record<string, unknown> = { ...DEFAULT_CONFIG, ...(userConfig as Record<string, unknown>) };
  const result = RagConfigSchema.safeParse(merged);
  if (result.success) {
    return result.data;
  }

  const issues = result.error.issues.map((i) => `${i.path.join(".")}: ${i.message}`).join(", ");
  log.warn(`Config validation: ${issues}. Using defaults for invalid fields.`, "config");

  // Reset only the top-level fields that failed so the user's valid settings
  // survive, matching what the warning above promises.
  const defaults = DEFAULT_CONFIG as Record<string, unknown>;
  const repaired: Record<string, unknown> = { ...merged };
  for (const issue of result.error.issues) {
    const field = issue.path[0];
    if (typeof field !== "string") continue;
    if (field in defaults) {
      repaired[field] = defaults[field];
    } else {
      // Optional field without a default: drop the invalid value.
      delete repaired[field];
    }
  }

  const retry = RagConfigSchema.safeParse(repaired);
  return retry.success ? retry.data : { ...DEFAULT_CONFIG };
}
|
|
94
|
+
|
|
95
|
+
/**
 * Write the default configuration to `<projectDir>/.rag/config.json`,
 * creating the `.rag` directory when necessary. An existing file is
 * overwritten.
 *
 * @param projectDir Project root directory.
 * @returns Path of the written config file.
 */
export async function writeDefaultConfig(projectDir: string): Promise<string> {
  const configPath = join(projectDir, ".rag", "config.json");
  await mkdir(join(projectDir, ".rag"), { recursive: true });

  // Pretty-printed with a trailing newline.
  const serialized = `${JSON.stringify(DEFAULT_CONFIG, null, 2)}\n`;
  await writeFile(configPath, serialized);
  return configPath;
}
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
import { watch, statSync } from "fs";
|
|
2
|
+
import { readJSONL, parseTurns, buildTurnText, type ParsedTurn } from "./parser";
|
|
3
|
+
import { chunkText } from "../indexing/chunker";
|
|
4
|
+
import { embedBatch } from "../embeddings/embed";
|
|
5
|
+
import { type RagDB } from "../db";
|
|
6
|
+
import { type Watcher } from "../indexing/watcher";
|
|
7
|
+
|
|
8
|
+
// Delay in milliseconds between the last change event on a tailed transcript and the indexing run it triggers.
const TAIL_DEBOUNCE_MS = 1500;
|
|
9
|
+
|
|
10
|
+
/**
 * Index the turns found in a JSONL transcript, starting at a byte offset.
 *
 * @param jsonlPath      Transcript file to read.
 * @param sessionId      Session the turns belong to.
 * @param db             Index database.
 * @param fromOffset     Offset to resume reading from (0 = start of file).
 * @param startTurnIndex Index assigned to the first parsed turn.
 * @param onProgress     Optional callback invoked once per newly indexed turn.
 * @returns How many turns were newly indexed, the offset to resume from next
 *          time, and the summed token cost of all turns parsed in this call.
 *          When nothing could be read, `newOffset` equals `fromOffset`.
 */
export async function indexConversation(
  jsonlPath: string,
  sessionId: string,
  db: RagDB,
  fromOffset = 0,
  startTurnIndex = 0,
  onProgress?: (msg: string) => void
): Promise<{ turnsIndexed: number; newOffset: number; totalTokens: number }> {
  const { entries, newOffset } = readJSONL(jsonlPath, fromOffset);
  if (entries.length === 0) {
    return { turnsIndexed: 0, newOffset: fromOffset, totalTokens: 0 };
  }

  const turns = parseTurns(entries, sessionId, startTurnIndex);

  let turnsIndexed = 0;
  let totalTokens = 0;
  // Turns are processed strictly one after another, in transcript order.
  for (const turn of turns) {
    const wasNew = await indexTurn(turn, db);
    if (wasNew) {
      turnsIndexed += 1;
      const tools = turn.toolsUsed.join(", ") || "no tools";
      onProgress?.(`Indexed turn ${turn.turnIndex} (${tools})`);
    }
    // Token cost is counted for every parsed turn, indexed or not.
    totalTokens += turn.tokenCost;
  }

  // Update session tracking
  const previousTurnCount = db.getSession(sessionId)?.turnCount || 0;
  const totalTurnCount = previousTurnCount + turnsIndexed;
  const { mtimeMs } = statSync(jsonlPath);
  const startedAt = turns[0]?.timestamp || new Date().toISOString();

  db.upsertSession(sessionId, jsonlPath, startedAt, mtimeMs, newOffset);
  // NOTE(review): the turn count passed here is cumulative while totalTokens
  // covers only this call — confirm that is what updateSessionStats expects.
  db.updateSessionStats(sessionId, totalTurnCount, totalTokens, newOffset);

  return { turnsIndexed, newOffset, totalTokens };
}
|
|
52
|
+
|
|
53
|
+
/**
 * Index one parsed turn: build its text, split it into chunks, embed the
 * chunks and store the turn together with its embedded chunks.
 *
 * @returns true when the turn was stored; false when its text is blank or
 *          `insertTurn` reported 0 (turn already indexed).
 */
async function indexTurn(turn: ParsedTurn, db: RagDB): Promise<boolean> {
  const text = buildTurnText(turn);
  if (text.trim() === "") {
    return false;
  }

  // ".md" selects paragraph-style splitting; 512 / 50 are the chunk size and overlap.
  const pieces = await chunkText(text, ".md", 512, 50);

  // One embedding request for the whole turn; vectors line up with pieces by index.
  const vectors = await embedBatch(pieces.map((piece) => piece.text));
  const embeddedChunks = pieces.map((piece, idx) => {
    return { snippet: piece.text, embedding: vectors[idx] };
  });

  // insertTurn returns 0 if this turn was already indexed (duplicate)
  const turnId = db.insertTurn(
    turn.sessionId,
    turn.turnIndex,
    turn.timestamp,
    turn.userText,
    turn.assistantText,
    turn.toolsUsed,
    turn.filesReferenced,
    turn.tokenCost,
    turn.summary,
    embeddedChunks
  );
  const stored = turnId !== 0;
  return stored;
}
|
|
86
|
+
|
|
87
|
+
/**
 * Start tailing a JSONL file for live conversation indexing.
 * Watches for file changes and indexes new turns as they appear.
 *
 * Resumes from the session's stored read offset and turn count when the
 * session is already known. An initial indexing pass starts immediately;
 * later passes are triggered by file changes, debounced by TAIL_DEBOUNCE_MS.
 * Passes never overlap: a change that arrives while one is running schedules
 * exactly one follow-up pass.
 *
 * @param jsonlPath Transcript file to watch.
 * @param sessionId Session the transcript belongs to.
 * @param db        Index database.
 * @param onEvent   Optional sink for progress and error messages.
 * @returns A handle whose `close()` stops watching and cancels any pending pass.
 */
export function startConversationTail(
  jsonlPath: string,
  sessionId: string,
  db: RagDB,
  onEvent?: (msg: string) => void
): Watcher {
  let currentOffset = 0;
  let currentTurnIndex = 0;
  let pending: NodeJS.Timeout | null = null;
  let running = false;
  let rerunRequested = false;
  let closed = false;

  // Load existing state
  const session = db.getSession(sessionId);
  if (session) {
    currentOffset = session.readOffset;
    currentTurnIndex = session.turnCount;
  }

  async function processNewData(): Promise<void> {
    // Two passes started from the same offset and turn index would index the
    // same data twice, so a pass requested mid-run is queued instead.
    if (running) {
      rerunRequested = true;
      return;
    }
    running = true;
    try {
      do {
        rerunRequested = false;
        try {
          const result = await indexConversation(
            jsonlPath,
            sessionId,
            db,
            currentOffset,
            currentTurnIndex,
            onEvent
          );

          if (result.turnsIndexed > 0) {
            currentOffset = result.newOffset;
            currentTurnIndex += result.turnsIndexed;
            onEvent?.(`Conversation: ${result.turnsIndexed} new turns indexed (total: ${currentTurnIndex})`);
          }
        } catch (err) {
          const reason = err instanceof Error ? err.message : String(err);
          onEvent?.(`Conversation index error: ${reason}`);
        }
      } while (rerunRequested && !closed);
    } finally {
      running = false;
    }
  }

  const watcher = watch(jsonlPath, () => {
    // Restart the debounce timer on every change event.
    if (pending) clearTimeout(pending);
    pending = setTimeout(() => {
      pending = null;
      void processNewData();
    }, TAIL_DEBOUNCE_MS);
  });

  // Do initial index
  void processNewData();

  onEvent?.(`Tailing conversation: ${jsonlPath}`);
  return {
    close() {
      closed = true;
      if (pending) { clearTimeout(pending); pending = null; }
      watcher.close();
    },
  };
}
|