@winci/local-rag 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/cli/setup.ts +3 -2
- package/src/config/index.ts +25 -19
- package/src/indexing/chunker.ts +40 -12
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@winci/local-rag",
|
|
3
|
-
"version": "0.2.2",
|
|
3
|
+
"version": "0.2.4",
|
|
4
4
|
"description": "Semantic search for your codebase — local-first RAG MCP server with hybrid search, AST-aware chunking, and usage analytics",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|
|
@@ -41,7 +41,7 @@
|
|
|
41
41
|
"dependencies": {
|
|
42
42
|
"@huggingface/transformers": "^3.4.0",
|
|
43
43
|
"@modelcontextprotocol/sdk": "^1.12.0",
|
|
44
|
-
"
|
|
44
|
+
"@winci/bun-chunk": "^0.1.0",
|
|
45
45
|
"gray-matter": "^4.0.3",
|
|
46
46
|
"sqlite-vec": "^0.1.6",
|
|
47
47
|
"zod": "^4.3.6"
|
package/src/cli/setup.ts
CHANGED
|
@@ -2,7 +2,7 @@ import { existsSync } from "fs";
|
|
|
2
2
|
import { readFile, writeFile, mkdir } from "fs/promises";
|
|
3
3
|
import { join, resolve } from "path";
|
|
4
4
|
import { createInterface } from "readline";
|
|
5
|
-
import {
|
|
5
|
+
import { loadConfig } from "../config";
|
|
6
6
|
|
|
7
7
|
const MARKER = "<!-- local-rag -->";
|
|
8
8
|
|
|
@@ -69,7 +69,8 @@ export interface SetupResult {
|
|
|
69
69
|
export async function ensureConfig(projectDir: string): Promise<string | null> {
|
|
70
70
|
const configPath = join(projectDir, ".rag", "config.json");
|
|
71
71
|
if (existsSync(configPath)) return null;
|
|
72
|
-
|
|
72
|
+
// loadConfig auto-creates the file with defaults if missing
|
|
73
|
+
await loadConfig(projectDir);
|
|
73
74
|
return "Created .rag/config.json";
|
|
74
75
|
}
|
|
75
76
|
|
package/src/config/index.ts
CHANGED
|
@@ -23,28 +23,40 @@ export type RagConfig = z.infer<typeof RagConfigSchema>;
|
|
|
23
23
|
|
|
24
24
|
const DEFAULT_CONFIG: RagConfig = {
|
|
25
25
|
include: [
|
|
26
|
+
// Source code — AST-aware chunking
|
|
27
|
+
"**/*.ts", "**/*.tsx", "**/*.js", "**/*.jsx",
|
|
28
|
+
"**/*.py",
|
|
29
|
+
"**/*.go",
|
|
30
|
+
"**/*.rs",
|
|
31
|
+
"**/*.java",
|
|
32
|
+
// Source code — heuristic chunking
|
|
33
|
+
"**/*.c", "**/*.cpp", "**/*.h", "**/*.hpp",
|
|
34
|
+
"**/*.rb",
|
|
35
|
+
"**/*.swift",
|
|
26
36
|
// Markdown & plain text
|
|
27
|
-
"**/*.md", "**/*.txt",
|
|
37
|
+
"**/*.md", "**/*.mdx", "**/*.markdown", "**/*.txt",
|
|
28
38
|
// Build / task runners (no extension or prefix-named)
|
|
29
39
|
"**/Makefile", "**/makefile", "**/GNUmakefile",
|
|
30
40
|
"**/Dockerfile", "**/Dockerfile.*",
|
|
31
41
|
"**/Jenkinsfile", "**/Jenkinsfile.*",
|
|
32
42
|
"**/Vagrantfile", "**/Gemfile", "**/Rakefile",
|
|
33
43
|
"**/Brewfile", "**/Procfile",
|
|
44
|
+
// Shell & scripting
|
|
45
|
+
"**/*.sh", "**/*.bash", "**/*.zsh", "**/*.fish",
|
|
34
46
|
// Structured data & config
|
|
35
47
|
"**/*.yaml", "**/*.yml",
|
|
36
48
|
"**/*.json",
|
|
37
49
|
"**/*.toml",
|
|
38
50
|
"**/*.xml",
|
|
39
|
-
// Shell & scripting
|
|
40
|
-
"**/*.sh", "**/*.bash", "**/*.zsh",
|
|
41
51
|
// Infrastructure / schema languages
|
|
42
52
|
"**/*.tf",
|
|
43
53
|
"**/*.proto",
|
|
44
54
|
"**/*.graphql", "**/*.gql",
|
|
45
55
|
"**/*.sql",
|
|
46
56
|
"**/*.mod",
|
|
57
|
+
// API collections
|
|
47
58
|
"**/*.bru",
|
|
59
|
+
// Stylesheets
|
|
48
60
|
"**/*.css", "**/*.scss", "**/*.less",
|
|
49
61
|
],
|
|
50
62
|
exclude: ["node_modules/**", ".git/**", "dist/**", ".rag/**"],
|
|
@@ -60,28 +72,30 @@ const DEFAULT_CONFIG: RagConfig = {
|
|
|
60
72
|
};
|
|
61
73
|
|
|
62
74
|
/**
|
|
63
|
-
* Load config from .rag/config.json
|
|
64
|
-
*
|
|
65
|
-
*
|
|
75
|
+
* Load config from .rag/config.json.
|
|
76
|
+
* If the file doesn't exist, writes the defaults there first so users can
|
|
77
|
+
* edit the file directly — no hidden merge logic, what's on disk is what runs.
|
|
66
78
|
*/
|
|
67
79
|
export async function loadConfig(projectDir: string): Promise<RagConfig> {
|
|
68
|
-
const
|
|
80
|
+
const ragDir = join(projectDir, ".rag");
|
|
81
|
+
const configPath = join(ragDir, "config.json");
|
|
69
82
|
|
|
70
83
|
if (!existsSync(configPath)) {
|
|
84
|
+
await mkdir(ragDir, { recursive: true });
|
|
85
|
+
await writeFile(configPath, JSON.stringify(DEFAULT_CONFIG, null, 2) + "\n");
|
|
71
86
|
return { ...DEFAULT_CONFIG };
|
|
72
87
|
}
|
|
73
88
|
|
|
74
89
|
const raw = await readFile(configPath, "utf-8");
|
|
75
|
-
let
|
|
90
|
+
let parsed: unknown;
|
|
76
91
|
try {
|
|
77
|
-
|
|
92
|
+
parsed = JSON.parse(raw);
|
|
78
93
|
} catch {
|
|
79
94
|
log.warn(`Invalid JSON in ${configPath}, using defaults`, "config");
|
|
80
95
|
return { ...DEFAULT_CONFIG };
|
|
81
96
|
}
|
|
82
97
|
|
|
83
|
-
const
|
|
84
|
-
const result = RagConfigSchema.safeParse(merged);
|
|
98
|
+
const result = RagConfigSchema.safeParse(parsed);
|
|
85
99
|
|
|
86
100
|
if (!result.success) {
|
|
87
101
|
const issues = result.error.issues.map((i) => `${i.path.join(".")}: ${i.message}`).join(", ");
|
|
@@ -91,11 +105,3 @@ export async function loadConfig(projectDir: string): Promise<RagConfig> {
|
|
|
91
105
|
|
|
92
106
|
return result.data;
|
|
93
107
|
}
|
|
94
|
-
|
|
95
|
-
export async function writeDefaultConfig(projectDir: string): Promise<string> {
|
|
96
|
-
const ragDir = join(projectDir, ".rag");
|
|
97
|
-
await mkdir(ragDir, { recursive: true });
|
|
98
|
-
const configPath = join(ragDir, "config.json");
|
|
99
|
-
await writeFile(configPath, JSON.stringify(DEFAULT_CONFIG, null, 2) + "\n");
|
|
100
|
-
return configPath;
|
|
101
|
-
}
|
package/src/indexing/chunker.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { chunk as astChunk } from "
|
|
1
|
+
import { chunk as astChunk } from "@winci/bun-chunk";
|
|
2
2
|
import { log } from "../utils/log";
|
|
3
3
|
|
|
4
4
|
export interface ChunkImport {
|
|
@@ -97,18 +97,23 @@ async function _chunkText(
|
|
|
97
97
|
// Try AST-aware chunking for supported code files (even small ones, for import/export extraction)
|
|
98
98
|
if (AST_SUPPORTED.has(extension)) {
|
|
99
99
|
try {
|
|
100
|
-
const astChunks = await astChunk(filePath || `file${extension}`, text, {
|
|
101
|
-
maxChunkSize: chunkSize,
|
|
102
|
-
});
|
|
100
|
+
const astChunks = await astChunk(filePath || `file${extension}`, text);
|
|
103
101
|
if (astChunks.length > 0) {
|
|
104
|
-
return astChunks.map((c, i) =>
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
102
|
+
return astChunks.map((c, i) => {
|
|
103
|
+
const chunk: Chunk = {
|
|
104
|
+
text: c.text,
|
|
105
|
+
index: i,
|
|
106
|
+
startLine: c.startLine + 1, // bun-chunk is 0-indexed, local-rag is 1-indexed
|
|
107
|
+
endLine: c.endLine + 1,
|
|
108
|
+
};
|
|
109
|
+
if (c.type === "import") {
|
|
110
|
+
chunk.imports = parseImportText(c.text);
|
|
111
|
+
}
|
|
112
|
+
if (c.name && c.type !== "import" && c.type !== "block") {
|
|
113
|
+
chunk.exports = [{ name: c.name, type: c.type }];
|
|
114
|
+
}
|
|
115
|
+
return chunk;
|
|
116
|
+
});
|
|
112
117
|
}
|
|
113
118
|
} catch (err) {
|
|
114
119
|
log.debug(`AST chunking failed for ${filePath || extension}, using heuristic: ${err instanceof Error ? err.message : err}`, "chunker");
|
|
@@ -201,6 +206,29 @@ function assignLineNumbers(chunks: Chunk[], fullText: string): void {
|
|
|
201
206
|
}
|
|
202
207
|
}
|
|
203
208
|
|
|
209
|
+
/** Extract import name and source from an import statement text */
|
|
210
|
+
function parseImportText(text: string): ChunkImport[] {
|
|
211
|
+
// Match: import { foo } from "bar" / import foo from "bar" / import * as foo from "bar"
|
|
212
|
+
const match = text.match(/from\s+["']([^"']+)["']/);
|
|
213
|
+
if (!match) {
|
|
214
|
+
// import "side-effect" or Python: import foo / from foo import bar
|
|
215
|
+
const pyImport = text.match(/^import\s+(\S+)/);
|
|
216
|
+
if (pyImport) return [{ name: pyImport[1], source: pyImport[1] }];
|
|
217
|
+
const pyFrom = text.match(/^from\s+(\S+)\s+import\s+(.+)/);
|
|
218
|
+
if (pyFrom) return [{ name: pyFrom[2].trim(), source: pyFrom[1] }];
|
|
219
|
+
// Go/Rust: use/import with path
|
|
220
|
+
const quotedPath = text.match(/["']([^"']+)["']/);
|
|
221
|
+
if (quotedPath) return [{ name: quotedPath[1].split("/").pop()!, source: quotedPath[1] }];
|
|
222
|
+
return [];
|
|
223
|
+
}
|
|
224
|
+
const source = match[1];
|
|
225
|
+
const nameMatch = text.match(/import\s+(?:\{([^}]+)\}|(\w+)|\*\s+as\s+(\w+))/);
|
|
226
|
+
const name = nameMatch
|
|
227
|
+
? (nameMatch[1]?.trim() || nameMatch[2] || nameMatch[3] || source)
|
|
228
|
+
: source;
|
|
229
|
+
return [{ name, source }];
|
|
230
|
+
}
|
|
231
|
+
|
|
204
232
|
function splitMarkdown(text: string): string[] {
|
|
205
233
|
// Split on heading boundaries (## or ###)
|
|
206
234
|
const parts = text.split(/(?=^#{1,3}\s)/m);
|