@winci/local-rag 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@winci/local-rag",
3
- "version": "0.2.2",
3
+ "version": "0.2.4",
4
4
  "description": "Semantic search for your codebase — local-first RAG MCP server with hybrid search, AST-aware chunking, and usage analytics",
5
5
  "type": "module",
6
6
  "license": "MIT",
@@ -41,7 +41,7 @@
41
41
  "dependencies": {
42
42
  "@huggingface/transformers": "^3.4.0",
43
43
  "@modelcontextprotocol/sdk": "^1.12.0",
44
- "code-chunk": "^0.1.13",
44
+ "@winci/bun-chunk": "^0.1.0",
45
45
  "gray-matter": "^4.0.3",
46
46
  "sqlite-vec": "^0.1.6",
47
47
  "zod": "^4.3.6"
package/src/cli/setup.ts CHANGED
@@ -2,7 +2,7 @@ import { existsSync } from "fs";
2
2
  import { readFile, writeFile, mkdir } from "fs/promises";
3
3
  import { join, resolve } from "path";
4
4
  import { createInterface } from "readline";
5
- import { writeDefaultConfig } from "../config";
5
+ import { loadConfig } from "../config";
6
6
 
7
7
  const MARKER = "<!-- local-rag -->";
8
8
 
@@ -69,7 +69,8 @@ export interface SetupResult {
69
69
  export async function ensureConfig(projectDir: string): Promise<string | null> {
70
70
  const configPath = join(projectDir, ".rag", "config.json");
71
71
  if (existsSync(configPath)) return null;
72
- await writeDefaultConfig(projectDir);
72
+ // loadConfig auto-creates the file with defaults if missing
73
+ await loadConfig(projectDir);
73
74
  return "Created .rag/config.json";
74
75
  }
75
76
 
@@ -23,28 +23,40 @@ export type RagConfig = z.infer<typeof RagConfigSchema>;
23
23
 
24
24
  const DEFAULT_CONFIG: RagConfig = {
25
25
  include: [
26
+ // Source code — AST-aware chunking
27
+ "**/*.ts", "**/*.tsx", "**/*.js", "**/*.jsx",
28
+ "**/*.py",
29
+ "**/*.go",
30
+ "**/*.rs",
31
+ "**/*.java",
32
+ // Source code — heuristic chunking
33
+ "**/*.c", "**/*.cpp", "**/*.h", "**/*.hpp",
34
+ "**/*.rb",
35
+ "**/*.swift",
26
36
  // Markdown & plain text
27
- "**/*.md", "**/*.txt",
37
+ "**/*.md", "**/*.mdx", "**/*.markdown", "**/*.txt",
28
38
  // Build / task runners (no extension or prefix-named)
29
39
  "**/Makefile", "**/makefile", "**/GNUmakefile",
30
40
  "**/Dockerfile", "**/Dockerfile.*",
31
41
  "**/Jenkinsfile", "**/Jenkinsfile.*",
32
42
  "**/Vagrantfile", "**/Gemfile", "**/Rakefile",
33
43
  "**/Brewfile", "**/Procfile",
44
+ // Shell & scripting
45
+ "**/*.sh", "**/*.bash", "**/*.zsh", "**/*.fish",
34
46
  // Structured data & config
35
47
  "**/*.yaml", "**/*.yml",
36
48
  "**/*.json",
37
49
  "**/*.toml",
38
50
  "**/*.xml",
39
- // Shell & scripting
40
- "**/*.sh", "**/*.bash", "**/*.zsh",
41
51
  // Infrastructure / schema languages
42
52
  "**/*.tf",
43
53
  "**/*.proto",
44
54
  "**/*.graphql", "**/*.gql",
45
55
  "**/*.sql",
46
56
  "**/*.mod",
57
+ // API collections
47
58
  "**/*.bru",
59
+ // Stylesheets
48
60
  "**/*.css", "**/*.scss", "**/*.less",
49
61
  ],
50
62
  exclude: ["node_modules/**", ".git/**", "dist/**", ".rag/**"],
@@ -60,28 +72,30 @@ const DEFAULT_CONFIG: RagConfig = {
60
72
  };
61
73
 
62
74
  /**
63
- * Load config from .rag/config.json, merged with defaults.
64
- * Note: array fields (include, exclude) from user config *replace* the defaults
65
- * entirely — they are not merged. This lets users fully control which files are indexed.
75
+ * Load config from .rag/config.json.
76
+ * If the file doesn't exist, writes the defaults there first so users can
77
+ * edit the file directly — no hidden merge logic, what's on disk is what runs.
66
78
  */
67
79
  export async function loadConfig(projectDir: string): Promise<RagConfig> {
68
- const configPath = join(projectDir, ".rag", "config.json");
80
+ const ragDir = join(projectDir, ".rag");
81
+ const configPath = join(ragDir, "config.json");
69
82
 
70
83
  if (!existsSync(configPath)) {
84
+ await mkdir(ragDir, { recursive: true });
85
+ await writeFile(configPath, JSON.stringify(DEFAULT_CONFIG, null, 2) + "\n");
71
86
  return { ...DEFAULT_CONFIG };
72
87
  }
73
88
 
74
89
  const raw = await readFile(configPath, "utf-8");
75
- let userConfig: unknown;
90
+ let parsed: unknown;
76
91
  try {
77
- userConfig = JSON.parse(raw);
92
+ parsed = JSON.parse(raw);
78
93
  } catch {
79
94
  log.warn(`Invalid JSON in ${configPath}, using defaults`, "config");
80
95
  return { ...DEFAULT_CONFIG };
81
96
  }
82
97
 
83
- const merged = { ...DEFAULT_CONFIG, ...(userConfig as Record<string, unknown>) };
84
- const result = RagConfigSchema.safeParse(merged);
98
+ const result = RagConfigSchema.safeParse(parsed);
85
99
 
86
100
  if (!result.success) {
87
101
  const issues = result.error.issues.map((i) => `${i.path.join(".")}: ${i.message}`).join(", ");
@@ -91,11 +105,3 @@ export async function loadConfig(projectDir: string): Promise<RagConfig> {
91
105
 
92
106
  return result.data;
93
107
  }
94
-
95
- export async function writeDefaultConfig(projectDir: string): Promise<string> {
96
- const ragDir = join(projectDir, ".rag");
97
- await mkdir(ragDir, { recursive: true });
98
- const configPath = join(ragDir, "config.json");
99
- await writeFile(configPath, JSON.stringify(DEFAULT_CONFIG, null, 2) + "\n");
100
- return configPath;
101
- }
@@ -1,4 +1,4 @@
1
- import { chunk as astChunk } from "code-chunk";
1
+ import { chunk as astChunk } from "@winci/bun-chunk";
2
2
  import { log } from "../utils/log";
3
3
 
4
4
  export interface ChunkImport {
@@ -97,18 +97,23 @@ async function _chunkText(
97
97
  // Try AST-aware chunking for supported code files (even small ones, for import/export extraction)
98
98
  if (AST_SUPPORTED.has(extension)) {
99
99
  try {
100
- const astChunks = await astChunk(filePath || `file${extension}`, text, {
101
- maxChunkSize: chunkSize,
102
- });
100
+ const astChunks = await astChunk(filePath || `file${extension}`, text);
103
101
  if (astChunks.length > 0) {
104
- return astChunks.map((c, i) => ({
105
- text: c.text,
106
- index: i,
107
- imports: c.context.imports.map((im) => ({ name: im.name, source: im.source })),
108
- exports: c.context.entities
109
- .filter((e) => e.type === "export" || e.type === "function" || e.type === "class" || e.type === "interface" || e.type === "type" || e.type === "enum")
110
- .map((e) => ({ name: e.name, type: e.type })),
111
- }));
102
+ return astChunks.map((c, i) => {
103
+ const chunk: Chunk = {
104
+ text: c.text,
105
+ index: i,
106
+ startLine: c.startLine + 1, // bun-chunk is 0-indexed, local-rag is 1-indexed
107
+ endLine: c.endLine + 1,
108
+ };
109
+ if (c.type === "import") {
110
+ chunk.imports = parseImportText(c.text);
111
+ }
112
+ if (c.name && c.type !== "import" && c.type !== "block") {
113
+ chunk.exports = [{ name: c.name, type: c.type }];
114
+ }
115
+ return chunk;
116
+ });
112
117
  }
113
118
  } catch (err) {
114
119
  log.debug(`AST chunking failed for ${filePath || extension}, using heuristic: ${err instanceof Error ? err.message : err}`, "chunker");
@@ -201,6 +206,29 @@ function assignLineNumbers(chunks: Chunk[], fullText: string): void {
201
206
  }
202
207
  }
203
208
 
209
+ /** Extract import name and source from an import statement text */
210
+ function parseImportText(text: string): ChunkImport[] {
211
+ // Match: import { foo } from "bar" / import foo from "bar" / import * as foo from "bar"
212
+ const match = text.match(/from\s+["']([^"']+)["']/);
213
+ if (!match) {
214
+ // import "side-effect" or Python: import foo / from foo import bar
215
+ const pyImport = text.match(/^import\s+(\S+)/);
216
+ if (pyImport) return [{ name: pyImport[1], source: pyImport[1] }];
217
+ const pyFrom = text.match(/^from\s+(\S+)\s+import\s+(.+)/);
218
+ if (pyFrom) return [{ name: pyFrom[2].trim(), source: pyFrom[1] }];
219
+ // Go/Rust: use/import with path
220
+ const quotedPath = text.match(/["']([^"']+)["']/);
221
+ if (quotedPath) return [{ name: quotedPath[1].split("/").pop()!, source: quotedPath[1] }];
222
+ return [];
223
+ }
224
+ const source = match[1];
225
+ const nameMatch = text.match(/import\s+(?:\{([^}]+)\}|(\w+)|\*\s+as\s+(\w+))/);
226
+ const name = nameMatch
227
+ ? (nameMatch[1]?.trim() || nameMatch[2] || nameMatch[3] || source)
228
+ : source;
229
+ return [{ name, source }];
230
+ }
231
+
204
232
  function splitMarkdown(text: string): string[] {
205
233
  // Split on heading boundaries (## or ###)
206
234
  const parts = text.split(/(?=^#{1,3}\s)/m);