opencode-rag-plugin 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133) hide show
  1. package/LICENSE +21 -0
  2. package/ReadMe.md +423 -0
  3. package/dist/chunker/base.d.ts +10 -0
  4. package/dist/chunker/base.js +34 -0
  5. package/dist/chunker/base.js.map +1 -0
  6. package/dist/chunker/c.d.ts +8 -0
  7. package/dist/chunker/c.js +16 -0
  8. package/dist/chunker/c.js.map +1 -0
  9. package/dist/chunker/cpp.d.ts +8 -0
  10. package/dist/chunker/cpp.js +17 -0
  11. package/dist/chunker/cpp.js.map +1 -0
  12. package/dist/chunker/csharp.d.ts +8 -0
  13. package/dist/chunker/csharp.js +17 -0
  14. package/dist/chunker/csharp.js.map +1 -0
  15. package/dist/chunker/css.d.ts +8 -0
  16. package/dist/chunker/css.js +14 -0
  17. package/dist/chunker/css.js.map +1 -0
  18. package/dist/chunker/factory.d.ts +27 -0
  19. package/dist/chunker/factory.js +138 -0
  20. package/dist/chunker/factory.js.map +1 -0
  21. package/dist/chunker/fallback.d.ts +8 -0
  22. package/dist/chunker/fallback.js +34 -0
  23. package/dist/chunker/fallback.js.map +1 -0
  24. package/dist/chunker/go.d.ts +8 -0
  25. package/dist/chunker/go.js +13 -0
  26. package/dist/chunker/go.js.map +1 -0
  27. package/dist/chunker/grammar.d.ts +12 -0
  28. package/dist/chunker/grammar.js +43 -0
  29. package/dist/chunker/grammar.js.map +1 -0
  30. package/dist/chunker/html.d.ts +8 -0
  31. package/dist/chunker/html.js +12 -0
  32. package/dist/chunker/html.js.map +1 -0
  33. package/dist/chunker/java.d.ts +8 -0
  34. package/dist/chunker/java.js +14 -0
  35. package/dist/chunker/java.js.map +1 -0
  36. package/dist/chunker/javascript.d.ts +8 -0
  37. package/dist/chunker/javascript.js +15 -0
  38. package/dist/chunker/javascript.js.map +1 -0
  39. package/dist/chunker/json.d.ts +8 -0
  40. package/dist/chunker/json.js +11 -0
  41. package/dist/chunker/json.js.map +1 -0
  42. package/dist/chunker/kotlin.d.ts +8 -0
  43. package/dist/chunker/kotlin.js +15 -0
  44. package/dist/chunker/kotlin.js.map +1 -0
  45. package/dist/chunker/loader.d.ts +2 -0
  46. package/dist/chunker/loader.js +27 -0
  47. package/dist/chunker/loader.js.map +1 -0
  48. package/dist/chunker/markdown.d.ts +7 -0
  49. package/dist/chunker/markdown.js +96 -0
  50. package/dist/chunker/markdown.js.map +1 -0
  51. package/dist/chunker/pdf.d.ts +8 -0
  52. package/dist/chunker/pdf.js +93 -0
  53. package/dist/chunker/pdf.js.map +1 -0
  54. package/dist/chunker/python.d.ts +8 -0
  55. package/dist/chunker/python.js +13 -0
  56. package/dist/chunker/python.js.map +1 -0
  57. package/dist/chunker/razor.d.ts +7 -0
  58. package/dist/chunker/razor.js +85 -0
  59. package/dist/chunker/razor.js.map +1 -0
  60. package/dist/chunker/ruby.d.ts +8 -0
  61. package/dist/chunker/ruby.js +14 -0
  62. package/dist/chunker/ruby.js.map +1 -0
  63. package/dist/chunker/rust.d.ts +8 -0
  64. package/dist/chunker/rust.js +17 -0
  65. package/dist/chunker/rust.js.map +1 -0
  66. package/dist/chunker/sln.d.ts +9 -0
  67. package/dist/chunker/sln.js +65 -0
  68. package/dist/chunker/sln.js.map +1 -0
  69. package/dist/chunker/swift.d.ts +8 -0
  70. package/dist/chunker/swift.js +17 -0
  71. package/dist/chunker/swift.js.map +1 -0
  72. package/dist/chunker/tex.d.ts +7 -0
  73. package/dist/chunker/tex.js +93 -0
  74. package/dist/chunker/tex.js.map +1 -0
  75. package/dist/chunker/typescript.d.ts +8 -0
  76. package/dist/chunker/typescript.js +17 -0
  77. package/dist/chunker/typescript.js.map +1 -0
  78. package/dist/chunker/uuid.d.ts +1 -0
  79. package/dist/chunker/uuid.js +8 -0
  80. package/dist/chunker/uuid.js.map +1 -0
  81. package/dist/chunker/xml.d.ts +8 -0
  82. package/dist/chunker/xml.js +11 -0
  83. package/dist/chunker/xml.js.map +1 -0
  84. package/dist/cli.d.ts +1 -0
  85. package/dist/cli.js +291 -0
  86. package/dist/cli.js.map +1 -0
  87. package/dist/core/config.d.ts +59 -0
  88. package/dist/core/config.js +127 -0
  89. package/dist/core/config.js.map +1 -0
  90. package/dist/core/fileLogger.d.ts +6 -0
  91. package/dist/core/fileLogger.js +32 -0
  92. package/dist/core/fileLogger.js.map +1 -0
  93. package/dist/core/interfaces.d.ts +31 -0
  94. package/dist/core/interfaces.js +2 -0
  95. package/dist/core/interfaces.js.map +1 -0
  96. package/dist/core/manifest.d.ts +21 -0
  97. package/dist/core/manifest.js +48 -0
  98. package/dist/core/manifest.js.map +1 -0
  99. package/dist/embedder/factory.d.ts +4 -0
  100. package/dist/embedder/factory.js +27 -0
  101. package/dist/embedder/factory.js.map +1 -0
  102. package/dist/embedder/http.d.ts +11 -0
  103. package/dist/embedder/http.js +309 -0
  104. package/dist/embedder/http.js.map +1 -0
  105. package/dist/embedder/ollama.d.ts +14 -0
  106. package/dist/embedder/ollama.js +60 -0
  107. package/dist/embedder/ollama.js.map +1 -0
  108. package/dist/embedder/openai.d.ts +12 -0
  109. package/dist/embedder/openai.js +33 -0
  110. package/dist/embedder/openai.js.map +1 -0
  111. package/dist/index.d.ts +10 -0
  112. package/dist/index.js +10 -0
  113. package/dist/index.js.map +1 -0
  114. package/dist/indexer.d.ts +49 -0
  115. package/dist/indexer.js +336 -0
  116. package/dist/indexer.js.map +1 -0
  117. package/dist/plugin-entry.d.ts +4 -0
  118. package/dist/plugin-entry.js +5 -0
  119. package/dist/plugin-entry.js.map +1 -0
  120. package/dist/plugin.d.ts +22 -0
  121. package/dist/plugin.js +477 -0
  122. package/dist/plugin.js.map +1 -0
  123. package/dist/retriever/retriever.d.ts +5 -0
  124. package/dist/retriever/retriever.js +14 -0
  125. package/dist/retriever/retriever.js.map +1 -0
  126. package/dist/types/opencode-plugin.d.ts +51 -0
  127. package/dist/vectorstore/lancedb.d.ts +18 -0
  128. package/dist/vectorstore/lancedb.js +196 -0
  129. package/dist/vectorstore/lancedb.js.map +1 -0
  130. package/dist/watcher.d.ts +14 -0
  131. package/dist/watcher.js +88 -0
  132. package/dist/watcher.js.map +1 -0
  133. package/package.json +82 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Christoph Döllinger
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/ReadMe.md ADDED
@@ -0,0 +1,423 @@
1
+ # OpenCodeRAG
2
+
3
+ Local-first RAG plugin for OpenCode — semantic code search powered by
4
+ embeddings and vector similarity.
5
+
6
+ **Note: This is an early pre-release and may not work correctly in all cases.
7
+ If you find bugs, please create an issue.**
8
+
9
+ ## Features
10
+
11
+ - **AST-aware chunking** — splits code into functions, classes, methods using
12
+ tree-sitter for 16 languages, plus regex-based chunking for 4 markup/config/doc
13
+ formats (Markdown, Razor, .sln, LaTeX). Falls back to line-based chunking for
14
+ unrecognized formats.
15
+ - **Incremental indexing** — manifest-backed indexing skips unchanged files,
16
+ removes deleted entries, and updates only changed files.
17
+ - **Watch mode** — `index --watch` re-indexes on file changes with debounced,
18
+ serialized passes.
19
+ - **Pluggable chunkers** — add custom language chunkers via config or programmatic API.
20
+ - **Configurable embeddings** — Ollama (default) or OpenAI-compatible providers.
21
+ Batch embedding with configurable batch size.
22
+ - **Local vector store** — LanceDB with L2 distance scoring, memory mode for
23
+ testing.
24
+ - **CLI** — index, query, clear, status commands.
25
+ - **OpenCode plugin** — exposes a chunk retrieval tool and suggests relevant files after each user message via the `chat.message` hook.
26
+
27
+ ## Architecture
28
+
29
+ ```
30
+ Workspace Files
31
+
32
+
33
+ ┌──────────────┐
34
+ │ Chunker │ AST-based (tree-sitter) or line-based fallback
35
+ └──────┬───────┘
36
+ │ chunks
37
+
38
+ ┌──────────────┐
39
+ │ Embedder │ Ollama / OpenAI-compatible API
40
+ └──────┬───────┘
41
+ │ vectors
42
+
43
+ ┌──────────────┐
44
+ │ VectorStore │ LanceDB (local files or memory:// for tests)
45
+ └──────┬───────┘
46
+ │ + manifest.json
47
+
48
+ ┌──────────────┐
49
+ │ Indexer/Retr.│ incremental index or query/search
50
+ └──────┬───────┘
51
+ │ results
52
+
53
+ LLM Context
54
+ ```
55
+
56
+ ## Tech Stack
57
+
58
+ | Layer | Technology |
59
+ | ----------- | --------------------------------------------------- |
60
+ | Runtime | Node.js v22 + tsx (ESM) |
61
+ | Language | TypeScript 5.8 |
62
+ | Chunking | web-tree-sitter (WASM) + tree-sitter-wasm grammars |
63
+ | Embeddings | Ollama / OpenAI-compatible (native fetch) |
64
+ | Vector DB | LanceDB (`@lancedb/lancedb`) |
65
+ | CLI | commander |
66
+ | Tests | Node built-in test runner (`node --test`) |
67
+ | Package mgr | npm (with `--legacy-peer-deps`) |
68
+
69
+ ## Installation
70
+
71
+ ```bash
72
+ git clone <repo-url>
73
+ cd OpenCodeRAG
74
+ npm install --legacy-peer-deps
75
+ ```
76
+
77
+ ### Dependencies
78
+
79
+ - **Node.js v22+** for native ESM and fetch support
80
+ - **apache-arrow** — peer dependency for LanceDB (auto-installed)
81
+ - **tree-sitter-wasm** — ships pre-built WASM grammars for all supported languages
82
+
83
+ ## Configuration
84
+
85
+ Create `opencode-rag.json` in the project root (auto-detected) or pass via
86
+ `--config`. The repository's own [`opencode-rag.json`](./opencode-rag.json) serves
87
+ as a complete example with all available options.
88
+
89
+ Config files support partial overrides — missing keys fall back to defaults.
90
+ Deep merging is applied per section.
91
+
92
+ ### Embedding Providers
93
+
94
+ | Provider | `baseUrl` example | Notes |
95
+ | -------- | --------------------------------- | ---------------------------- |
96
+ | ollama | `http://localhost:11434/api` | Default. No apiKey required. Proxy is disabled when `embedding.proxy.url` is empty. |
97
+ | openai | `https://api.openai.com/v1` | Requires apiKey. |
98
+
99
+ `embedding.timeoutMs` defaults to 30000 ms. Increase it if your local model has a slow cold start.
100
+
101
+ OpenAI provider sends all texts in a single request. Ollama sends one request
102
+ per request to `/api/embed`. Set `embedding.proxy.url` to use the standard
103
+ proxy-aware HTTP path instead of the direct socket path.
104
+
105
+ ## Usage
106
+
107
+ This extension consists of two main interfaces:
108
+ 1. **CLI** — for manual indexing and querying from the terminal
109
+ 2. **OpenCode plugin** — for automatic retrieval and file suggestions within the chat interface
110
+
111
+ ### CLI
112
+
113
+ ```bash
114
+ # Index the workspace incrementally
115
+ npx tsx src/cli.ts index
116
+
117
+ # Force full re-index (clears existing data first)
118
+ npx tsx src/cli.ts index --force
119
+
120
+ # Watch workspace and incrementally re-index on changes
121
+ npx tsx src/cli.ts index --watch
122
+
123
+ # Semantic search
124
+ npx tsx src/cli.ts query "How is authentication handled?"
125
+
126
+ # Limit results
127
+ npx tsx src/cli.ts query "error handling" --top-k 5
128
+
129
+ # Show indexing stats
130
+ npx tsx src/cli.ts status
131
+
132
+ # Example output:
133
+ # Indexed chunks: 1247
134
+ # Store path: /home/user/project/.opencode/rag_db
135
+ # Embedding provider: ollama
136
+ # Embedding model: nomic-embed-text
137
+ # Manifest status: ok
138
+ # Manifest entries: 42
139
+ # Last indexed: 2026-05-28 10:45:02
140
+ # Up-to-date files: 42
141
+ # Pending files: 0
142
+ # Watch mode: off
143
+
144
+ # Clear all indexed data
145
+ npx tsx src/cli.ts clear
146
+
147
+ # Use custom config
148
+ npx tsx src/cli.ts index --config ./my-config.json
149
+ ```
150
+
151
+ `index` is incremental by default. A sidecar manifest is stored at
152
+ `<vectorStore.path>/manifest.json` and tracks file hashes, chunk counts, and the
153
+ last successful index timestamp. If the manifest is missing or corrupt while the
154
+ vector store already contains data, the next index pass clears and rebuilds the
155
+ store to avoid duplicates.
156
+
157
+ ### Watch workflow
158
+
159
+ Start a watch session:
160
+
161
+ ```bash
162
+ npx tsx src/cli.ts index --watch
163
+ ```
164
+
165
+ The initial pass indexes the workspace, then watches for file changes. On each
166
+ `add`, `change`, `unlink`, or `unlinkDir` event, the watch debounces (300 ms)
167
+ and triggers a new incremental pass. If a pass is already running, the watcher
168
+ queues one follow-up pass and runs it as soon as the current pass finishes.
169
+
170
+ The watcher ignores excluded directories, the vector store path, and the
171
+ manifest file itself. Press `Ctrl+C` to stop.
172
+
173
+ ### OpenCode Plugin
174
+
175
+ The plugin registers:
176
+
177
+ 1. **`opencode-rag-context`** — a custom retrieval tool for chunk-level evidence
178
+ 2. **`chat.message`** — after each user message, automatically retrieves relevant indexed files and appends a compact suggestion list to the message text
179
+
180
+ #### Chat Message File Suggestions
181
+
182
+ After you send a message, the plugin:
183
+ 1. Extracts the user's message text
184
+ 2. Runs semantic retrieval against the indexed workspace
185
+ 3. Groups results by file, sorts by best chunk score, and formats a compact file list:
186
+ ```
187
+ src/plugin.ts (typescript, lines 10-42)
188
+ src/core/config.ts (typescript, lines 66-145)
189
+ ```
190
+ 4. Appends the list (max 10 files) to your message text
191
+
192
+ Only file paths, language, and line ranges are shown — no scores or code snippets. This gives the agent lightweight hints about which files are relevant without inflating the context window.
193
+
194
+ **Config:**
195
+
196
+ | Option | Default | Description |
197
+ | ------ | ------- | ----------- |
198
+ | `openCode.overrideRead` | `false` | Set to `true` to restore the legacy RAG-backed `read` tool (deprecated) |
199
+ | `openCode.maxContextChunks` | `5` | Maximum chunks per retrieval (affects `opencode-rag-context` tool output) |
200
+ | `retrieval.topK` | `10` | Number of chunks fetched per query (controls chat.message file suggestion breadth) |
201
+
202
+ Errors during retrieval are silently caught — a failed search won't break the
203
+ chat.
204
+
205
+ #### Install from source
206
+
207
+ After cloning and installing dependencies:
208
+
209
+ ```bash
210
+ # Option 1: Use the project-local auto-loaded plugin
211
+ # The repo already includes .opencode/plugins/rag-plugin.ts
212
+
213
+ # Option 2: Build and install via npm pack
214
+ npm run build
215
+ npm pack
216
+ opencode plugin .\opencode-rag-plugin-1.0.0.tgz
217
+
218
+ # Option 3: Install from npm (once published)
219
+ opencode plugin opencode-rag-plugin
220
+ ```
221
+
222
+ The plugin auto-detects configuration from `opencode-rag.json` or
223
+ `.opencode/rag.json` in the project root.
224
+
225
+ If you use the project-local plugin file, OpenCode auto-loads it from
226
+ `.opencode/plugins/` at startup and no `plugin` entry is required in
227
+ `.opencode/opencode.json`.
228
+
229
+ Restart OpenCode after changing plugin files or plugin configuration.
230
+
231
+ ### Logging
232
+
233
+ Logging is configured under the `logging` key:
234
+
235
+ ```json
236
+ {
237
+ "logging": {
238
+ "level": "info",
239
+ "logFilePath": "./.opencode/opencode-rag.log"
240
+ }
241
+ }
242
+ ```
243
+
244
+ | Option | Default | Description |
245
+ | ------------ | ------------------------------ | -------------------------------------------- |
246
+ | `level` | `"info"` | Log level: `"debug"`, `"info"`, or `"error"` |
247
+ | `logFilePath` | `"./.opencode/opencode-rag.log"` | Path to the log file (relative paths are resolved against the workspace directory) |
248
+
249
+ The resolved log file path also falls back to the `LOG_FILE_PATH` environment variable when the config value is not set. Config takes precedence over the env var when both are provided.
250
+
251
+ #### AGENTS.md hints for using the plugin
252
+
253
+ Add a section like this to the target workspace's `AGENTS.md` so the agent
254
+ knows how to use the plugin correctly:
255
+
256
+ ```markdown
257
+ ## OpenCodeRAG Plugin
258
+
259
+ This workspace has OpenCodeRAG installed for semantic code retrieval.
260
+
261
+ ### `opencode-rag-context` tool
262
+ Before planning, editing, or answering, use this tool to retrieve relevant code
263
+ chunks with file paths, line ranges, and surrounding implementation.
264
+ - `query` (required) — narrow, specific search, e.g. `"authentication middleware setup"`
265
+ - `pathHints` (optional) — up to 10 path filters, e.g. `["src/auth/"]`
266
+ - `languageHints` (optional) — up to 10 language filters, e.g. `["typescript"]`
267
+ - `topK` (optional) — result count (1-25, default 10)
268
+
269
+ ### File suggestions
270
+ After each user message, a `chat.message` hook appends up to 10 relevant file
271
+ suggestions to the message. Look for lines like
272
+ `src/file.ts (typescript, lines 10-42)` at the bottom of user input.
273
+
274
+ ### Indexing
275
+ - The plugin auto-indexes changed files in the background (debounced 5s)
276
+ - If no results come back, the workspace may not be indexed yet —
277
+ run `opencode-rag index` from the terminal
278
+ - Tiny files (under 1 KB), excluded extensions, and excluded directories
279
+ (`node_modules`, `.git`, `.opencode`, `dist`, etc.) are silently skipped
280
+ ```
281
+
282
+ The plugin registers itself in the system prompt via the
283
+ `experimental.chat.system.transform` hook, so compliant agents will see a
284
+ reminder about the `opencode-rag-context` tool in their system instructions.
285
+
286
+ ## Data Model
287
+
288
+ ```typescript
289
+ interface Chunk {
290
+ id: string;
291
+ content: string;
292
+ embedding?: number[];
293
+ metadata: {
294
+ filePath: string;
295
+ startLine: number;
296
+ endLine: number;
297
+ language: string;
298
+ };
299
+ }
300
+
301
+ interface SearchResult {
302
+ chunk: Chunk;
303
+ score: number; // 1 / (1 + L2_distance), range (0, 1]; higher means closer
304
+ }
305
+ ```
306
+
307
+ ## Chunking
308
+
309
+ | Language | Strategy | Captures |
310
+ | ---------- | ------------------------------ | ----------------------------------------- |
311
+ | TypeScript | AST (tree-sitter) | functions, methods, classes, interfaces |
312
+ | Python | AST (tree-sitter) | functions, classes, decorated definitions |
313
+ | Java | AST (tree-sitter) | methods, classes, interfaces, enums |
314
+ | Go | AST (tree-sitter) | functions, methods, type declarations |
315
+ | C | AST (tree-sitter) | functions, structs, enums, unions, typedefs, `#define` macro definitions (`preproc_def`) |
316
+ | C++ | AST (tree-sitter) | functions, classes, structs, enums, unions, namespaces, templates |
317
+ | C# | AST (tree-sitter) | classes, interfaces, structs, enums, methods, namespaces, records |
318
+ | JavaScript | AST (tree-sitter) | functions, classes, arrow functions, exports |
319
+ | JSON | AST (tree-sitter) | key-value pairs |
320
+ | XML | AST (tree-sitter) | elements (1 chunk per root element) |
321
+ | HTML | AST (tree-sitter) | `<script>` / `<style>` blocks |
322
+ | CSS | AST (tree-sitter) | rule sets, at-rules, media, keyframes |
323
+ | Razor | Regex (brace matching) | `@code` / `@functions` blocks, template regions |
324
+ | Markdown | Regex heading split | h1/h2 sections + trailing content |
325
+ | Solution | Regex (section boundary) | project entries and global sections |
326
+ | Rust | AST (tree-sitter) | functions, structs, enums, traits, impl blocks, modules, types |
327
+ | Ruby | AST (tree-sitter) | methods, classes, modules, singleton methods |
328
+ | Kotlin | AST (tree-sitter) | functions, classes, interfaces, objects, properties |
329
+ | Swift | AST (tree-sitter) | functions, classes, structs, enums, protocols, extensions, variables |
330
+ | LaTeX | Regex section split | chapter/section/subsection/subsubsection boundaries |
331
+ | PDF | Paragraph-based (text extraction) | groups small paragraphs, splits oversized |
332
+ | (other) | Line-based (100 lines/chunk) | raw text blocks |
333
+
334
+ Custom chunkers can be added without modifying the project source code. Two
335
+ registration paths are supported:
336
+
337
+ ### Config file
338
+
339
+ Add a `chunkers` array to `opencode-rag.json`:
340
+
341
+ ```json
342
+ {
343
+ "chunkers": [
344
+ { "module": "./path/to/zig-chunker.js", "extensions": [".zig"] }
345
+ ]
346
+ }
347
+ ```
348
+
349
+ The module path is resolved relative to the config file. The loaded module must
350
+ export (as default or named) an object implementing the `Chunker` interface:
351
+
352
+ ```typescript
353
+ interface Chunker {
354
+ readonly language: string;
355
+ readonly fileExtensions?: string[];
356
+ chunk(filePath: string, content: string): Promise<Chunk[]>;
357
+ }
358
+ ```
359
+
360
+ ### Programmatic
361
+
362
+ ```typescript
363
+ import { registerChunker } from "opencode-rag/library";
364
+ registerChunker(myChunker, [".zig"]);
365
+ ```
366
+
367
+ The optional second argument overrides the chunker's `fileExtensions`. If a
368
+ built-in chunker already covers the requested extension, the new registration is
369
+ skipped and a warning is emitted.
370
+
371
+ ## Vector Store
372
+
373
+ LanceDB stores chunks in a `chunks` table with columns: `id`, `content`,
374
+ `embedding` (vector), `filePath`, `startLine`, `endLine`, `language`.
375
+
376
+ - **Disk mode**: files in `vectorStore.path` (default `.opencode/rag_db`)
377
+ - **Memory mode**: `memory://` URI — for tests only, data lost on close
378
+ - **Manifest sidecar**: `manifest.json` in the store directory tracks indexed
379
+ files for incremental updates
380
+ - Schema is auto-inferred from a seed row on first table creation
381
+ - L2 distance search, score = `1 / (1 + distance)`
382
+ - Stored file paths are normalized to absolute forward-slash paths
383
+
384
+ ## Development
385
+
386
+ ```bash
387
+ # TypeScript typecheck
388
+ npm run typecheck
389
+
390
+ # Run all tests
391
+ npm test
392
+
393
+ # Run specific test file
394
+ node --import tsx --test src/__tests__/chunker/fallback.test.ts
395
+ ```
396
+
397
+ Project structure:
398
+ ```
399
+ src/
400
+ core/ — interfaces.ts, config.ts, fileLogger.ts, manifest.ts
401
+ chunker/ — grammar.ts, base.ts, loader.ts, language chunkers, fallback.ts, factory.ts, uuid.ts
402
+ embedder/ — ollama.ts, openai.ts, http.ts, factory.ts
403
+ vectorstore/ — lancedb.ts
404
+ retriever/ — retriever.ts
405
+ types/ — opencode-plugin.d.ts
406
+ indexer.ts — incremental indexing + watch scheduling
407
+ watcher.ts — background indexer (chokidar + debounced scheduler + periodic timer)
408
+ cli.ts, plugin.ts, plugin-entry.ts, index.ts
409
+ __tests__/ — mirrors the module structure
410
+ ```
411
+
412
+ Test framework is Node's built-in runner (`node:test`) with `tsx` for TypeScript
413
+ imports. No test library dependencies.
414
+
415
+ ## Limitations
416
+
417
+ - Embedding model dimension is auto-probed at startup; falls back to 384 if probing fails.
418
+ - 21 built-in chunkers (AST for 16, regex for 4, PDF text for 1) + configurable fallback
419
+
420
+ ## Privacy
421
+
422
+ All processing is local. Embeddings are generated via local Ollama by default.
423
+ No data leaves the machine unless configured to use a remote embedding API.
@@ -0,0 +1,10 @@
1
+ import type { Chunker, Chunk } from "../core/interfaces.js";
2
+ export declare abstract class TreeSitterChunker implements Chunker { // Base for chunkers that parse content with web-tree-sitter and emit one Chunk per node returned by walkTree.
3
+ abstract readonly language: string; // Copied into each emitted chunk's metadata.language.
4
+ abstract readonly fileExtensions: string[]; // Extensions the subclass declares; the base implementation itself never reads this.
5
+ abstract readonly grammarName: string; // Name handed to loadLanguage() when the parser is first created.
6
+ abstract readonly nodeTypes: Set<string>; // Node type names passed to walkTree() together with the root node and the content.
7
+ private parser; // Cached Parser instance; null until getParser() first runs.
8
+ private getParser; // Lazily creates the parser and sets its language, then reuses it.
9
+ chunk(filePath: string, content: string): Promise<Chunk[]>; // Resolves to [] for whitespace-only content or when parsing yields no tree.
10
+ }
@@ -0,0 +1,34 @@
1
+ import { Parser } from "web-tree-sitter";
2
+ import { loadLanguage, walkTree } from "./grammar.js";
3
+ import { uuid } from "./uuid.js";
4
/**
 * Base class for chunkers that split source text with a tree-sitter grammar.
 *
 * Subclasses provide `language`, `fileExtensions`, `grammarName` and
 * `nodeTypes`; this class owns parser creation and the parse -> walk -> map
 * pipeline.
 */
export class TreeSitterChunker {
    /** Cached parser; stays `null` until the first successful `getParser()`. */
    parser = null;
    /**
     * Returns the cached parser, creating it on first use.
     *
     * The parser is only stored on `this` after `setLanguage` has succeeded, so
     * a failed grammar load can never leave a language-less parser cached for
     * every later call.
     *
     * @returns {Promise<Parser>} parser configured for `this.grammarName`.
     * @throws whatever `loadLanguage` or `setLanguage` throws.
     */
    async getParser() {
        if (!this.parser) {
            const lang = await loadLanguage(this.grammarName);
            const parser = new Parser();
            try {
                parser.setLanguage(lang);
            }
            catch (err) {
                // Release the half-initialised parser's WASM memory before rethrowing.
                parser.delete();
                throw err;
            }
            this.parser = parser;
        }
        return this.parser;
    }
    /**
     * Splits `content` into chunks, one per node returned by `walkTree`.
     *
     * @param filePath path recorded in each chunk's metadata.
     * @param content  full text of the file.
     * @returns chunks with a fresh id, the node text and its line range;
     *          `[]` for whitespace-only content or when parsing yields no tree.
     */
    async chunk(filePath, content) {
        if (content.trim().length === 0)
            return [];
        const parser = await this.getParser();
        const tree = parser.parse(content);
        if (!tree)
            return [];
        try {
            const nodes = walkTree(tree.rootNode, this.nodeTypes, content);
            // Build the chunks while the tree is still alive, so every value is
            // read out before the tree is released below.
            return nodes.map((node) => ({
                id: uuid(),
                content: node.text,
                metadata: {
                    filePath,
                    startLine: node.startLine,
                    endLine: node.endLine,
                    language: this.language,
                },
            }));
        }
        finally {
            // web-tree-sitter trees live in WASM memory and are not garbage
            // collected; without this every parsed file leaks its tree.
            tree.delete();
        }
    }
}
34
+ //# sourceMappingURL=base.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"base.js","sourceRoot":"","sources":["../../src/chunker/base.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,QAAQ,EAAgB,MAAM,cAAc,CAAC;AAEpE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAEjC,MAAM,OAAgB,iBAAiB;IAM7B,MAAM,GAAkB,IAAI,CAAC;IAE7B,KAAK,CAAC,SAAS;QACrB,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;YACjB,MAAM,IAAI,GAAG,MAAM,YAAY,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAClD,IAAI,CAAC,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;YAC3B,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;QAChC,CAAC;QACD,OAAO,IAAI,CAAC,MAAM,CAAC;IACrB,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,QAAgB,EAAE,OAAe;QAC3C,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAE3C,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QACnC,IAAI,CAAC,IAAI;YAAE,OAAO,EAAE,CAAC;QAErB,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;QAE/D,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAa,EAAE,EAAE,CAAC,CAAC;YACnC,EAAE,EAAE,IAAI,EAAE;YACV,OAAO,EAAE,IAAI,CAAC,IAAI;YAClB,QAAQ,EAAE;gBACR,QAAQ;gBACR,SAAS,EAAE,IAAI,CAAC,SAAS;gBACzB,OAAO,EAAE,IAAI,CAAC,OAAO;gBACrB,QAAQ,EAAE,IAAI,CAAC,QAAQ;aACxB;SACF,CAAC,CAAC,CAAC;IACN,CAAC;CACF"}
@@ -0,0 +1,8 @@
1
+ import { TreeSitterChunker } from "./base.js";
2
+ export declare class CChunker extends TreeSitterChunker { // Tree-sitter based chunker for C sources.
3
+ readonly language = "c"; // Language label for chunks produced by this chunker.
4
+ readonly fileExtensions: string[]; // [".c", ".h"] in the compiled implementation.
5
+ readonly grammarName = "c"; // Grammar name used by the base class to load the parser language.
6
+ readonly nodeTypes: Set<string>; // Function, struct, enum, union, typedef and preproc_def node types.
7
+ }
8
+ export declare const cChunker: CChunker; // Module-level instance of CChunker.
@@ -0,0 +1,16 @@
1
+ import { TreeSitterChunker } from "./base.js";
2
/** Tree-sitter node type names that the C chunker hands to the base class. */
const C_NODE_TYPES = [
    "function_definition",
    "struct_specifier",
    "enum_specifier",
    "union_specifier",
    "type_definition",
    "preproc_def",
];
/**
 * Chunker for C sources and headers; all parsing logic lives in
 * TreeSitterChunker, this class only supplies the configuration.
 */
export class CChunker extends TreeSitterChunker {
    language = "c";
    fileExtensions = [".c", ".h"];
    grammarName = "c";
    // Each instance gets its own Set, built from the shared list above.
    nodeTypes = new Set(C_NODE_TYPES);
}
/** Module-level instance of the C chunker. */
export const cChunker = new CChunker();
16
+ //# sourceMappingURL=c.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"c.js","sourceRoot":"","sources":["../../src/chunker/c.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,WAAW,CAAC;AAE9C,MAAM,OAAO,QAAS,SAAQ,iBAAiB;IACpC,QAAQ,GAAG,GAAG,CAAC;IACf,cAAc,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;IAC9B,WAAW,GAAG,GAAG,CAAC;IAClB,SAAS,GAAG,IAAI,GAAG,CAAC;QAC3B,qBAAqB;QACrB,kBAAkB;QAClB,gBAAgB;QAChB,iBAAiB;QACjB,iBAAiB;QACjB,aAAa;KACd,CAAC,CAAC;CACJ;AAED,MAAM,CAAC,MAAM,QAAQ,GAAG,IAAI,QAAQ,EAAE,CAAC"}
@@ -0,0 +1,8 @@
1
+ import { TreeSitterChunker } from "./base.js";
2
+ export declare class CppChunker extends TreeSitterChunker { // Tree-sitter based chunker for C++ sources.
3
+ readonly language = "cpp"; // Language label for chunks produced by this chunker.
4
+ readonly fileExtensions: string[]; // [".cpp", ".cc", ".cxx", ".hpp", ".hxx"] in the compiled implementation.
5
+ readonly grammarName = "cpp"; // Grammar name used by the base class to load the parser language.
6
+ readonly nodeTypes: Set<string>; // Function, class, struct, enum, union, namespace and template node types.
7
+ }
8
+ export declare const cppChunker: CppChunker; // Module-level instance of CppChunker.
@@ -0,0 +1,17 @@
1
+ import { TreeSitterChunker } from "./base.js";
2
/** Tree-sitter node type names that the C++ chunker hands to the base class. */
const CPP_NODE_TYPES = [
    "function_definition",
    "class_specifier",
    "struct_specifier",
    "enum_specifier",
    "union_specifier",
    "namespace_definition",
    "template_declaration",
];
/**
 * Chunker for C++ sources and headers; all parsing logic lives in
 * TreeSitterChunker, this class only supplies the configuration.
 */
export class CppChunker extends TreeSitterChunker {
    language = "cpp";
    fileExtensions = [".cpp", ".cc", ".cxx", ".hpp", ".hxx"];
    grammarName = "cpp";
    // Each instance gets its own Set, built from the shared list above.
    nodeTypes = new Set(CPP_NODE_TYPES);
}
/** Module-level instance of the C++ chunker. */
export const cppChunker = new CppChunker();
17
+ //# sourceMappingURL=cpp.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cpp.js","sourceRoot":"","sources":["../../src/chunker/cpp.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,WAAW,CAAC;AAE9C,MAAM,OAAO,UAAW,SAAQ,iBAAiB;IACtC,QAAQ,GAAG,KAAK,CAAC;IACjB,cAAc,GAAG,CAAC,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;IACzD,WAAW,GAAG,KAAK,CAAC;IACpB,SAAS,GAAG,IAAI,GAAG,CAAC;QAC3B,qBAAqB;QACrB,iBAAiB;QACjB,kBAAkB;QAClB,gBAAgB;QAChB,iBAAiB;QACjB,sBAAsB;QACtB,sBAAsB;KACvB,CAAC,CAAC;CACJ;AAED,MAAM,CAAC,MAAM,UAAU,GAAG,IAAI,UAAU,EAAE,CAAC"}
@@ -0,0 +1,8 @@
1
+ import { TreeSitterChunker } from "./base.js";
2
+ export declare class CSharpChunker extends TreeSitterChunker { // Tree-sitter based chunker for C# sources.
3
+ readonly language = "csharp"; // Language label for chunks produced by this chunker.
4
+ readonly fileExtensions: string[]; // [".cs"] in the compiled implementation.
5
+ readonly grammarName = "c_sharp"; // Grammar name differs from the language label ("c_sharp" vs "csharp").
6
+ readonly nodeTypes: Set<string>; // Class, interface, struct, enum, method, namespace and record declaration node types.
7
+ }
8
+ export declare const csharpChunker: CSharpChunker; // Module-level instance of CSharpChunker.
@@ -0,0 +1,17 @@
1
+ import { TreeSitterChunker } from "./base.js";
2
/** Tree-sitter node type names that the C# chunker hands to the base class. */
const CSHARP_NODE_TYPES = [
    "class_declaration",
    "interface_declaration",
    "struct_declaration",
    "enum_declaration",
    "method_declaration",
    "namespace_declaration",
    "record_declaration",
];
/**
 * Chunker for C# sources; all parsing logic lives in TreeSitterChunker, this
 * class only supplies the configuration. Note that the grammar name
 * ("c_sharp") is spelled differently from the language label ("csharp").
 */
export class CSharpChunker extends TreeSitterChunker {
    language = "csharp";
    fileExtensions = [".cs"];
    grammarName = "c_sharp";
    // Each instance gets its own Set, built from the shared list above.
    nodeTypes = new Set(CSHARP_NODE_TYPES);
}
/** Module-level instance of the C# chunker. */
export const csharpChunker = new CSharpChunker();
17
+ //# sourceMappingURL=csharp.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"csharp.js","sourceRoot":"","sources":["../../src/chunker/csharp.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,WAAW,CAAC;AAE9C,MAAM,OAAO,aAAc,SAAQ,iBAAiB;IACzC,QAAQ,GAAG,QAAQ,CAAC;IACpB,cAAc,GAAG,CAAC,KAAK,CAAC,CAAC;IACzB,WAAW,GAAG,SAAS,CAAC;IACxB,SAAS,GAAG,IAAI,GAAG,CAAC;QAC3B,mBAAmB;QACnB,uBAAuB;QACvB,oBAAoB;QACpB,kBAAkB;QAClB,oBAAoB;QACpB,uBAAuB;QACvB,oBAAoB;KACrB,CAAC,CAAC;CACJ;AAED,MAAM,CAAC,MAAM,aAAa,GAAG,IAAI,aAAa,EAAE,CAAC"}
@@ -0,0 +1,8 @@
1
+ import { TreeSitterChunker } from "./base.js";
2
+ export declare class CssChunker extends TreeSitterChunker { // Tree-sitter based chunker for CSS stylesheets.
3
+ readonly language = "css"; // Language label for chunks produced by this chunker.
4
+ readonly fileExtensions: string[]; // [".css"] in the compiled implementation.
5
+ readonly grammarName = "css"; // Grammar name used by the base class to load the parser language.
6
+ readonly nodeTypes: Set<string>; // rule_set, at_rule, media_statement and keyframes_statement node types.
7
+ }
8
+ export declare const cssChunker: CssChunker; // Module-level instance of CssChunker.
@@ -0,0 +1,14 @@
1
+ import { TreeSitterChunker } from "./base.js";
2
/** Tree-sitter node type names that the CSS chunker hands to the base class. */
const CSS_NODE_TYPES = [
    "rule_set",
    "at_rule",
    "media_statement",
    "keyframes_statement",
];
/**
 * Chunker for CSS stylesheets; all parsing logic lives in TreeSitterChunker,
 * this class only supplies the configuration.
 */
export class CssChunker extends TreeSitterChunker {
    language = "css";
    fileExtensions = [".css"];
    grammarName = "css";
    // Each instance gets its own Set, built from the shared list above.
    nodeTypes = new Set(CSS_NODE_TYPES);
}
/** Module-level instance of the CSS chunker. */
export const cssChunker = new CssChunker();
14
+ //# sourceMappingURL=css.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"css.js","sourceRoot":"","sources":["../../src/chunker/css.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,WAAW,CAAC;AAE9C,MAAM,OAAO,UAAW,SAAQ,iBAAiB;IACtC,QAAQ,GAAG,KAAK,CAAC;IACjB,cAAc,GAAG,CAAC,MAAM,CAAC,CAAC;IAC1B,WAAW,GAAG,KAAK,CAAC;IACpB,SAAS,GAAG,IAAI,GAAG,CAAC;QAC3B,UAAU;QACV,SAAS;QACT,iBAAiB;QACjB,qBAAqB;KACtB,CAAC,CAAC;CACJ;AAED,MAAM,CAAC,MAAM,UAAU,GAAG,IAAI,UAAU,EAAE,CAAC"}
@@ -0,0 +1,27 @@
1
+ import type { Chunker, Chunk } from "../core/interfaces.js";
2
+ import { typescriptChunker } from "./typescript.js";
3
+ import { pythonChunker } from "./python.js";
4
+ import { javaChunker } from "./java.js";
5
+ import { goChunker } from "./go.js";
6
+ import { markdownChunker } from "./markdown.js";
7
+ import { cChunker } from "./c.js";
8
+ import { cppChunker } from "./cpp.js";
9
+ import { csharpChunker } from "./csharp.js";
10
+ import { javascriptChunker } from "./javascript.js";
11
+ import { razorChunker } from "./razor.js";
12
+ import { jsonChunker } from "./json.js";
13
+ import { htmlChunker } from "./html.js";
14
+ import { cssChunker } from "./css.js";
15
+ import { xmlChunker } from "./xml.js";
16
+ import { slnChunker } from "./sln.js";
17
+ import { rustChunker } from "./rust.js";
18
+ import { rubyChunker } from "./ruby.js";
19
+ import { kotlinChunker } from "./kotlin.js";
20
+ import { swiftChunker } from "./swift.js";
21
+ import { texChunker } from "./tex.js";
22
+ import { fallbackChunker } from "./fallback.js";
23
+ import { pdfChunker } from "./pdf.js";
24
+ export declare function registerChunker(chunker: Chunker, extensions?: string[]): void; // Registers a custom chunker. Per the package README, `extensions` overrides the chunker's own `fileExtensions`, and a registration for an extension already covered by a built-in chunker is skipped with a warning.
25
+ export declare function getChunker(filePath: string): Chunker; // Returns the chunker to use for `filePath`. NOTE(review): implementation not visible here — presumably selected by file extension with the fallback chunker as default; confirm in factory.js.
26
+ export declare function chunkFile(filePath: string, content: string): Promise<Chunk[]>; // Resolves to the chunks for one file's content. NOTE(review): presumably delegates to getChunker(filePath).chunk(filePath, content); confirm in factory.js.
27
+ export { typescriptChunker, pythonChunker, javaChunker, goChunker, markdownChunker, cChunker, cppChunker, csharpChunker, javascriptChunker, razorChunker, jsonChunker, htmlChunker, cssChunker, xmlChunker, slnChunker, rustChunker, rubyChunker, kotlinChunker, swiftChunker, texChunker, pdfChunker, fallbackChunker };