codevault 1.3.0-beta.7 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/.env.example +49 -27
  2. package/README.md +512 -71
  3. package/dist/cli/commands/ask-cmd.d.ts +3 -0
  4. package/dist/cli/commands/ask-cmd.d.ts.map +1 -0
  5. package/dist/cli/commands/ask-cmd.js +113 -0
  6. package/dist/cli/commands/ask-cmd.js.map +1 -0
  7. package/dist/cli.js +2 -0
  8. package/dist/cli.js.map +1 -1
  9. package/dist/config/apply-env.d.ts.map +1 -1
  10. package/dist/config/apply-env.js +54 -22
  11. package/dist/config/apply-env.js.map +1 -1
  12. package/dist/config/loader.d.ts.map +1 -1
  13. package/dist/config/loader.js +72 -20
  14. package/dist/config/loader.js.map +1 -1
  15. package/dist/config/types.d.ts +11 -0
  16. package/dist/config/types.d.ts.map +1 -1
  17. package/dist/mcp/tools/ask-codebase.d.ts +85 -0
  18. package/dist/mcp/tools/ask-codebase.d.ts.map +1 -0
  19. package/dist/mcp/tools/ask-codebase.js +125 -0
  20. package/dist/mcp/tools/ask-codebase.js.map +1 -0
  21. package/dist/mcp-server.js +76 -0
  22. package/dist/mcp-server.js.map +1 -1
  23. package/dist/providers/base.js +4 -4
  24. package/dist/providers/base.js.map +1 -1
  25. package/dist/providers/chat-llm.d.ts +41 -0
  26. package/dist/providers/chat-llm.d.ts.map +1 -0
  27. package/dist/providers/chat-llm.js +155 -0
  28. package/dist/providers/chat-llm.js.map +1 -0
  29. package/dist/providers/index.d.ts.map +1 -1
  30. package/dist/providers/index.js +2 -1
  31. package/dist/providers/index.js.map +1 -1
  32. package/dist/providers/ollama.d.ts.map +1 -1
  33. package/dist/providers/ollama.js +3 -3
  34. package/dist/providers/ollama.js.map +1 -1
  35. package/dist/providers/openai.d.ts.map +1 -1
  36. package/dist/providers/openai.js +9 -8
  37. package/dist/providers/openai.js.map +1 -1
  38. package/dist/ranking/api-reranker.d.ts.map +1 -1
  39. package/dist/ranking/api-reranker.js +14 -28
  40. package/dist/ranking/api-reranker.js.map +1 -1
  41. package/dist/synthesis/markdown-formatter.d.ts +13 -0
  42. package/dist/synthesis/markdown-formatter.d.ts.map +1 -0
  43. package/dist/synthesis/markdown-formatter.js +104 -0
  44. package/dist/synthesis/markdown-formatter.js.map +1 -0
  45. package/dist/synthesis/prompt-builder.d.ts +21 -0
  46. package/dist/synthesis/prompt-builder.d.ts.map +1 -0
  47. package/dist/synthesis/prompt-builder.js +129 -0
  48. package/dist/synthesis/prompt-builder.js.map +1 -0
  49. package/dist/synthesis/synthesizer.d.ts +30 -0
  50. package/dist/synthesis/synthesizer.d.ts.map +1 -0
  51. package/dist/synthesis/synthesizer.js +210 -0
  52. package/dist/synthesis/synthesizer.js.map +1 -0
  53. package/dist/utils/rate-limiter.d.ts.map +1 -1
  54. package/dist/utils/rate-limiter.js +4 -3
  55. package/dist/utils/rate-limiter.js.map +1 -1
  56. package/package.json +4 -4
package/README.md CHANGED
@@ -6,65 +6,198 @@ CodeVault is an intelligent code indexing and search system that enables AI assi
6
6
 
7
7
  ## 🌟 Features
8
8
 
9
- - **🔍 Semantic Code Search**: Find code by meaning, not just keywords
9
+ - **🔍 Semantic Search**: Find code by meaning, not just keywords, using vector embeddings
10
10
  - **🤖 MCP Integration**: Native support for Claude Desktop and other MCP clients
11
- - **🎯 Symbol-Aware Ranking**: Boost results based on function signatures and relationships
12
- - **⚡ Hybrid Search**: Combines vector embeddings with BM25 keyword matching
13
- - **🔄 Context Packs**: Save and reuse search scopes for different projects
11
+ - **💬 LLM-Synthesized Answers**: Ask questions in natural language, get markdown responses with code citations
12
+ - **🎯 Symbol-Aware Ranking**: Boost results based on function signatures, parameters, and relationships
13
+ - **⚡ Hybrid Retrieval**: Combines vector embeddings with BM25 keyword matching via Reciprocal Rank Fusion
14
+ - **🚀 Batch Processing**: Efficient API usage with configurable batching (50 chunks/batch by default)
15
+ - **📦 Smart Chunking**: Token-aware semantic code splitting with overlap for optimal context
16
+ - **🔄 Context Packs**: Save and reuse search scopes for different features/modules
17
+ - **🏠 Local-First**: Works with local models (Ollama) or cloud APIs (OpenAI, Nebius)
18
+ - **🔐 Optional Encryption**: AES-256-GCM encryption for indexed code chunks
19
+ - **⚙️ Global Configuration**: One-time setup with interactive wizard for CLI convenience
14
20
  - **📊 Multi-Language Support**: 25+ programming languages via Tree-sitter
15
- - **🏠 Local-First**: Works with local models (Ollama) or cloud APIs
16
- - **🔐 Optional Encryption**: Secure your indexed code chunks
17
- - **📈 Smart Chunking**: Token-aware code splitting for optimal context
21
+ - **🔎 File Watching**: Real-time index updates with debounced change detection
22
+ - **⏱️ Rate Limiting**: Intelligent request/token throttling with automatic retry
23
+ - **💾 Memory Efficient**: LRU caches with automatic cleanup for long-running processes
18
24
 
19
25
  ## 🚀 Quick Start
20
26
 
21
27
  ### Installation
22
28
 
29
+ #### NPM (Global - Recommended)
30
+
23
31
  ```bash
24
- npm install
32
+ # Install latest beta
33
+ npm install -g codevault@beta
34
+
35
+ # Interactive configuration setup (one-time)
36
+ codevault config init
37
+
38
+ # Index your project
39
+ cd /path/to/your/project
40
+ codevault index
41
+ ```
42
+
43
+ #### From Source
44
+
45
+ ```bash
46
+ git clone https://github.com/shariqriazz/codevault.git
47
+ cd codevault
48
+ npm install --legacy-peer-deps
25
49
  npm run build
26
50
  npm link
27
51
  ```
28
52
 
53
+ ### Configuration
54
+
55
+ CodeVault supports multiple configuration methods with clear priority:
56
+
57
+ **Priority:** Environment Variables > Project Config > Global Config > Defaults
58
+
59
+ #### Option 1: Interactive Setup (Recommended for CLI)
60
+
61
+ ```bash
62
+ codevault config init
63
+ ```
64
+
65
+ Guides you through:
66
+ - Provider selection (OpenAI, Ollama, Custom API)
67
+ - API key configuration
68
+ - Model selection (preset or custom)
69
+ - Advanced settings (rate limits, encryption, reranking)
70
+
71
+ Configuration is saved to `~/.codevault/config.json`.
72
+
73
+ #### Option 2: Manual CLI Configuration
74
+
75
+ ```bash
76
+ # Set API key
77
+ codevault config set providers.openai.apiKey sk-your-key-here
78
+ codevault config set providers.openai.model text-embedding-3-large
79
+
80
+ # View configuration
81
+ codevault config list
82
+
83
+ # See all config sources
84
+ codevault config list --sources
85
+ ```
86
+
87
+ #### Option 3: Environment Variables (MCP / CI/CD)
88
+
89
+ ```bash
90
+ # Embedding Provider (OpenAI-compatible APIs)
91
+ export CODEVAULT_EMBEDDING_API_KEY=sk-your-key-here
92
+ export CODEVAULT_EMBEDDING_BASE_URL=https://api.openai.com/v1
93
+ export CODEVAULT_EMBEDDING_MODEL=text-embedding-3-large
94
+
95
+ # Ollama (local, no API key needed)
96
+ export CODEVAULT_OLLAMA_EMBEDDING_MODEL=nomic-embed-text
97
+
98
+ # Custom settings
99
+ export CODEVAULT_EMBEDDING_MAX_TOKENS=8192
100
+ export CODEVAULT_EMBEDDING_DIMENSIONS=3072
101
+ export CODEVAULT_EMBEDDING_RATE_LIMIT_RPM=10000
102
+ export CODEVAULT_EMBEDDING_RATE_LIMIT_TPM=600000
103
+ ```
104
+
105
+ **Note:** Old variable names are still supported for backward compatibility:
106
+ - `OPENAI_API_KEY` → `CODEVAULT_EMBEDDING_API_KEY`
107
+ - `OPENAI_BASE_URL` → `CODEVAULT_EMBEDDING_BASE_URL`
108
+ - `CODEVAULT_OPENAI_EMBEDDING_MODEL` → `CODEVAULT_EMBEDDING_MODEL`
109
+ - `CODEVAULT_OLLAMA_MODEL` → `CODEVAULT_OLLAMA_EMBEDDING_MODEL`
110
+ - `CODEVAULT_MAX_TOKENS` → `CODEVAULT_EMBEDDING_MAX_TOKENS`
111
+ - `CODEVAULT_DIMENSIONS` → `CODEVAULT_EMBEDDING_DIMENSIONS`
112
+ - `CODEVAULT_RATE_LIMIT_RPM` → `CODEVAULT_EMBEDDING_RATE_LIMIT_RPM`
113
+ - `CODEVAULT_RATE_LIMIT_TPM` → `CODEVAULT_EMBEDDING_RATE_LIMIT_TPM`
114
+
115
+ #### Option 4: Project-Specific Config
116
+
117
+ ```bash
118
+ # Set local config (project-specific)
119
+ codevault config set --local provider ollama
120
+ codevault config set --local providers.ollama.model nomic-embed-text
121
+ ```
122
+
123
+ See [`CONFIGURATION.md`](CONFIGURATION.md) for the complete configuration guide.
124
+
29
125
  ### Index Your Project
30
126
 
31
127
  ```bash
32
- # Navigate to your project
33
- cd /path/to/your/project
128
+ # Using global config (if set via codevault config init)
129
+ codevault index
34
130
 
35
131
  # Using Ollama (local, no API key required)
36
132
  codevault index --provider ollama
37
133
 
38
- # Using OpenAI
134
+ # Using OpenAI with custom settings
39
135
  export OPENAI_API_KEY=your-key-here
40
136
  codevault index --provider openai
41
137
 
42
138
  # Using Qwen (via Nebius AI Studio)
43
- export OPENAI_API_KEY=your-nebius-api-key
44
- export OPENAI_BASE_URL=https://api.studio.nebius.com/v1/
45
- export CODEVAULT_OPENAI_EMBEDDING_MODEL=Qwen/Qwen3-Embedding-8B
139
+ export CODEVAULT_EMBEDDING_API_KEY=your-nebius-api-key
140
+ export CODEVAULT_EMBEDDING_BASE_URL=https://api.studio.nebius.com/v1
141
+ export CODEVAULT_EMBEDDING_MODEL=Qwen/Qwen3-Embedding-8B
46
142
  codevault index --provider openai
143
+
144
+ # With encryption
145
+ export CODEVAULT_ENCRYPTION_KEY=$(openssl rand -base64 32)
146
+ codevault index --encrypt on
147
+
148
+ # Watch for changes (auto-update index)
149
+ codevault watch --debounce 500
47
150
  ```
48
151
 
49
152
  ### Search Your Code
50
153
 
51
154
  ```bash
155
+ # Basic search
52
156
  codevault search "authentication function"
157
+
158
+ # Search with filters
53
159
  codevault search "stripe checkout" --tags stripe --lang php
160
+
161
+ # Search with full code chunks
162
+ codevault search-with-code "database connection" --limit 5
163
+
164
+ # Ask questions with LLM-synthesized answers
165
+ codevault ask "How does authentication work in this codebase?"
166
+ codevault ask "How do I add a new payment provider?" --multi-query --stream
167
+
168
+ # View project stats
169
+ codevault info
54
170
  ```
55
171
 
56
172
  ### Use with Claude Desktop
57
173
 
58
174
  Add to your `claude_desktop_config.json`:
59
175
 
176
+ ```json
177
+ {
178
+ "mcpServers": {
179
+ "codevault": {
180
+ "command": "npx",
181
+ "args": ["-y", "codevault@beta", "mcp"],
182
+ "env": {
183
+ "CODEVAULT_EMBEDDING_API_KEY": "your-api-key-here",
184
+ "CODEVAULT_EMBEDDING_MODEL": "text-embedding-3-large"
185
+ }
186
+ }
187
+ }
188
+ }
189
+ ```
190
+
191
+ Or use local installation:
192
+
60
193
  ```json
61
194
  {
62
195
  "mcpServers": {
63
196
  "codevault": {
64
197
  "command": "node",
65
- "args": ["/path/to/codevault-v2/dist/mcp-server.js"],
198
+ "args": ["/path/to/codevault/dist/mcp-server.js"],
66
199
  "env": {
67
- "OPENAI_API_KEY": "your-api-key-here"
200
+ "CODEVAULT_EMBEDDING_API_KEY": "your-api-key-here"
68
201
  }
69
202
  }
70
203
  }
@@ -73,6 +206,81 @@ Add to your `claude_desktop_config.json`:
73
206
 
74
207
  ## 📖 Documentation
75
208
 
209
+ ### CLI Commands
210
+
211
+ ```bash
212
+ # Configuration Management
213
+ codevault config init # Interactive setup wizard
214
+ codevault config set <key> <value> # Set global config value
215
+ codevault config set --local <key> <val> # Set project config value
216
+ codevault config get <key> # Get config value
217
+ codevault config list # Show merged config
218
+ codevault config list --sources # Show all config sources
219
+ codevault config unset <key> # Remove config value
220
+ codevault config path # Show config file paths
221
+
222
+ # Indexing
223
+ codevault index [path] # Index project
224
+ codevault index --provider openai # Use specific provider
225
+ codevault index --encrypt on # Enable encryption
226
+ codevault update [path] # Update existing index
227
+ codevault watch [path] # Watch for changes
228
+ codevault watch --debounce 1000 # Custom debounce interval
229
+
230
+ # Searching
231
+ codevault search <query> [path] # Search code (metadata only)
232
+ --limit <num> # Max results (default: 10)
233
+ --provider <name> # Embedding provider
234
+ --path_glob <pattern> # Filter by file pattern
235
+ --tags <tag...> # Filter by tags
236
+ --lang <language...> # Filter by language
237
+ --reranker <off|api> # Enable API reranking
238
+ --hybrid <on|off> # Hybrid search (default: on)
239
+ --bm25 <on|off> # BM25 keyword search (default: on)
240
+ --symbol_boost <on|off> # Symbol boosting (default: on)
241
+
242
+ codevault search-with-code <query> # Search with full code chunks
243
+ --max-code-size <bytes> # Max code size per chunk
244
+
245
+ # Ask Questions (LLM Synthesis)
246
+ codevault ask <question> # Ask a question, get synthesized answer
247
+ -p, --provider <name> # Embedding provider
248
+ -c, --chat-provider <name> # Chat LLM provider (auto|openai|ollama)
249
+ -k, --max-chunks <num> # Max code chunks to analyze (default: 10)
250
+ --path_glob <pattern...> # Filter by file pattern
251
+ --tags <tag...> # Filter by tags
252
+ --lang <language...> # Filter by language
253
+ --reranker <on|off> # Use API reranking (default: on)
254
+ --multi-query # Break complex questions into sub-queries
255
+ --temperature <num> # LLM temperature 0-2 (default: 0.7)
256
+ --stream # Stream response in real-time
257
+ --citations # Add citation footer
258
+ --no-metadata # Hide search metadata
259
+
260
+ # Context Packs
261
+ codevault context list # List saved contexts
262
+ codevault context show <name> # Show context pack details
263
+ codevault context use <name> # Activate context pack
264
+
265
+ # Utilities
266
+ codevault info # Project statistics
267
+ codevault mcp # Start MCP server
268
+ codevault --version # Show version
269
+ ```
270
+
271
+ ### MCP Tools
272
+
273
+ When used via MCP, CodeVault provides these tools:
274
+
275
+ - **`search_code`**: Semantic search returning metadata (paths, symbols, scores, SHAs)
276
+ - **`search_code_with_chunks`**: Search + retrieve full code for each result
277
+ - **`get_code_chunk`**: Get specific code chunk by SHA
278
+ - **`ask_codebase`**: ✨ Ask questions and get LLM-synthesized answers with code citations
279
+ - **`index_project`**: Index a new project
280
+ - **`update_project`**: Update existing index
281
+ - **`get_project_stats`**: Get project overview and statistics
282
+ - **`use_context_pack`**: Apply saved search context/scope
283
+
76
284
  ### Supported Languages
77
285
 
78
286
  - **Web**: JavaScript, TypeScript, TSX, HTML, CSS, JSON, Markdown
@@ -87,88 +295,321 @@ Add to your `claude_desktop_config.json`:
87
295
  |----------|-------|------------|---------|----------|------------------|
88
296
  | **ollama** | nomic-embed-text | 768 | 8K | Local, no API costs | ❌ No |
89
297
  | **openai** | text-embedding-3-large | 3072 | 8K | Highest quality | ✅ Yes |
90
- | **openai** | Qwen/Qwen3-Embedding-8B | 4096 | 32K | Large context, high quality | ✅ Yes (Nebius AI) |
298
+ | **openai** | text-embedding-3-small | 1536 | 8K | Faster, cheaper | ✅ Yes |
299
+ | **openai** | Qwen/Qwen3-Embedding-8B | 4096 | 32K | Large context, high quality | ✅ Yes (Nebius) |
300
+ | **custom** | Your choice | Custom | Custom | Any OpenAI-compatible API | ✅ Yes |
91
301
 
92
- ### CLI Commands
302
+ ### Environment Variables
93
303
 
94
304
  ```bash
95
- # Indexing
96
- codevault index [path] # Index project
97
- codevault update [path] # Update existing index
98
- codevault watch [path] # Watch for changes
99
-
100
- # Searching
101
- codevault search <query> [path] # Search code
102
- --limit <num> # Max results
103
- --provider <name> # Embedding provider
104
- --path_glob <pattern> # Filter by file pattern
105
- --tags <tag...> # Filter by tags
106
- --lang <language...> # Filter by language
305
+ # Embedding Provider Configuration
306
+ CODEVAULT_EMBEDDING_API_KEY=sk-... # API key for embeddings
307
+ CODEVAULT_EMBEDDING_BASE_URL=https://api.openai.com/v1 # Embedding API endpoint
308
+ CODEVAULT_EMBEDDING_MODEL=text-embedding-3-large # Embedding model name
309
+ CODEVAULT_OLLAMA_EMBEDDING_MODEL=nomic-embed-text # Ollama embedding model
107
310
 
108
- # Context Packs
109
- codevault context list # List saved contexts
110
- codevault context show <name> # Show context pack
111
- codevault context use <name> # Activate context pack
311
+ # Embedding Chunking Configuration
312
+ CODEVAULT_EMBEDDING_MAX_TOKENS=8192 # Max tokens per embedding chunk
313
+ CODEVAULT_EMBEDDING_DIMENSIONS=3072 # Embedding vector dimensions
112
314
 
113
- # Utilities
114
- codevault info # Project statistics
115
- codevault mcp # Start MCP server
116
- ```
315
+ # Embedding API Rate Limiting
316
+ CODEVAULT_EMBEDDING_RATE_LIMIT_RPM=10000 # Embedding API requests/min
317
+ CODEVAULT_EMBEDDING_RATE_LIMIT_TPM=600000 # Embedding API tokens/min
117
318
 
118
- ### MCP Tools
319
+ # Encryption
320
+ CODEVAULT_ENCRYPTION_KEY=... # 32-byte key (base64 or hex)
119
321
 
120
- - **`search_code`**: Semantic code search with filters
121
- - **`search_code_with_chunks`**: Search + retrieve full code
122
- - **`get_code_chunk`**: Get specific code by SHA
123
- - **`index_project`**: Index a new project
124
- - **`update_project`**: Update existing index
125
- - **`get_project_stats`**: Project overview
126
- - **`use_context_pack`**: Apply saved search context
322
+ # API Reranking (Optional)
323
+ # Novita Qwen3-Reranker (32K context, great for code)
324
+ CODEVAULT_RERANK_API_URL=https://api.novita.ai/openai/v1/rerank
325
+ CODEVAULT_RERANK_API_KEY=your-novita-key
326
+ CODEVAULT_RERANK_MODEL=qwen/qwen3-reranker-8b
127
327
 
128
- ### Environment Variables
328
+ # Or Cohere (4K context, $25 free credits)
329
+ # CODEVAULT_RERANK_API_URL=https://api.cohere.ai/v1/rerank
330
+ # CODEVAULT_RERANK_API_KEY=your-cohere-key
331
+ # CODEVAULT_RERANK_MODEL=rerank-english-v3.0
332
+
333
+ # Memory Management
334
+ CODEVAULT_CACHE_CLEAR_INTERVAL=3600000 # Cache cleanup interval (ms)
335
+ ```
336
+
337
+ ### Ask Questions (LLM Synthesis)
338
+
339
+ The `ask` command combines semantic search with LLM synthesis to answer natural language questions about your codebase:
129
340
 
130
341
  ```bash
131
- # OpenAI Configuration
132
- OPENAI_API_KEY=sk-...
133
- OPENAI_BASE_URL=https://api.openai.com/v1
134
- CODEVAULT_OPENAI_EMBEDDING_MODEL=text-embedding-3-large
342
+ # Basic question
343
+ codevault ask "How does authentication work in this codebase?"
135
344
 
136
- # Ollama Configuration
137
- CODEVAULT_OLLAMA_MODEL=nomic-embed-text
345
+ # With filters
346
+ codevault ask "How do I add Stripe checkout?" --tags stripe --lang php
138
347
 
139
- # Chunking
140
- CODEVAULT_MAX_TOKENS=8192
141
- CODEVAULT_DIMENSIONS=3072
348
+ # Complex question with multi-query
349
+ codevault ask "What are the main components and how do they interact?" --multi-query
142
350
 
143
- # Rate Limiting
144
- CODEVAULT_RATE_LIMIT_RPM=10000 # Requests per minute
145
- CODEVAULT_RATE_LIMIT_TPM=600000 # Tokens per minute
351
+ # Streaming response
352
+ codevault ask "Explain the database connection pooling" --stream
146
353
 
147
- # Reranking
148
- CODEVAULT_RERANK_API_URL=...
149
- CODEVAULT_RERANK_API_KEY=...
150
- CODEVAULT_RERANK_MODEL=...
354
+ # With custom settings
355
+ codevault ask "How does error handling work?" \
356
+ --chat-provider openai \
357
+ --temperature 0.5 \
358
+ --max-chunks 15 \
359
+ --reranker on
151
360
 
152
- # Encryption
153
- CODEVAULT_ENCRYPTION_KEY=...
361
+ # Using Ollama for local processing
362
+ codevault ask "What routes are available?" --chat-provider ollama
363
+ ```
364
+
365
+ **How it works:**
366
+ 1. 🔍 Searches codebase using embeddings (+ optional reranking)
367
+ 2. 📚 Retrieves top N most relevant code chunks
368
+ 3. 🤖 Sends context to LLM with structured prompt
369
+ 4. 📝 LLM synthesizes natural language answer with code citations
370
+ 5. ✨ Returns formatted markdown with file references
371
+
372
+ **Example Output:**
373
+ ````markdown
374
+ # How Authentication Works
375
+
376
+ The authentication system uses a middleware-based approach...
377
+
378
+ The main authentication flow is handled by `AuthMiddleware` in [`src/middleware/auth.ts`](src/middleware/auth.ts):
379
+
380
+ ```typescript
381
+ export function authenticate(req, res, next) {
382
+ // Token validation logic
383
+ ...
384
+ }
385
+ ```
386
+
387
+ Key components:
388
+ - **Session Management**: [`src/auth/session.ts`](src/auth/session.ts)
389
+ - **Token Validation**: [`src/auth/token.ts`](src/auth/token.ts)
390
+ - **User Model**: [`src/models/user.ts`](src/models/user.ts)
154
391
  ````
155
392
 
156
393
  ## 🏗️ Architecture
157
394
 
395
+ ### How It Works
396
+
397
+ 1. **Indexing Phase**
398
+ - Parses source files using Tree-sitter
399
+ - Extracts symbols, signatures, and relationships
400
+ - Creates semantic chunks (token-aware, with overlap)
401
+ - Batch generates embeddings (50 chunks/batch)
402
+ - Stores in SQLite + compressed chunks on disk
403
+
404
+ 2. **Search Phase**
405
+ - Generates query embedding
406
+ - Performs vector similarity search
407
+ - Runs BM25 keyword search (if enabled)
408
+ - Applies Reciprocal Rank Fusion
409
+ - Boosts results based on symbol matching
410
+ - Optionally applies API reranking
411
+ - Returns ranked results with metadata
412
+
413
+ 3. **Retrieval Phase**
414
+ - Fetches code chunks by SHA
415
+ - Decompresses and decrypts (if encrypted)
416
+ - Returns full code with context
417
+
158
418
  ### Project Structure
159
419
 
160
420
  ```
161
421
  .codevault/
162
- ├── codevault.db # SQLite database
163
- ├── chunks/ # Compressed code chunks
164
- └── contextpacks/ # Saved search contexts
165
- codevault.codemap.json # Lightweight index
422
+ ├── codevault.db # SQLite: embeddings + metadata
423
+ ├── chunks/ # Compressed code chunks
424
+ │ ├── <sha>.gz # Plain compressed
425
+ │ └── <sha>.gz.enc # Encrypted compressed
426
+ └── contextpacks/ # Saved search contexts
427
+ └── feature-auth.json # Example context pack
428
+
429
+ codevault.codemap.json # Lightweight index (symbol graph)
430
+
431
+ ~/.codevault/ # Global CLI configuration
432
+ └── config.json # User-wide settings
433
+ ```
434
+
435
+ ### Advanced Features
436
+
437
+ #### Batch Processing
438
+
439
+ Embeddings are generated in batches of 50 for optimal API efficiency:
440
+
441
+ ```typescript
442
+ // Automatic batching - no configuration needed
443
+ // Processes 50 chunks per API call
444
+ // Falls back to individual processing on error
445
+ ```
446
+
447
+ #### Smart Chunking
448
+
449
+ Token-aware semantic chunking with configurable limits:
450
+
451
+ - Respects function/class boundaries
452
+ - Applies overlap for context continuity
453
+ - Subdivides large functions intelligently
454
+ - Merges small chunks when beneficial
455
+
456
+ #### Symbol-Aware Ranking
457
+
458
+ Boosts search results based on:
459
+ - Exact symbol name matches
460
+ - Function signature matches
461
+ - Parameter name matches
462
+ - Symbol neighbor relationships (calls, imports)
463
+
464
+ #### Hybrid Search
465
+
466
+ Combines multiple ranking signals:
467
+ - Vector similarity (semantic understanding)
468
+ - BM25 keyword matching (exact term matches)
469
+ - Symbol boost (code structure awareness)
470
+ - Reciprocal Rank Fusion (combines rankings)
471
+
472
+ #### Context Packs
473
+
474
+ Save search scopes for reuse:
475
+
476
+ ```json
477
+ {
478
+ "key": "feature-auth",
479
+ "name": "Authentication Feature",
480
+ "description": "Login, signup, password reset",
481
+ "scope": {
482
+ "path_glob": ["src/auth/**", "src/middleware/auth.ts"],
483
+ "tags": ["auth", "security"],
484
+ "lang": ["typescript", "javascript"]
485
+ }
486
+ }
487
+ ```
488
+
489
+ Usage:
490
+ ```bash
491
+ codevault context use feature-auth
492
+ codevault search "token validation" # Scoped to auth files
493
+ ```
494
+
495
+ #### File Watching
496
+
497
+ Real-time index updates with intelligent debouncing:
498
+
499
+ ```bash
500
+ codevault watch --debounce 500
166
501
  ```
167
502
 
503
+ - Detects file changes, additions, deletions
504
+ - Batches rapid changes (debouncing)
505
+ - Updates only affected chunks
506
+ - Preserves index consistency
507
+
508
+ #### Encryption
509
+
510
+ AES-256-GCM encryption for code chunks:
511
+
512
+ ```bash
513
+ # Generate secure key
514
+ export CODEVAULT_ENCRYPTION_KEY=$(openssl rand -base64 32)
515
+
516
+ # Index with encryption
517
+ codevault index --encrypt on
518
+
519
+ # Files stored as .gz.enc instead of .gz
520
+ # Automatic decryption on read (requires key)
521
+ ```
522
+
523
+ ## 🔧 Performance & Optimization
524
+
525
+ ### Memory Management
526
+
527
+ - LRU caches with automatic eviction
528
+ - Periodic cache cleanup (configurable interval)
529
+ - Graceful shutdown handlers for MCP server
530
+ - Token counter caching for repeated operations
531
+
532
+ ### Rate Limiting
533
+
534
+ Intelligent throttling prevents API errors:
535
+
536
+ - Configurable RPM (requests per minute)
537
+ - Configurable TPM (tokens per minute)
538
+ - Automatic retry with exponential backoff
539
+ - Queue size limits prevent memory exhaustion
540
+
541
+ ### Batch Efficiency
542
+
543
+ - 50 chunks per embedding API call (vs 1 per call)
544
+ - Reduces API overhead by ~98%
545
+ - Automatic fallback for failed batches
546
+ - Preserves partial progress on errors
547
+
548
+ ## 🐛 Troubleshooting
549
+
550
+ ### Common Issues
551
+
552
+ **"Which config is being used?"**
553
+ ```bash
554
+ codevault config list --sources
555
+ ```
556
+
557
+ **"MCP not using my global config"**
558
+
559
+ This is expected behavior: MCP uses environment variables by design. Global config is for CLI convenience only.
560
+
561
+ **"Rate limit errors"**
562
+ ```bash
563
+ # Reduce rate limits
564
+ codevault config set rateLimit.rpm 100
565
+ codevault config set rateLimit.tpm 10000
566
+ ```
567
+
568
+ **"Out of memory during indexing"**
569
+ ```bash
570
+ # Reduce batch size via environment
571
+ export BATCH_SIZE=25
572
+ codevault index
573
+ ```
574
+
575
+ **"Encryption key errors"**
576
+ ```bash
577
+ # Generate valid key (32 bytes)
578
+ export CODEVAULT_ENCRYPTION_KEY=$(openssl rand -base64 32)
579
+ ```
580
+
581
+ ## 🤝 Contributing
582
+
583
+ Contributions welcome! Please:
584
+
585
+ 1. Fork the repository
586
+ 2. Create a feature branch
587
+ 3. Make your changes
588
+ 4. Add tests if applicable
589
+ 5. Submit a pull request
590
+
168
591
  ## 📄 License
169
592
 
170
- MIT License
593
+ MIT License - see [LICENSE](LICENSE) file for details.
594
+
595
+ ## 🔗 Links
596
+
597
+ - **GitHub**: https://github.com/shariqriazz/codevault
598
+ - **NPM**: https://www.npmjs.com/package/codevault
599
+ - **Issues**: https://github.com/shariqriazz/codevault/issues
600
+ - **Configuration Guide**: [CONFIGURATION.md](CONFIGURATION.md)
601
+ - **Ask Feature Guide**: [ASK_FEATURE.md](ASK_FEATURE.md)
602
+
603
+ ## 🙏 Acknowledgments
604
+
605
+ Built with:
606
+ - [Model Context Protocol](https://modelcontextprotocol.io/) - AI integration framework
607
+ - [Tree-sitter](https://tree-sitter.github.io/) - Parsing infrastructure
608
+ - [OpenAI](https://openai.com/) - Embedding models
609
+ - [Ollama](https://ollama.ai/) - Local model support
171
610
 
172
611
  ---
173
612
 
174
- **Built by Shariq Riaz**
613
+ **Version**: 1.4.0
614
+ **Built by**: Shariq Riaz
615
+ **Last Updated**: January 2025
@@ -0,0 +1,3 @@
1
+ import { Command } from 'commander';
2
+ export declare function registerAskCommand(program: Command): void;
3
+ //# sourceMappingURL=ask-cmd.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ask-cmd.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/ask-cmd.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAYpC,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CA0HzD"}