@kb-labs/mind-vector-store 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +496 -0
- package/dist/index.d.ts +47 -0
- package/dist/index.js +127 -0
- package/dist/index.js.map +1 -0
- package/package.json +49 -0
package/README.md
ADDED
|
@@ -0,0 +1,496 @@
|
|
|
1
|
+
# @kb-labs/mind-vector-store
|
|
2
|
+
|
|
3
|
+
**Vector storage abstraction for KB Labs Mind system.**
|
|
4
|
+
|
|
5
|
+
Unified interface for storing and searching vector embeddings, providing Qdrant integration with in-memory fallback for development and testing.
|
|
6
|
+
|
|
7
|
+
## Features
|
|
8
|
+
|
|
9
|
+
- **💾 Multiple Backends** - Qdrant, in-memory, file-based storage
|
|
10
|
+
- **🔍 Semantic Search** - Fast vector similarity search
|
|
11
|
+
- **🔄 Graceful Fallback** - Automatic fallback to memory store
|
|
12
|
+
- **📊 Metadata Filtering** - Filter by file type, language, repository
|
|
13
|
+
- **⚡ Batch Operations** - Efficient bulk insert/update/delete
|
|
14
|
+
- **✅ Integrity Checks** - Verify store consistency
|
|
15
|
+
- **📈 Statistics** - Track store size and performance
|
|
16
|
+
- **🎯 Collection Management** - Multiple isolated collections
|
|
17
|
+
|
|
18
|
+
## Architecture
|
|
19
|
+
|
|
20
|
+
```
|
|
21
|
+
mind-vector-store/
|
|
22
|
+
├── src/
|
|
23
|
+
│ ├── index.ts # Main exports
|
|
24
|
+
│ ├── stores/ # Vector store implementations
|
|
25
|
+
│ │ ├── qdrant-store.ts # Qdrant vector store (production)
|
|
26
|
+
│ │ ├── memory-store.ts # In-memory store (dev/test)
|
|
27
|
+
│ │ └── file-store.ts # File-based store (optional)
|
|
28
|
+
│ ├── store-factory.ts # Factory pattern
|
|
29
|
+
│ └── types.ts # Store interfaces
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Usage
|
|
33
|
+
|
|
34
|
+
### Creating Vector Store
|
|
35
|
+
|
|
36
|
+
```typescript
|
|
37
|
+
import { usePlatform } from '@kb-labs/sdk';
|
|
38
|
+
|
|
39
|
+
// Get platform vector store (recommended - uses singleton)
|
|
40
|
+
const platform = usePlatform();
|
|
41
|
+
const vectorStore = platform.getVectorStore();
|
|
42
|
+
|
|
43
|
+
// Platform automatically provides the right implementation:
|
|
44
|
+
// - Qdrant in production (if configured)
|
|
45
|
+
// - In-memory for development/testing
|
|
46
|
+
|
|
47
|
+
// Manual creation (only if you need custom config)
|
|
48
|
+
import { QdrantVectorStore, MemoryVectorStore } from '@kb-labs/sdk';
|
|
49
|
+
|
|
50
|
+
const qdrantStore = new QdrantVectorStore({
|
|
51
|
+
url: process.env.QDRANT_URL || 'http://localhost:6333',
|
|
52
|
+
collection: 'mind-default',
|
|
53
|
+
dimensions: 1536,
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
const memoryStore = new MemoryVectorStore({
|
|
57
|
+
dimensions: 1536,
|
|
58
|
+
});
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### Inserting Vectors
|
|
62
|
+
|
|
63
|
+
```typescript
|
|
64
|
+
import type { IndexedChunk } from '@kb-labs/sdk';
|
|
65
|
+
|
|
66
|
+
const chunks: IndexedChunk[] = [
|
|
67
|
+
{
|
|
68
|
+
id: 'chunk-1',
|
|
69
|
+
content: 'JWT token validation in middleware',
|
|
70
|
+
embedding: [0.1, -0.3, 0.5, ...], // 1536-dim vector
|
|
71
|
+
metadata: {
|
|
72
|
+
path: 'src/auth/middleware.ts',
|
|
73
|
+
language: 'typescript',
|
|
74
|
+
repository: 'my-project',
|
|
75
|
+
startLine: 42,
|
|
76
|
+
endLine: 58,
|
|
77
|
+
},
|
|
78
|
+
},
|
|
79
|
+
];
|
|
80
|
+
|
|
81
|
+
await vectorStore.upsert(chunks);
|
|
82
|
+
console.log('Inserted', chunks.length, 'chunks');
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Searching Vectors
|
|
86
|
+
|
|
87
|
+
```typescript
|
|
88
|
+
const queryEmbedding = [0.2, -0.1, 0.4, ...]; // From embedding provider
|
|
89
|
+
|
|
90
|
+
const results = await vectorStore.search({
|
|
91
|
+
vector: queryEmbedding,
|
|
92
|
+
limit: 10,
|
|
93
|
+
threshold: 0.7, // Min similarity score
|
|
94
|
+
filter: {
|
|
95
|
+
language: 'typescript',
|
|
96
|
+
repository: 'my-project',
|
|
97
|
+
},
|
|
98
|
+
});
|
|
99
|
+
|
|
100
|
+
console.log('Found', results.length, 'results');
|
|
101
|
+
results.forEach(result => {
|
|
102
|
+
console.log(`[${result.score.toFixed(2)}] ${result.chunk.metadata.path}`);
|
|
103
|
+
});
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
### Deleting Vectors
|
|
107
|
+
|
|
108
|
+
```typescript
|
|
109
|
+
// Delete specific chunks
|
|
110
|
+
await vectorStore.delete(['chunk-1', 'chunk-2']);
|
|
111
|
+
|
|
112
|
+
// Delete by filter
|
|
113
|
+
await vectorStore.deleteByFilter({
|
|
114
|
+
repository: 'old-project',
|
|
115
|
+
});
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
## Vector Stores
|
|
119
|
+
|
|
120
|
+
### Qdrant Store
|
|
121
|
+
|
|
122
|
+
**Qdrant** is a high-performance vector database optimized for similarity search.
|
|
123
|
+
|
|
124
|
+
**Features:**
|
|
125
|
+
- ✅ Fast search (milliseconds)
|
|
126
|
+
- ✅ HNSW indexing for efficiency
|
|
127
|
+
- ✅ Metadata filtering
|
|
128
|
+
- ✅ Persistence
|
|
129
|
+
- ✅ Horizontal scaling
|
|
130
|
+
|
|
131
|
+
**Configuration:**
|
|
132
|
+
```typescript
|
|
133
|
+
{
|
|
134
|
+
type: 'qdrant',
|
|
135
|
+
url: 'http://localhost:6333', // Qdrant server URL
|
|
136
|
+
apiKey: process.env.QDRANT_API_KEY, // Optional (for cloud)
|
|
137
|
+
collection: 'mind-default', // Collection name
|
|
138
|
+
dimensions: 1536, // Embedding dimensions
|
|
139
|
+
distance: 'cosine', // 'cosine' | 'euclidean' | 'dot'
|
|
140
|
+
indexConfig: {
|
|
141
|
+
type: 'hnsw', // HNSW indexing
|
|
142
|
+
m: 16, // Number of edges per node
|
|
143
|
+
efConstruct: 100, // Construction time quality
|
|
144
|
+
},
|
|
145
|
+
}
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
**Setup (Local):**
|
|
149
|
+
```bash
|
|
150
|
+
# Docker
|
|
151
|
+
docker run -p 6333:6333 qdrant/qdrant
|
|
152
|
+
|
|
153
|
+
# Or Docker Compose
|
|
154
|
+
docker-compose up qdrant
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
**Setup (Cloud):**
|
|
158
|
+
```bash
|
|
159
|
+
# Qdrant Cloud (qdrant.io)
|
|
160
|
+
export QDRANT_URL=https://your-cluster.qdrant.io
|
|
161
|
+
export QDRANT_API_KEY=your-api-key
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
### Memory Store
|
|
165
|
+
|
|
166
|
+
**In-memory vector store** for development and testing.
|
|
167
|
+
|
|
168
|
+
**Features:**
|
|
169
|
+
- ✅ Instant setup (no server)
|
|
170
|
+
- ✅ Fast for small datasets (<10K vectors)
|
|
171
|
+
- ✅ Good for tests
|
|
172
|
+
- ❌ No persistence (lost on restart)
|
|
173
|
+
- ❌ Limited scalability
|
|
174
|
+
|
|
175
|
+
**Configuration:**
|
|
176
|
+
```typescript
|
|
177
|
+
{
|
|
178
|
+
type: 'memory',
|
|
179
|
+
dimensions: 1536,
|
|
180
|
+
distance: 'cosine', // 'cosine' | 'euclidean' | 'dot'
|
|
181
|
+
}
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
**Use cases:**
|
|
185
|
+
- Unit tests
|
|
186
|
+
- Local development without Qdrant
|
|
187
|
+
- CI/CD pipelines
|
|
188
|
+
- Quick prototyping
|
|
189
|
+
|
|
190
|
+
### File Store
|
|
191
|
+
|
|
192
|
+
**File-based vector store** for persistent local storage.
|
|
193
|
+
|
|
194
|
+
**Features:**
|
|
195
|
+
- ✅ Persistent (survives restarts)
|
|
196
|
+
- ✅ No server needed
|
|
197
|
+
- ✅ Good for small-medium datasets (<100K vectors)
|
|
198
|
+
- ❌ Slower than Qdrant
|
|
199
|
+
- ❌ No horizontal scaling
|
|
200
|
+
|
|
201
|
+
**Configuration:**
|
|
202
|
+
```typescript
|
|
203
|
+
{
|
|
204
|
+
type: 'file',
|
|
205
|
+
path: '.kb/mind/vectors',
|
|
206
|
+
dimensions: 1536,
|
|
207
|
+
distance: 'cosine',
|
|
208
|
+
compression: true, // Compress on disk
|
|
209
|
+
}
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
**Storage format:**
|
|
213
|
+
```
|
|
214
|
+
.kb/mind/vectors/
|
|
215
|
+
├── metadata.json # Store metadata
|
|
216
|
+
├── vectors.bin # Binary vector data
|
|
217
|
+
└── index.json # Index for fast lookup
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
## Advanced Features
|
|
221
|
+
|
|
222
|
+
### Metadata Filtering
|
|
223
|
+
|
|
224
|
+
Filter search results by metadata:
|
|
225
|
+
|
|
226
|
+
```typescript
|
|
227
|
+
const results = await vectorStore.search({
|
|
228
|
+
vector: queryEmbedding,
|
|
229
|
+
limit: 20,
|
|
230
|
+
filter: {
|
|
231
|
+
// Exact match
|
|
232
|
+
language: 'typescript',
|
|
233
|
+
repository: 'my-project',
|
|
234
|
+
|
|
235
|
+
// Range (if supported)
|
|
236
|
+
startLine: { gte: 100, lte: 200 },
|
|
237
|
+
|
|
238
|
+
// Multiple values (OR)
|
|
239
|
+
fileType: ['ts', 'tsx'],
|
|
240
|
+
},
|
|
241
|
+
});
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
### Batch Operations
|
|
245
|
+
|
|
246
|
+
Efficient bulk operations:
|
|
247
|
+
|
|
248
|
+
```typescript
|
|
249
|
+
// Batch insert (up to 1000 chunks)
|
|
250
|
+
const chunks = [/* ... 1000 chunks ... */];
|
|
251
|
+
await vectorStore.upsertBatch(chunks, { batchSize: 100 });
|
|
252
|
+
|
|
253
|
+
// Batch search (multiple queries)
|
|
254
|
+
const queries = [emb1, emb2, emb3];
|
|
255
|
+
const allResults = await vectorStore.searchBatch(queries, { limit: 10 });
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
### Integrity Checks
|
|
259
|
+
|
|
260
|
+
Verify store consistency:
|
|
261
|
+
|
|
262
|
+
```typescript
|
|
263
|
+
const report = await vectorStore.verify();
|
|
264
|
+
|
|
265
|
+
console.log('Store status:', report.ok ? 'OK' : 'ERRORS');
|
|
266
|
+
console.log('Total vectors:', report.stats.totalVectors);
|
|
267
|
+
console.log('Inconsistencies:', report.inconsistencies);
|
|
268
|
+
|
|
269
|
+
if (!report.ok) {
|
|
270
|
+
report.inconsistencies.forEach(issue => {
|
|
271
|
+
console.error(`[${issue.code}] ${issue.message}`);
|
|
272
|
+
});
|
|
273
|
+
}
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
**Checks performed:**
|
|
277
|
+
- ✅ Dimension consistency
|
|
278
|
+
- ✅ Missing vectors
|
|
279
|
+
- ✅ Duplicate IDs
|
|
280
|
+
- ✅ Corrupt embeddings
|
|
281
|
+
- ✅ Metadata integrity
|
|
282
|
+
|
|
283
|
+
### Statistics
|
|
284
|
+
|
|
285
|
+
Get store statistics:
|
|
286
|
+
|
|
287
|
+
```typescript
|
|
288
|
+
const stats = await vectorStore.getStats();
|
|
289
|
+
|
|
290
|
+
console.log('Total vectors:', stats.totalVectors);
|
|
291
|
+
console.log('Total collections:', stats.collectionCount);
|
|
292
|
+
console.log('Memory usage:', stats.memorySizeMB, 'MB');
|
|
293
|
+
console.log('Disk usage:', stats.diskSizeMB, 'MB');
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
### Collection Management
|
|
297
|
+
|
|
298
|
+
Manage multiple isolated collections:
|
|
299
|
+
|
|
300
|
+
```typescript
|
|
301
|
+
// Create collection
|
|
302
|
+
await vectorStore.createCollection('my-project', {
|
|
303
|
+
dimensions: 1536,
|
|
304
|
+
distance: 'cosine',
|
|
305
|
+
});
|
|
306
|
+
|
|
307
|
+
// List collections
|
|
308
|
+
const collections = await vectorStore.listCollections();
|
|
309
|
+
console.log('Collections:', collections);
|
|
310
|
+
|
|
311
|
+
// Delete collection
|
|
312
|
+
await vectorStore.deleteCollection('old-project');
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
## Performance
|
|
316
|
+
|
|
317
|
+
### Benchmark (100K vectors, 1536 dims)
|
|
318
|
+
|
|
319
|
+
| Operation | Qdrant | Memory | File |
|
|
320
|
+
|-----------|--------|--------|------|
|
|
321
|
+
| Insert (1K vectors) | ~500ms | ~100ms | ~2s |
|
|
322
|
+
| Search (top 10) | ~5ms | ~50ms | ~100ms |
|
|
323
|
+
| Batch search (100 queries) | ~200ms | ~2s | ~5s |
|
|
324
|
+
|
|
325
|
+
### Optimization Tips
|
|
326
|
+
|
|
327
|
+
1. **Use Qdrant for production** - Fastest and most scalable
|
|
328
|
+
2. **Batch operations** - 10-100x faster than individual ops
|
|
329
|
+
3. **Tune HNSW parameters** - Adjust `m` and `efConstruct` for speed/quality tradeoff
|
|
330
|
+
4. **Filter carefully** - Metadata filters can slow search
|
|
331
|
+
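5. **Use appropriate distance** - Cosine when vector magnitude should be ignored (raw, unnormalized embeddings); dot product when vectors are already normalized (same ranking, less work)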
|
|
332
|
+
|
|
333
|
+
## Configuration
|
|
334
|
+
|
|
335
|
+
### Environment Variables
|
|
336
|
+
|
|
337
|
+
```bash
|
|
338
|
+
# Qdrant
|
|
339
|
+
export QDRANT_URL=http://localhost:6333
|
|
340
|
+
export QDRANT_API_KEY=your-api-key
|
|
341
|
+
|
|
342
|
+
# Default settings
|
|
343
|
+
export VECTOR_STORE_TYPE=qdrant
|
|
344
|
+
export VECTOR_STORE_COLLECTION=mind-default
|
|
345
|
+
export VECTOR_DIMENSIONS=1536
|
|
346
|
+
export VECTOR_DISTANCE=cosine
|
|
347
|
+
```
|
|
348
|
+
|
|
349
|
+
### Store Selection with Fallback
|
|
350
|
+
|
|
351
|
+
```typescript
|
|
352
|
+
async function createStoreWithFallback() {
|
|
353
|
+
try {
|
|
354
|
+
// Try Qdrant first
|
|
355
|
+
return createVectorStore({
|
|
356
|
+
type: 'qdrant',
|
|
357
|
+
url: process.env.QDRANT_URL,
|
|
358
|
+
});
|
|
359
|
+
} catch {
|
|
360
|
+
console.warn('Qdrant unavailable, using memory store');
|
|
361
|
+
return createVectorStore({
|
|
362
|
+
type: 'memory',
|
|
363
|
+
dimensions: 1536,
|
|
364
|
+
});
|
|
365
|
+
}
|
|
366
|
+
}
|
|
367
|
+
```
|
|
368
|
+
|
|
369
|
+
## Dependencies
|
|
370
|
+
|
|
371
|
+
```json
|
|
372
|
+
{
|
|
373
|
+
"dependencies": {
|
|
374
|
+
"@kb-labs/mind-core": "^1.5.0",
|
|
375
|
+
"fs-extra": "^11.0.0"
|
|
376
|
+
}
|
|
377
|
+
}
|
|
378
|
+
```
|
|
379
|
+
|
|
380
|
+
## Testing
|
|
381
|
+
|
|
382
|
+
```bash
|
|
383
|
+
# Run unit tests (uses memory store)
|
|
384
|
+
pnpm test
|
|
385
|
+
|
|
386
|
+
# Test with real Qdrant
|
|
387
|
+
QDRANT_URL=http://localhost:6333 pnpm test:integration
|
|
388
|
+
|
|
389
|
+
# Benchmark
|
|
390
|
+
pnpm test:benchmark
|
|
391
|
+
```
|
|
392
|
+
|
|
393
|
+
## Development
|
|
394
|
+
|
|
395
|
+
### Build
|
|
396
|
+
|
|
397
|
+
```bash
|
|
398
|
+
pnpm build
|
|
399
|
+
```
|
|
400
|
+
|
|
401
|
+
### Watch Mode
|
|
402
|
+
|
|
403
|
+
```bash
|
|
404
|
+
pnpm dev
|
|
405
|
+
```
|
|
406
|
+
|
|
407
|
+
### Type Check
|
|
408
|
+
|
|
409
|
+
```bash
|
|
410
|
+
pnpm type-check
|
|
411
|
+
```
|
|
412
|
+
|
|
413
|
+
## Best Practices
|
|
414
|
+
|
|
415
|
+
**DO ✅:**
|
|
416
|
+
- **Use Qdrant in production** - Best performance and scalability
|
|
417
|
+
- **Validate dimensions** - Ensure all vectors same size
|
|
418
|
+
- **Batch operations** - Much faster than individual ops
|
|
419
|
+
- **Use memory store for tests** - Fast, no setup needed
|
|
420
|
+
- **Filter efficiently** - Index metadata fields used in filters
|
|
421
|
+
- **Run integrity checks** - Verify store consistency regularly
|
|
422
|
+
|
|
423
|
+
**DON'T ❌:**
|
|
424
|
+
- **Use memory store in production** - No persistence, limited scale
|
|
425
|
+
- **Mix dimensions** - All vectors must be same size
|
|
426
|
+
- **Skip batch operations** - 10-100x slower individually
|
|
427
|
+
- **Ignore errors** - Handle store failures gracefully
|
|
428
|
+
- **Over-filter** - Each filter slows search
|
|
429
|
+
|
|
430
|
+
## Related Packages
|
|
431
|
+
|
|
432
|
+
- **@kb-labs/mind-engine** - Uses vector store for semantic search
|
|
433
|
+
- **@kb-labs/mind-embeddings** - Generates embeddings for storage
|
|
434
|
+
|
|
435
|
+
## Examples
|
|
436
|
+
|
|
437
|
+
### Example: Semantic Code Search
|
|
438
|
+
|
|
439
|
+
```typescript
|
|
440
|
+
import { createVectorStore, createEmbeddingProvider } from '@kb-labs/sdk';
|
|
441
|
+
|
|
442
|
+
// Setup
|
|
443
|
+
const embedder = createEmbeddingProvider({ type: 'openai' });
|
|
444
|
+
const store = createVectorStore({
|
|
445
|
+
type: 'qdrant',
|
|
446
|
+
url: 'http://localhost:6333',
|
|
447
|
+
collection: 'my-codebase',
|
|
448
|
+
});
|
|
449
|
+
|
|
450
|
+
// Index code
|
|
451
|
+
const codeChunks = [
|
|
452
|
+
{ id: '1', content: 'JWT validation middleware', path: 'src/auth.ts' },
|
|
453
|
+
{ id: '2', content: 'User login handler', path: 'src/login.ts' },
|
|
454
|
+
{ id: '3', content: 'Database connection pool', path: 'src/db.ts' },
|
|
455
|
+
];
|
|
456
|
+
|
|
457
|
+
for (const chunk of codeChunks) {
|
|
458
|
+
const embedding = await embedder.embed(chunk.content);
|
|
459
|
+
await store.upsert([{
|
|
460
|
+
id: chunk.id,
|
|
461
|
+
content: chunk.content,
|
|
462
|
+
embedding,
|
|
463
|
+
metadata: { path: chunk.path },
|
|
464
|
+
}]);
|
|
465
|
+
}
|
|
466
|
+
|
|
467
|
+
// Search
|
|
468
|
+
const queryEmb = await embedder.embed('authentication implementation');
|
|
469
|
+
const results = await store.search({
|
|
470
|
+
vector: queryEmb,
|
|
471
|
+
limit: 3,
|
|
472
|
+
});
|
|
473
|
+
|
|
474
|
+
console.log('Top results:');
|
|
475
|
+
results.forEach(r => {
|
|
476
|
+
console.log(`[${r.score.toFixed(2)}] ${r.chunk.metadata.path}`);
|
|
477
|
+
});
|
|
478
|
+
```
|
|
479
|
+
|
|
480
|
+
## License
|
|
481
|
+
|
|
482
|
+
Private - KB Labs internal use only.
|
|
483
|
+
|
|
484
|
+
## Support
|
|
485
|
+
|
|
486
|
+
For questions, check:
|
|
487
|
+
- [Mind Engine README](../mind-engine/README.md)
|
|
488
|
+
- [Mind Embeddings README](../mind-embeddings/README.md)
|
|
489
|
+
- [CLAUDE.md](../../CLAUDE.md) - Development guide
|
|
490
|
+
- [Qdrant Documentation](https://qdrant.tech/documentation/)
|
|
491
|
+
|
|
492
|
+
---
|
|
493
|
+
|
|
494
|
+
**Last Updated**: 2025-12-09
|
|
495
|
+
**Version**: 1.5.0
|
|
496
|
+
**Status**: 🟡 SDK Migration Pending (Phase 3)
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
 * An embedding together with its declared dimensionality.
 *
 * The store's similarity wrapper compares `dim` of both operands and scores
 * the pair as 0 when they differ; otherwise `values` is handed to the cosine
 * similarity function from `@kb-labs/mind-core`.
 */
interface EmbeddingVector {
|
|
2
|
+
dim: number; // declared dimensionality; presumably equals values.length — not enforced here
|
|
3
|
+
values: number[]; // the vector components
|
|
4
|
+
}
|
|
5
|
+
/**
 * Line range of a chunk, used as `StoredMindChunk.span`.
 *
 * NOTE(review): whether the lines are 0- or 1-based and whether `endLine` is
 * inclusive is not visible in this package — confirm against the producer.
 */
interface SpanRange {
|
|
6
|
+
startLine: number;
|
|
7
|
+
endLine: number;
|
|
8
|
+
}
|
|
9
|
+
/**
 * One indexed chunk as it is persisted in a scope's JSON index file and
 * returned from `MindVectorStore.search`.
 */
interface StoredMindChunk {
|
|
10
|
+
chunkId: string;
|
|
11
|
+
scopeId: string; // presumably the scope this chunk was indexed under — the store itself does not read it
|
|
12
|
+
sourceId: string; // matched against VectorSearchFilters.sourceIds during search
|
|
13
|
+
path: string; // file path; key for incremental change detection and for pathMatcher
|
|
14
|
+
span: SpanRange;
|
|
15
|
+
text: string;
|
|
16
|
+
metadata?: Record<string, unknown>; // updateScope reads `fileHash` and `fileMtime` from here when both are present
|
|
17
|
+
embedding: EmbeddingVector; // compared with the query vector by cosine similarity
|
|
18
|
+
}
|
|
19
|
+
/**
 * Per-file fingerprint passed to `MindVectorStore.updateScope`.
 *
 * A file counts as changed when it has no recorded fingerprint in the
 * existing index or when either `hash` or `mtime` differs from the recorded one.
 */
interface FileMetadata {
|
|
20
|
+
path: string;
|
|
21
|
+
mtime: number; // compared for strict equality; unit is not fixed by this package — TODO confirm (likely ms)
|
|
22
|
+
hash: string;
|
|
23
|
+
}
|
|
24
|
+
/** Construction options for `MindVectorStore`. */
interface MindVectorStoreOptions {
|
|
25
|
+
indexDir: string; // directory that receives one `<scopeId>.json` index file per scope; created on write if missing
|
|
26
|
+
}
|
|
27
|
+
/**
 * Optional restrictions applied to chunks before they are scored.
 * Both conditions must pass for a chunk to be considered.
 */
interface VectorSearchFilters {
|
|
28
|
+
sourceIds?: Set<string>; // when non-empty, only chunks whose sourceId is in the set pass; an empty set filters nothing
|
|
29
|
+
pathMatcher?: (filePath: string) => boolean; // called with chunk.path; a falsy result excludes the chunk
|
|
30
|
+
}
|
|
31
|
+
/** A single search hit: the stored chunk and its similarity to the query. */
interface VectorSearchMatch {
|
|
32
|
+
chunk: StoredMindChunk;
|
|
33
|
+
score: number; // cosine similarity to the query; 0 when the two vectors declare different `dim`
|
|
34
|
+
}
|
|
35
|
+
/**
 * File-backed vector store that keeps one JSON index file per scope inside
 * `options.indexDir` and mirrors loaded scopes in an in-memory cache.
 * Search is a linear cosine-similarity scan over the scope's chunks.
 */
declare class MindVectorStore {
|
|
36
|
+
private readonly options;
|
|
37
|
+
private readonly cache;
|
|
38
|
+
constructor(options: MindVectorStoreOptions);
|
|
39
|
+
/** Overwrites the scope's cached and on-disk index with exactly `chunks`. */
replaceScope(scopeId: string, chunks: StoredMindChunk[]): Promise<void>;
|
|
40
|
+
/** Resolves to true when an index file for `scopeId` exists on disk. */
scopeExists(scopeId: string): Promise<boolean>;
|
|
41
|
+
/**
 * Incrementally refreshes a scope: chunks of files whose fingerprint is unchanged are kept,
 * chunks of changed/new files are taken from `chunks`. Falls back to `replaceScope` when
 * `fileMetadata` is missing or empty, or when at least 80% of the tracked files changed or disappeared.
 */
updateScope(scopeId: string, chunks: StoredMindChunk[], fileMetadata?: Map<string, FileMetadata>): Promise<void>;
|
|
42
|
+
/**
 * Scores every chunk of the scope that passes `filters` against `vector`, drops non-finite
 * scores, and resolves to the `limit` best matches ordered by descending score.
 * An unknown scope resolves to an empty array.
 */
search(scopeId: string, vector: EmbeddingVector, limit: number, filters?: VectorSearchFilters): Promise<VectorSearchMatch[]>;
|
|
43
|
+
private loadScope;
|
|
44
|
+
private getScopePath;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
export { type EmbeddingVector, type FileMetadata, MindVectorStore, type MindVectorStoreOptions, type SpanRange, type StoredMindChunk, type VectorSearchFilters, type VectorSearchMatch };
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
import path from 'path';
|
|
2
|
+
import fs from 'fs-extra';
|
|
3
|
+
import { cosineSimilarity as cosineSimilarity$1 } from '@kb-labs/mind-core';
|
|
4
|
+
|
|
5
|
+
// src/index.ts
|
|
6
|
+
var MindVectorStore = class {
|
|
7
|
+
options;
|
|
8
|
+
cache = /* @__PURE__ */ new Map();
|
|
9
|
+
constructor(options) {
|
|
10
|
+
this.options = options;
|
|
11
|
+
}
|
|
12
|
+
async replaceScope(scopeId, chunks) {
|
|
13
|
+
this.cache.set(scopeId, chunks);
|
|
14
|
+
await fs.ensureDir(this.options.indexDir);
|
|
15
|
+
const filePath = this.getScopePath(scopeId);
|
|
16
|
+
const payload = {
|
|
17
|
+
scopeId,
|
|
18
|
+
generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
19
|
+
chunks
|
|
20
|
+
};
|
|
21
|
+
await fs.writeJson(filePath, payload, { spaces: 2 });
|
|
22
|
+
}
|
|
23
|
+
async scopeExists(scopeId) {
|
|
24
|
+
const filePath = this.getScopePath(scopeId);
|
|
25
|
+
return fs.pathExists(filePath);
|
|
26
|
+
}
|
|
27
|
+
async updateScope(scopeId, chunks, fileMetadata) {
|
|
28
|
+
if (!fileMetadata || fileMetadata.size === 0) {
|
|
29
|
+
return this.replaceScope(scopeId, chunks);
|
|
30
|
+
}
|
|
31
|
+
const existingChunks = await this.loadScope(scopeId);
|
|
32
|
+
const existingFiles = /* @__PURE__ */ new Map();
|
|
33
|
+
for (const chunk of existingChunks) {
|
|
34
|
+
const existingMeta = chunk.metadata;
|
|
35
|
+
if (existingMeta?.fileHash && existingMeta?.fileMtime) {
|
|
36
|
+
const currentMeta = existingFiles.get(chunk.path);
|
|
37
|
+
if (!currentMeta || existingMeta.fileMtime > (currentMeta.mtime ?? 0)) {
|
|
38
|
+
existingFiles.set(chunk.path, {
|
|
39
|
+
path: chunk.path,
|
|
40
|
+
mtime: existingMeta.fileMtime,
|
|
41
|
+
hash: existingMeta.fileHash
|
|
42
|
+
});
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
const changedFiles = /* @__PURE__ */ new Set();
|
|
47
|
+
const deletedFiles = /* @__PURE__ */ new Set();
|
|
48
|
+
for (const [path2, newMeta] of fileMetadata.entries()) {
|
|
49
|
+
const existingMeta = existingFiles.get(path2);
|
|
50
|
+
if (!existingMeta || existingMeta.hash !== newMeta.hash || existingMeta.mtime !== newMeta.mtime) {
|
|
51
|
+
changedFiles.add(path2);
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
for (const path2 of existingFiles.keys()) {
|
|
55
|
+
if (!fileMetadata.has(path2)) {
|
|
56
|
+
deletedFiles.add(path2);
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
if (changedFiles.size + deletedFiles.size >= existingFiles.size * 0.8) {
|
|
60
|
+
return this.replaceScope(scopeId, chunks);
|
|
61
|
+
}
|
|
62
|
+
const unchangedChunks = existingChunks.filter(
|
|
63
|
+
(chunk) => !deletedFiles.has(chunk.path) && !changedFiles.has(chunk.path)
|
|
64
|
+
);
|
|
65
|
+
const newChunks = chunks.filter((chunk) => changedFiles.has(chunk.path));
|
|
66
|
+
const updatedChunks = [...unchangedChunks, ...newChunks];
|
|
67
|
+
this.cache.set(scopeId, updatedChunks);
|
|
68
|
+
await fs.ensureDir(this.options.indexDir);
|
|
69
|
+
const filePath = this.getScopePath(scopeId);
|
|
70
|
+
const payload = {
|
|
71
|
+
scopeId,
|
|
72
|
+
generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
73
|
+
chunks: updatedChunks
|
|
74
|
+
};
|
|
75
|
+
await fs.writeJson(filePath, payload, { spaces: 2 });
|
|
76
|
+
}
|
|
77
|
+
async search(scopeId, vector, limit, filters) {
|
|
78
|
+
const records = await this.loadScope(scopeId);
|
|
79
|
+
if (records.length === 0) {
|
|
80
|
+
return [];
|
|
81
|
+
}
|
|
82
|
+
return records.filter((chunk) => applyFilters(chunk, filters)).map((chunk) => ({
|
|
83
|
+
chunk,
|
|
84
|
+
score: cosineSimilarity(vector, chunk.embedding)
|
|
85
|
+
})).filter((match) => Number.isFinite(match.score)).sort((a, b) => b.score - a.score).slice(0, limit);
|
|
86
|
+
}
|
|
87
|
+
async loadScope(scopeId) {
|
|
88
|
+
const cached = this.cache.get(scopeId);
|
|
89
|
+
if (cached) {
|
|
90
|
+
return cached;
|
|
91
|
+
}
|
|
92
|
+
const filePath = this.getScopePath(scopeId);
|
|
93
|
+
if (!await fs.pathExists(filePath)) {
|
|
94
|
+
this.cache.set(scopeId, []);
|
|
95
|
+
return [];
|
|
96
|
+
}
|
|
97
|
+
const payload = await fs.readJson(filePath);
|
|
98
|
+
this.cache.set(scopeId, payload.chunks);
|
|
99
|
+
return payload.chunks;
|
|
100
|
+
}
|
|
101
|
+
getScopePath(scopeId) {
|
|
102
|
+
const safeId = scopeId.replace(/[\\/]/g, "_");
|
|
103
|
+
return path.join(this.options.indexDir, `${safeId}.json`);
|
|
104
|
+
}
|
|
105
|
+
};
|
|
106
|
+
function applyFilters(chunk, filters) {
|
|
107
|
+
if (!filters) {
|
|
108
|
+
return true;
|
|
109
|
+
}
|
|
110
|
+
if (filters.sourceIds?.size && !filters.sourceIds.has(chunk.sourceId)) {
|
|
111
|
+
return false;
|
|
112
|
+
}
|
|
113
|
+
if (filters.pathMatcher && !filters.pathMatcher(chunk.path)) {
|
|
114
|
+
return false;
|
|
115
|
+
}
|
|
116
|
+
return true;
|
|
117
|
+
}
|
|
118
|
+
function cosineSimilarity(a, b) {
|
|
119
|
+
if (a.dim !== b.dim) {
|
|
120
|
+
return 0;
|
|
121
|
+
}
|
|
122
|
+
return cosineSimilarity$1(a.values, b.values);
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
export { MindVectorStore };
|
|
126
|
+
//# sourceMappingURL=index.js.map
|
|
127
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/index.ts"],"names":["path","calculateCosineSimilarity"],"mappings":";;;;;AAmDO,IAAM,kBAAN,MAAsB;AAAA,EACV,OAAA;AAAA,EACA,KAAA,uBAAY,GAAA,EAA+B;AAAA,EAE5D,YAAY,OAAA,EAAiC;AAC3C,IAAA,IAAA,CAAK,OAAA,GAAU,OAAA;AAAA,EACjB;AAAA,EAEA,MAAM,YAAA,CACJ,OAAA,EACA,MAAA,EACe;AACf,IAAA,IAAA,CAAK,KAAA,CAAM,GAAA,CAAI,OAAA,EAAS,MAAM,CAAA;AAC9B,IAAA,MAAM,EAAA,CAAG,SAAA,CAAU,IAAA,CAAK,OAAA,CAAQ,QAAQ,CAAA;AACxC,IAAA,MAAM,QAAA,GAAW,IAAA,CAAK,YAAA,CAAa,OAAO,CAAA;AAC1C,IAAA,MAAM,OAAA,GAA0B;AAAA,MAC9B,OAAA;AAAA,MACA,WAAA,EAAA,iBAAa,IAAI,IAAA,EAAK,EAAE,WAAA,EAAY;AAAA,MACpC;AAAA,KACF;AACA,IAAA,MAAM,GAAG,SAAA,CAAU,QAAA,EAAU,SAAS,EAAE,MAAA,EAAQ,GAAG,CAAA;AAAA,EACrD;AAAA,EAEA,MAAM,YAAY,OAAA,EAAmC;AACnD,IAAA,MAAM,QAAA,GAAW,IAAA,CAAK,YAAA,CAAa,OAAO,CAAA;AAC1C,IAAA,OAAO,EAAA,CAAG,WAAW,QAAQ,CAAA;AAAA,EAC/B;AAAA,EAEA,MAAM,WAAA,CACJ,OAAA,EACA,MAAA,EACA,YAAA,EACe;AACf,IAAA,IAAI,CAAC,YAAA,IAAgB,YAAA,CAAa,IAAA,KAAS,CAAA,EAAG;AAE5C,MAAA,OAAO,IAAA,CAAK,YAAA,CAAa,OAAA,EAAS,MAAM,CAAA;AAAA,IAC1C;AAGA,IAAA,MAAM,cAAA,GAAiB,MAAM,IAAA,CAAK,SAAA,CAAU,OAAO,CAAA;AACnD,IAAA,MAAM,aAAA,uBAAoB,GAAA,EAA0B;AAGpD,IAAA,KAAA,MAAW,SAAS,cAAA,EAAgB;AAClC,MAAA,MAAM,eAAe,KAAA,CAAM,QAAA;AAC3B,MAAA,IAAI,YAAA,EAAc,QAAA,IAAY,YAAA,EAAc,SAAA,EAAW;AACrD,QAAA,MAAM,WAAA,GAAc,aAAA,CAAc,GAAA,CAAI,KAAA,CAAM,IAAI,CAAA;AAEhD,QAAA,IAAI,CAAC,WAAA,IAAgB,YAAA,CAAa,SAAA,IAAa,WAAA,CAAY,SAAS,CAAA,CAAA,EAAK;AACvE,UAAA,aAAA,CAAc,GAAA,CAAI,MAAM,IAAA,EAAM;AAAA,YAC5B,MAAM,KAAA,CAAM,IAAA;AAAA,YACZ,OAAO,YAAA,CAAa,SAAA;AAAA,YACpB,MAAM,YAAA,CAAa;AAAA,WACpB,CAAA;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAGA,IAAA,MAAM,YAAA,uBAAmB,GAAA,EAAY;AACrC,IAAA,MAAM,YAAA,uBAAmB,GAAA,EAAY;AAGrC,IAAA,KAAA,MAAW,CAACA,KAAAA,EAAM,OAAO,CAAA,IAAK,YAAA,CAAa,SAAQ,EAAG;AACpD,MAAA,MAAM,YAAA,GAAe,aAAA,CAAc,GAAA,CAAIA,KAAI,CAAA;AAC3C,MAAA,IAAI,CAAC,gBAAgB,YAAA,CAAa,IAAA,KAAS,QAAQ,IAAA,IAAQ,YAAA,CAAa,KAAA,KAAU,OAAA,CAAQ,KAAA,EAAO;AAC/F,QAAA,YAAA,CAAa,IAAIA,KAAI,CAAA;AAAA,MACvB;AAAA,IACF;AAGA,IAAA,KAAA,MAAWA,KAAAA,IAAQ,aAAA,CAAc,IAAA,EAAK,EAAG;AACvC,MAAA,IAAI,CAAC,Y
AAA,CAAa,GAAA,CAAIA,KAAI,CAAA,EAAG;AAC3B,QAAA,YAAA,CAAa,IAAIA,KAAI,CAAA;AAAA,MACvB;AAAA,IACF;AAGA,IAAA,IAAI,aAAa,IAAA,GAAO,YAAA,CAAa,IAAA,IAAQ,aAAA,CAAc,OAAO,GAAA,EAAK;AACrE,MAAA,OAAO,IAAA,CAAK,YAAA,CAAa,OAAA,EAAS,MAAM,CAAA;AAAA,IAC1C;AAGA,IAAA,MAAM,kBAAkB,cAAA,CAAe,MAAA;AAAA,MAAO,CAAA,KAAA,KAC5C,CAAC,YAAA,CAAa,GAAA,CAAI,KAAA,CAAM,IAAI,CAAA,IAAK,CAAC,YAAA,CAAa,GAAA,CAAI,KAAA,CAAM,IAAI;AAAA,KAC/D;AAGA,IAAA,MAAM,SAAA,GAAY,OAAO,MAAA,CAAO,CAAA,KAAA,KAAS,aAAa,GAAA,CAAI,KAAA,CAAM,IAAI,CAAC,CAAA;AAGrE,IAAA,MAAM,aAAA,GAAgB,CAAC,GAAG,eAAA,EAAiB,GAAG,SAAS,CAAA;AAGvD,IAAA,IAAA,CAAK,KAAA,CAAM,GAAA,CAAI,OAAA,EAAS,aAAa,CAAA;AACrC,IAAA,MAAM,EAAA,CAAG,SAAA,CAAU,IAAA,CAAK,OAAA,CAAQ,QAAQ,CAAA;AACxC,IAAA,MAAM,QAAA,GAAW,IAAA,CAAK,YAAA,CAAa,OAAO,CAAA;AAC1C,IAAA,MAAM,OAAA,GAA0B;AAAA,MAC9B,OAAA;AAAA,MACA,WAAA,EAAA,iBAAa,IAAI,IAAA,EAAK,EAAE,WAAA,EAAY;AAAA,MACpC,MAAA,EAAQ;AAAA,KACV;AACA,IAAA,MAAM,GAAG,SAAA,CAAU,QAAA,EAAU,SAAS,EAAE,MAAA,EAAQ,GAAG,CAAA;AAAA,EACrD;AAAA,EAEA,MAAM,MAAA,CACJ,OAAA,EACA,MAAA,EACA,OACA,OAAA,EAC8B;AAC9B,IAAA,MAAM,OAAA,GAAU,MAAM,IAAA,CAAK,SAAA,CAAU,OAAO,CAAA;AAC5C,IAAA,IAAI,OAAA,CAAQ,WAAW,CAAA,EAAG;AACxB,MAAA,OAAO,EAAC;AAAA,IACV;AAEA,IAAA,OAAO,OAAA,CACJ,OAAO,CAAA,KAAA,KAAS,YAAA,CAAa,OAAO,OAAO,CAAC,CAAA,CAC5C,GAAA,CAAI,CAAA,KAAA,MAAU;AAAA,MACb,KAAA;AAAA,MACA,KAAA,EAAO,gBAAA,CAAiB,MAAA,EAAQ,KAAA,CAAM,SAAS;AAAA,KACjD,CAAE,EACD,MAAA,CAAO,CAAA,KAAA,KAAS,OAAO,QAAA,CAAS,KAAA,CAAM,KAAK,CAAC,CAAA,CAC5C,KAAK,CAAC,CAAA,EAAG,MAAM,CAAA,CAAE,KAAA,GAAQ,EAAE,KAAK,CAAA,CAChC,KAAA,CAAM,CAAA,EAAG,KAAK,CAAA;AAAA,EACnB;AAAA,EAEA,MAAc,UAAU,OAAA,EAA6C;AACnE,IAAA,MAAM,MAAA,GAAS,IAAA,CAAK,KAAA,CAAM,GAAA,CAAI,OAAO,CAAA;AACrC,IAAA,IAAI,MAAA,EAAQ;AACV,MAAA,OAAO,MAAA;AAAA,IACT;AAEA,IAAA,MAAM,QAAA,GAAW,IAAA,CAAK,YAAA,CAAa,OAAO,CAAA;AAC1C,IAAA,IAAI,CAAE,MAAM,EAAA,CAAG,UAAA,CAAW,QAAQ,CAAA,EAAI;AACpC,MAAA,IAAA,CAAK,KAAA,CAAM,GAAA,CAAI,OAAA,EAAS,EAAE,CAAA;AAC1B,MAAA,OAAO,EAAC;AAAA,IACV;AAEA,IAAA,MAAM,OAAA,GAAW,MAAM,EAAA,CAAG,QAAA,CAAS,QAAQ,CAAA;AAC3C,IAAA,IAAA,CAAK,KAAA,CAAM,GAAA,CAAI,OAAA,EAAS,OAAA,C
AAQ,MAAM,CAAA;AACtC,IAAA,OAAO,OAAA,CAAQ,MAAA;AAAA,EACjB;AAAA,EAEQ,aAAa,OAAA,EAAyB;AAC5C,IAAA,MAAM,MAAA,GAAS,OAAA,CAAQ,OAAA,CAAQ,QAAA,EAAU,GAAG,CAAA;AAC5C,IAAA,OAAO,KAAK,IAAA,CAAK,IAAA,CAAK,QAAQ,QAAA,EAAU,CAAA,EAAG,MAAM,CAAA,KAAA,CAAO,CAAA;AAAA,EAC1D;AACF;AAEA,SAAS,YAAA,CACP,OACA,OAAA,EACS;AACT,EAAA,IAAI,CAAC,OAAA,EAAS;AACZ,IAAA,OAAO,IAAA;AAAA,EACT;AACA,EAAA,IACE,OAAA,CAAQ,WAAW,IAAA,IACnB,CAAC,QAAQ,SAAA,CAAU,GAAA,CAAI,KAAA,CAAM,QAAQ,CAAA,EACrC;AACA,IAAA,OAAO,KAAA;AAAA,EACT;AACA,EAAA,IAAI,QAAQ,WAAA,IAAe,CAAC,QAAQ,WAAA,CAAY,KAAA,CAAM,IAAI,CAAA,EAAG;AAC3D,IAAA,OAAO,KAAA;AAAA,EACT;AACA,EAAA,OAAO,IAAA;AACT;AAKA,SAAS,gBAAA,CAAiB,GAAoB,CAAA,EAA4B;AACxE,EAAA,IAAI,CAAA,CAAE,GAAA,KAAQ,CAAA,CAAE,GAAA,EAAK;AACnB,IAAA,OAAO,CAAA;AAAA,EACT;AACA,EAAA,OAAOC,kBAAA,CAA0B,CAAA,CAAE,MAAA,EAAQ,CAAA,CAAE,MAAM,CAAA;AACrD","file":"index.js","sourcesContent":["import path from 'node:path';\nimport fs from 'fs-extra';\nimport { cosineSimilarity as calculateCosineSimilarity } from '@kb-labs/mind-core';\n\nexport interface EmbeddingVector {\n dim: number;\n values: number[];\n}\n\nexport interface SpanRange {\n startLine: number;\n endLine: number;\n}\n\nexport interface StoredMindChunk {\n chunkId: string;\n scopeId: string;\n sourceId: string;\n path: string;\n span: SpanRange;\n text: string;\n metadata?: Record<string, unknown>;\n embedding: EmbeddingVector;\n}\n\nexport interface FileMetadata {\n path: string;\n mtime: number;\n hash: string;\n}\n\nexport interface MindVectorStoreOptions {\n indexDir: string;\n}\n\nexport interface VectorSearchFilters {\n sourceIds?: Set<string>;\n pathMatcher?: (filePath: string) => boolean;\n}\n\nexport interface VectorSearchMatch {\n chunk: StoredMindChunk;\n score: number;\n}\n\ninterface ScopeIndexFile {\n scopeId: string;\n generatedAt: string;\n chunks: StoredMindChunk[];\n}\n\nexport class MindVectorStore {\n private readonly options: MindVectorStoreOptions;\n private readonly cache = new Map<string, StoredMindChunk[]>();\n\n constructor(options: 
MindVectorStoreOptions) {\n this.options = options;\n }\n\n async replaceScope(\n scopeId: string,\n chunks: StoredMindChunk[],\n ): Promise<void> {\n this.cache.set(scopeId, chunks);\n await fs.ensureDir(this.options.indexDir);\n const filePath = this.getScopePath(scopeId);\n const payload: ScopeIndexFile = {\n scopeId,\n generatedAt: new Date().toISOString(),\n chunks,\n };\n await fs.writeJson(filePath, payload, { spaces: 2 });\n }\n\n async scopeExists(scopeId: string): Promise<boolean> {\n const filePath = this.getScopePath(scopeId);\n return fs.pathExists(filePath);\n }\n\n async updateScope(\n scopeId: string,\n chunks: StoredMindChunk[],\n fileMetadata?: Map<string, FileMetadata>,\n ): Promise<void> {\n if (!fileMetadata || fileMetadata.size === 0) {\n // Fallback to full rebuild if no metadata provided\n return this.replaceScope(scopeId, chunks);\n }\n\n // Get existing chunks for comparison\n const existingChunks = await this.loadScope(scopeId);\n const existingFiles = new Map<string, FileMetadata>();\n\n // Extract file metadata from existing chunks\n for (const chunk of existingChunks) {\n const existingMeta = chunk.metadata as { fileHash?: string; fileMtime?: number } | undefined;\n if (existingMeta?.fileHash && existingMeta?.fileMtime) {\n const currentMeta = existingFiles.get(chunk.path);\n // Keep the latest mtime if multiple chunks from same file\n if (!currentMeta || (existingMeta.fileMtime > (currentMeta.mtime ?? 
0))) {\n existingFiles.set(chunk.path, {\n path: chunk.path,\n mtime: existingMeta.fileMtime,\n hash: existingMeta.fileHash,\n });\n }\n }\n }\n\n // Determine which files changed\n const changedFiles = new Set<string>();\n const deletedFiles = new Set<string>();\n\n // Check for changed or new files\n for (const [path, newMeta] of fileMetadata.entries()) {\n const existingMeta = existingFiles.get(path);\n if (!existingMeta || existingMeta.hash !== newMeta.hash || existingMeta.mtime !== newMeta.mtime) {\n changedFiles.add(path);\n }\n }\n\n // Check for deleted files\n for (const path of existingFiles.keys()) {\n if (!fileMetadata.has(path)) {\n deletedFiles.add(path);\n }\n }\n\n // If everything changed, use full rebuild (more efficient)\n if (changedFiles.size + deletedFiles.size >= existingFiles.size * 0.8) {\n return this.replaceScope(scopeId, chunks);\n }\n\n // Filter out chunks from deleted and changed files\n const unchangedChunks = existingChunks.filter(chunk =>\n !deletedFiles.has(chunk.path) && !changedFiles.has(chunk.path)\n );\n\n // Add new chunks only from changed files\n const newChunks = chunks.filter(chunk => changedFiles.has(chunk.path));\n\n // Combine unchanged and new chunks\n const updatedChunks = [...unchangedChunks, ...newChunks];\n\n // Save updated index\n this.cache.set(scopeId, updatedChunks);\n await fs.ensureDir(this.options.indexDir);\n const filePath = this.getScopePath(scopeId);\n const payload: ScopeIndexFile = {\n scopeId,\n generatedAt: new Date().toISOString(),\n chunks: updatedChunks,\n };\n await fs.writeJson(filePath, payload, { spaces: 2 });\n }\n\n async search(\n scopeId: string,\n vector: EmbeddingVector,\n limit: number,\n filters?: VectorSearchFilters,\n ): Promise<VectorSearchMatch[]> {\n const records = await this.loadScope(scopeId);\n if (records.length === 0) {\n return [];\n }\n\n return records\n .filter(chunk => applyFilters(chunk, filters))\n .map(chunk => ({\n chunk,\n score: cosineSimilarity(vector, 
chunk.embedding),\n }))\n .filter(match => Number.isFinite(match.score))\n .sort((a, b) => b.score - a.score)\n .slice(0, limit);\n }\n\n private async loadScope(scopeId: string): Promise<StoredMindChunk[]> {\n const cached = this.cache.get(scopeId);\n if (cached) {\n return cached;\n }\n\n const filePath = this.getScopePath(scopeId);\n if (!(await fs.pathExists(filePath))) {\n this.cache.set(scopeId, []);\n return [];\n }\n\n const payload = (await fs.readJson(filePath)) as ScopeIndexFile;\n this.cache.set(scopeId, payload.chunks);\n return payload.chunks;\n }\n\n private getScopePath(scopeId: string): string {\n const safeId = scopeId.replace(/[\\\\/]/g, '_');\n return path.join(this.options.indexDir, `${safeId}.json`);\n }\n}\n\nfunction applyFilters(\n chunk: StoredMindChunk,\n filters?: VectorSearchFilters,\n): boolean {\n if (!filters) {\n return true;\n }\n if (\n filters.sourceIds?.size &&\n !filters.sourceIds.has(chunk.sourceId)\n ) {\n return false;\n }\n if (filters.pathMatcher && !filters.pathMatcher(chunk.path)) {\n return false;\n }\n return true;\n}\n\n/**\n * Wrapper for cosineSimilarity that works with EmbeddingVector types\n */\nfunction cosineSimilarity(a: EmbeddingVector, b: EmbeddingVector): number {\n if (a.dim !== b.dim) {\n return 0;\n }\n return calculateCosineSimilarity(a.values, b.values);\n}\n"]}
|
package/package.json
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@kb-labs/mind-vector-store",
|
|
3
|
+
"version": "1.5.0",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"description": "Lightweight vector storage for KB Labs Mind RAG pipeline.",
|
|
6
|
+
"main": "./dist/index.js",
|
|
7
|
+
"types": "./dist/index.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"types": "./dist/index.d.ts",
|
|
11
|
+
"import": "./dist/index.js",
|
|
12
|
+
"require": "./dist/index.js"
|
|
13
|
+
}
|
|
14
|
+
},
|
|
15
|
+
"files": [
|
|
16
|
+
"dist",
|
|
17
|
+
"README.md",
|
|
18
|
+
"LICENSE"
|
|
19
|
+
],
|
|
20
|
+
"sideEffects": false,
|
|
21
|
+
"scripts": {
|
|
22
|
+
"clean": "rimraf dist",
|
|
23
|
+
"build": "tsup --config tsup.config.ts",
|
|
24
|
+
"dev": "tsup --config tsup.config.ts --watch",
|
|
25
|
+
"lint": "eslint src --ext .ts,.tsx,.js,.jsx",
|
|
26
|
+
"lint:fix": "eslint . --fix",
|
|
27
|
+
"type-check": "tsc --noEmit",
|
|
28
|
+
"test": "vitest run --passWithNoTests",
|
|
29
|
+
"test:watch": "vitest --passWithNoTests"
|
|
30
|
+
},
|
|
31
|
+
"dependencies": {
|
|
32
|
+
"@kb-labs/mind-core": "^1.5.0",
|
|
33
|
+
"fs-extra": "^11.0.0"
|
|
34
|
+
},
|
|
35
|
+
"devDependencies": {
|
|
36
|
+
"@kb-labs/devkit": "link:../../../../infra/kb-labs-devkit",
|
|
37
|
+
"@types/fs-extra": "^11.0.0",
|
|
38
|
+
"@types/node": "^24.3.3",
|
|
39
|
+
"rimraf": "^6.0.1",
|
|
40
|
+
"tsup": "^8.5.0",
|
|
41
|
+
"typescript": "^5.6.3",
|
|
42
|
+
"vitest": "^3.2.4"
|
|
43
|
+
},
|
|
44
|
+
"engines": {
|
|
45
|
+
"node": ">=20.0.0",
|
|
46
|
+
"pnpm": ">=9.0.0"
|
|
47
|
+
},
|
|
48
|
+
"packageManager": "pnpm@9.11.0"
|
|
49
|
+
}
|