verso-db 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +46 -0
- package/LICENSE +21 -0
- package/README.md +252 -0
- package/dist/BinaryHeap.d.ts +25 -0
- package/dist/BinaryHeap.d.ts.map +1 -0
- package/dist/Collection.d.ts +156 -0
- package/dist/Collection.d.ts.map +1 -0
- package/dist/HNSWIndex.d.ts +357 -0
- package/dist/HNSWIndex.d.ts.map +1 -0
- package/dist/MaxBinaryHeap.d.ts +63 -0
- package/dist/MaxBinaryHeap.d.ts.map +1 -0
- package/dist/Storage.d.ts +54 -0
- package/dist/Storage.d.ts.map +1 -0
- package/dist/VectorDB.d.ts +44 -0
- package/dist/VectorDB.d.ts.map +1 -0
- package/dist/backends/DistanceBackend.d.ts +5 -0
- package/dist/backends/DistanceBackend.d.ts.map +1 -0
- package/dist/backends/JsDistanceBackend.d.ts +37 -0
- package/dist/backends/JsDistanceBackend.d.ts.map +1 -0
- package/dist/encoding/DeltaEncoder.d.ts +61 -0
- package/dist/encoding/DeltaEncoder.d.ts.map +1 -0
- package/dist/errors.d.ts +58 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/index.d.ts +64 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +3732 -0
- package/dist/presets.d.ts +91 -0
- package/dist/presets.d.ts.map +1 -0
- package/dist/quantization/ScalarQuantizer.d.ts +114 -0
- package/dist/quantization/ScalarQuantizer.d.ts.map +1 -0
- package/dist/storage/BatchWriter.d.ts +104 -0
- package/dist/storage/BatchWriter.d.ts.map +1 -0
- package/dist/storage/BunStorageBackend.d.ts +58 -0
- package/dist/storage/BunStorageBackend.d.ts.map +1 -0
- package/dist/storage/MemoryBackend.d.ts +44 -0
- package/dist/storage/MemoryBackend.d.ts.map +1 -0
- package/dist/storage/OPFSBackend.d.ts +59 -0
- package/dist/storage/OPFSBackend.d.ts.map +1 -0
- package/dist/storage/StorageBackend.d.ts +66 -0
- package/dist/storage/StorageBackend.d.ts.map +1 -0
- package/dist/storage/WriteAheadLog.d.ts +111 -0
- package/dist/storage/WriteAheadLog.d.ts.map +1 -0
- package/dist/storage/createStorageBackend.d.ts +40 -0
- package/dist/storage/createStorageBackend.d.ts.map +1 -0
- package/dist/storage/index.d.ts +30 -0
- package/dist/storage/index.d.ts.map +1 -0
- package/package.json +98 -0
- package/src/BinaryHeap.ts +131 -0
- package/src/Collection.ts +695 -0
- package/src/HNSWIndex.ts +1839 -0
- package/src/MaxBinaryHeap.ts +175 -0
- package/src/Storage.ts +435 -0
- package/src/VectorDB.ts +109 -0
- package/src/backends/DistanceBackend.ts +17 -0
- package/src/backends/JsDistanceBackend.ts +227 -0
- package/src/encoding/DeltaEncoder.ts +217 -0
- package/src/errors.ts +110 -0
- package/src/index.ts +138 -0
- package/src/presets.ts +229 -0
- package/src/quantization/ScalarQuantizer.ts +383 -0
- package/src/storage/BatchWriter.ts +336 -0
- package/src/storage/BunStorageBackend.ts +161 -0
- package/src/storage/MemoryBackend.ts +120 -0
- package/src/storage/OPFSBackend.ts +250 -0
- package/src/storage/StorageBackend.ts +74 -0
- package/src/storage/WriteAheadLog.ts +326 -0
- package/src/storage/createStorageBackend.ts +137 -0
- package/src/storage/index.ts +53 -0
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
## [0.1.1](https://github.com/briansunter/verso/compare/v0.1.0...v0.1.1) (2025-12-29)
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
### Bug Fixes
|
|
5
|
+
|
|
6
|
+
* update package for fresh publish ([62f7bb9](https://github.com/briansunter/verso/commit/62f7bb9d13bd38fea3ae5c01053bdbc59222f58b))
|
|
7
|
+
|
|
8
|
+
# [0.1.0](https://github.com/briansunter/verso/compare/v0.0.1...v0.1.0) (2025-12-29)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
### Features
|
|
12
|
+
|
|
13
|
+
* initial release of verso-db ([2afb903](https://github.com/briansunter/verso/commit/2afb903df981e940489909510fbb5e65c3f29ab1)), closes [Hi#performance](https://github.com/Hi/issues/performance)
|
|
14
|
+
|
|
15
|
+
## [0.0.1](https://github.com/briansunter/verso/compare/v0.0.0...v0.0.1) (2025-12-27)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
### Bug Fixes
|
|
19
|
+
|
|
20
|
+
* prepare for initial release ([6740745](https://github.com/briansunter/verso/commit/67407459b261e3d40aee108800ca151c94a46b4f))
|
|
21
|
+
* trigger release with clean tag history ([d57f9a9](https://github.com/briansunter/verso/commit/d57f9a90d3734992cc698cc208cfa58fd7f7c07d))
|
|
22
|
+
|
|
23
|
+
# [1.1.0](https://github.com/briansunter/verso/compare/v1.0.1...v1.1.0) (2025-12-27)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
### Bug Fixes
|
|
27
|
+
|
|
28
|
+
* initial release of verso-db v0.0.1 ([2de2e36](https://github.com/briansunter/verso/commit/2de2e36b043d450ac13fe71f3bd21d404efffd53)), closes [Hi#performance](https://github.com/Hi/issues/performance)
|
|
29
|
+
* prepare for initial release ([6740745](https://github.com/briansunter/verso/commit/67407459b261e3d40aee108800ca151c94a46b4f))
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
### Features
|
|
33
|
+
|
|
34
|
+
* initial release of verso-db v0.0.1 ([cbcbf70](https://github.com/briansunter/verso/commit/cbcbf70c412e8fff9170d9385d761e3b85bf9914)), closes [Hi#performance](https://github.com/Hi/issues/performance)
|
|
35
|
+
|
|
36
|
+
# [1.1.0](https://github.com/briansunter/verso/compare/v1.0.1...v1.1.0) (2025-12-27)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
### Bug Fixes
|
|
40
|
+
|
|
41
|
+
* initial release of verso-db v0.0.1 ([2de2e36](https://github.com/briansunter/verso/commit/2de2e36b043d450ac13fe71f3bd21d404efffd53)), closes [Hi#performance](https://github.com/Hi/issues/performance)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
### Features
|
|
45
|
+
|
|
46
|
+
* initial release of verso-db v0.0.1 ([cbcbf70](https://github.com/briansunter/verso/commit/cbcbf70c412e8fff9170d9385d761e3b85bf9914)), closes [Hi#performance](https://github.com/Hi/issues/performance)
|
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Brian Sunter
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
# Verso
|
|
2
|
+
|
|
3
|
+
High-performance vector search with HNSW indexing for Bun and Browser.
|
|
4
|
+
|
|
5
|
+
[npm package](https://www.npmjs.com/package/verso-db)
|
|
6
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
7
|
+
|
|
8
|
+
## Performance
|
|
9
|
+
|
|
10
|
+
| Metric | Value |
|
|
11
|
+
|--------|-------|
|
|
12
|
+
| Recall@10 | **100%** on 768D Wikipedia embeddings |
|
|
13
|
+
| Query Performance | **95.5%** improvement from baseline |
|
|
14
|
+
| Memory Reduction | **4x** with Int8 quantization |
|
|
15
|
+
|
|
16
|
+
## Features
|
|
17
|
+
|
|
18
|
+
- **HNSW Algorithm** - Hierarchical Navigable Small World for fast approximate nearest neighbor search
|
|
19
|
+
- **Multiple Distance Metrics** - Cosine similarity, Euclidean, dot product
|
|
20
|
+
- **Int8 Quantization** - 4x memory reduction with minimal recall loss
|
|
21
|
+
- **Multi-Platform** - Bun (file system) and Browser (OPFS)
|
|
22
|
+
- **Parameter Presets** - Pre-tuned configurations for different use cases
|
|
23
|
+
- **Batch Queries** - Efficient batch processing for throughput
|
|
24
|
+
- **Metadata Filtering** - MongoDB-style query operators
|
|
25
|
+
|
|
26
|
+
## Installation
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
# Bun
|
|
30
|
+
bun add verso-db
|
|
31
|
+
|
|
32
|
+
# npm
|
|
33
|
+
npm install verso-db
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Quick Start
|
|
37
|
+
|
|
38
|
+
```typescript
|
|
39
|
+
import { VectorDB, getRecommendedPreset } from 'verso-db';
|
|
40
|
+
|
|
41
|
+
// Create database
|
|
42
|
+
const db = new VectorDB();
|
|
43
|
+
|
|
44
|
+
// Get recommended parameters for your vector dimensions
|
|
45
|
+
const preset = getRecommendedPreset(768);
|
|
46
|
+
|
|
47
|
+
// Create collection
|
|
48
|
+
const collection = await db.createCollection('documents', {
|
|
49
|
+
dimension: 768,
|
|
50
|
+
metric: 'cosine',
|
|
51
|
+
M: preset.M,
|
|
52
|
+
efConstruction: preset.efConstruction
|
|
53
|
+
});
|
|
54
|
+
|
|
55
|
+
// Add vectors
|
|
56
|
+
await collection.add({
|
|
57
|
+
ids: ['doc1', 'doc2', 'doc3'],
|
|
58
|
+
vectors: [
|
|
59
|
+
new Float32Array(768).fill(0.1),
|
|
60
|
+
new Float32Array(768).fill(0.2),
|
|
61
|
+
new Float32Array(768).fill(0.3)
|
|
62
|
+
],
|
|
63
|
+
metadata: [
|
|
64
|
+
{ title: 'Document 1', category: 'tech' },
|
|
65
|
+
{ title: 'Document 2', category: 'science' },
|
|
66
|
+
{ title: 'Document 3', category: 'tech' }
|
|
67
|
+
]
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
// Query
|
|
71
|
+
const results = await collection.query({
|
|
72
|
+
queryVector: new Float32Array(768).fill(0.15),
|
|
73
|
+
k: 10,
|
|
74
|
+
efSearch: preset.efSearch
|
|
75
|
+
});
|
|
76
|
+
|
|
77
|
+
console.log(results);
|
|
78
|
+
// { ids: ['doc1', ...], distances: [...], metadata: [{ title: 'Document 1', ... }, ...] }
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## API Reference
|
|
82
|
+
|
|
83
|
+
### VectorDB
|
|
84
|
+
|
|
85
|
+
Main database class for managing collections.
|
|
86
|
+
|
|
87
|
+
```typescript
|
|
88
|
+
const db = new VectorDB();
|
|
89
|
+
|
|
90
|
+
// Create collection
|
|
91
|
+
const collection = await db.createCollection('name', {
|
|
92
|
+
dimension: 768,
|
|
93
|
+
metric: 'cosine', // 'cosine' | 'euclidean' | 'dot_product'
|
|
94
|
+
M: 16, // Max connections per node
|
|
95
|
+
efConstruction: 200 // Build-time search depth
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
// Get existing collection
|
|
99
|
+
const collection = await db.getCollection('name');
|
|
100
|
+
|
|
101
|
+
// List collections
|
|
102
|
+
const names = await db.listCollections();
|
|
103
|
+
|
|
104
|
+
// Delete collection
|
|
105
|
+
await db.deleteCollection('name');
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### Collection
|
|
109
|
+
|
|
110
|
+
```typescript
|
|
111
|
+
// Add vectors
|
|
112
|
+
await collection.add({
|
|
113
|
+
ids: string[],
|
|
114
|
+
vectors: number[][],
|
|
115
|
+
metadata?: object[]
|
|
116
|
+
});
|
|
117
|
+
|
|
118
|
+
// Query
|
|
119
|
+
const results = await collection.query({
|
|
120
|
+
queryVector: number[],
|
|
121
|
+
k: number,
|
|
122
|
+
efSearch?: number,
|
|
123
|
+
filter?: FilterExpression
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
// Batch query
|
|
127
|
+
const batchResults = await collection.queryBatch([
|
|
128
|
+
{ queryVector: number[], k: number, efSearch?: number },
|
|
129
|
+
{ queryVector: number[], k: number, efSearch?: number }
|
|
130
|
+
// ...one query config per query vector
|
|
131
|
+
]);
|
|
132
|
+
|
|
133
|
+
// Delete vectors
|
|
134
|
+
collection.deleteBatch(ids: string[]); // or collection.delete(id: string) for a single vector
|
|
135
|
+
|
|
136
|
+
// Get collection info
|
|
137
|
+
const count = collection.count(); // active vectors; see also countWithDeleted() and deletedCount()
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
### Metadata Filtering
|
|
141
|
+
|
|
142
|
+
```typescript
|
|
143
|
+
const results = await collection.query({
|
|
144
|
+
queryVector: queryVec,
|
|
145
|
+
k: 10,
|
|
146
|
+
filter: {
|
|
147
|
+
category: 'tech', // Exact match
|
|
148
|
+
score: { $gt: 0.5 }, // Greater than
|
|
149
|
+
tags: { $in: ['ai', 'ml'] } // In array
|
|
150
|
+
}
|
|
151
|
+
});
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
Supported operators: `$eq`, `$ne`, `$gt`, `$gte`, `$lt`, `$lte`, `$in`, `$nin`
|
|
155
|
+
|
|
156
|
+
### Parameter Presets
|
|
157
|
+
|
|
158
|
+
```typescript
|
|
159
|
+
import { getRecommendedPreset, getRAGPreset, PRESETS } from 'verso-db';
|
|
160
|
+
|
|
161
|
+
// Automatic preset based on dimensions
|
|
162
|
+
const preset = getRecommendedPreset(768);
|
|
163
|
+
|
|
164
|
+
// RAG-optimized preset (high recall)
|
|
165
|
+
const ragPreset = getRAGPreset(768);
|
|
166
|
+
|
|
167
|
+
// Available presets
|
|
168
|
+
PRESETS.LOW_DIM // <= 128 dimensions
|
|
169
|
+
PRESETS.MEDIUM_DIM // 256-512 dimensions
|
|
170
|
+
PRESETS.HIGH_DIM // 768+ dimensions
|
|
171
|
+
PRESETS.VERY_HIGH_DIM // 1024+ dimensions
|
|
172
|
+
PRESETS.SMALL_DATASET // < 10k vectors
|
|
173
|
+
PRESETS.LARGE_DATASET // 100k+ vectors
|
|
174
|
+
PRESETS.MAX_RECALL // Prioritize accuracy
|
|
175
|
+
PRESETS.LOW_LATENCY // Prioritize speed
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### Int8 Quantization
|
|
179
|
+
|
|
180
|
+
Reduce memory usage by 4x with minimal recall loss:
|
|
181
|
+
|
|
182
|
+
```typescript
|
|
183
|
+
import { ScalarQuantizer, QuantizedVectorStore } from 'verso-db';
|
|
184
|
+
|
|
185
|
+
// Create quantizer
|
|
186
|
+
const quantizer = new ScalarQuantizer(768);
|
|
187
|
+
|
|
188
|
+
// Train on sample vectors
|
|
189
|
+
quantizer.train(sampleVectors);
|
|
190
|
+
|
|
191
|
+
// Quantize vectors
|
|
192
|
+
const quantized = quantizer.quantize(vector);
|
|
193
|
+
|
|
194
|
+
// Use with QuantizedVectorStore for integrated search
|
|
195
|
+
const store = new QuantizedVectorStore(768);
|
|
196
|
+
store.add(ids, vectors);
|
|
197
|
+
const results = store.search(queryVector, k);
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
## Storage Backends
|
|
201
|
+
|
|
202
|
+
Verso automatically selects the appropriate storage backend:
|
|
203
|
+
|
|
204
|
+
| Environment | Backend | Storage |
|
|
205
|
+
|-------------|---------|---------|
|
|
206
|
+
| Bun | `BunStorageBackend` | File system |
|
|
207
|
+
| Browser | `OPFSBackend` | Origin Private File System |
|
|
208
|
+
| Fallback | `MemoryBackend` | In-memory (no persistence) |
|
|
209
|
+
|
|
210
|
+
```typescript
|
|
211
|
+
import { createStorageBackend, getRecommendedStorageType } from 'verso-db';
|
|
212
|
+
|
|
213
|
+
// Automatic detection
|
|
214
|
+
const backend = await createStorageBackend({
|
|
215
|
+
basePath: './vectors'
|
|
216
|
+
});
|
|
217
|
+
|
|
218
|
+
// Check available types
|
|
219
|
+
const type = getRecommendedStorageType(); // 'bun' | 'opfs' | 'memory'
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
## Benchmarks
|
|
223
|
+
|
|
224
|
+
See [docs/BENCHMARKS.md](docs/BENCHMARKS.md) for detailed performance analysis.
|
|
225
|
+
|
|
226
|
+
**Quick Summary (768D vectors, Cohere Wikipedia dataset):**
|
|
227
|
+
- 100% Recall@10 with optimized parameters
|
|
228
|
+
- 95.5% query performance improvement through optimizations
|
|
229
|
+
- 4x memory reduction with Int8 quantization
|
|
230
|
+
|
|
231
|
+
## Development
|
|
232
|
+
|
|
233
|
+
```bash
|
|
234
|
+
# Install dependencies
|
|
235
|
+
bun install
|
|
236
|
+
|
|
237
|
+
# Run tests
|
|
238
|
+
bun test
|
|
239
|
+
|
|
240
|
+
# Run browser tests
|
|
241
|
+
bun run test:browser
|
|
242
|
+
|
|
243
|
+
# Build
|
|
244
|
+
bun run build
|
|
245
|
+
|
|
246
|
+
# Run benchmarks
|
|
247
|
+
bun run bench
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
## License
|
|
251
|
+
|
|
252
|
+
MIT
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
export declare class BinaryHeap {
|
|
2
|
+
private ids;
|
|
3
|
+
private dists;
|
|
4
|
+
private _size;
|
|
5
|
+
private capacity;
|
|
6
|
+
private lastPoppedValue;
|
|
7
|
+
constructor(capacity: number);
|
|
8
|
+
/**
|
|
9
|
+
* Push an element onto the heap.
|
|
10
|
+
* When at capacity, simply returns without adding (caller manages bounds).
|
|
11
|
+
* This is the correct behavior for HNSW candidate exploration.
|
|
12
|
+
*/
|
|
13
|
+
push(id: number, dist: number): void;
|
|
14
|
+
pop(): number;
|
|
15
|
+
peek(): number;
|
|
16
|
+
peekValue(): number;
|
|
17
|
+
getLastPoppedValue(): number;
|
|
18
|
+
size(): number;
|
|
19
|
+
clear(): void;
|
|
20
|
+
isEmpty(): boolean;
|
|
21
|
+
getCapacity(): number;
|
|
22
|
+
private heapifyUp;
|
|
23
|
+
private heapifyDown;
|
|
24
|
+
}
|
|
25
|
+
//# sourceMappingURL=BinaryHeap.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"BinaryHeap.d.ts","sourceRoot":"","sources":["../src/BinaryHeap.ts"],"names":[],"mappings":"AAAA,qBAAa,UAAU;IACrB,OAAO,CAAC,GAAG,CAAc;IACzB,OAAO,CAAC,KAAK,CAAe;IAC5B,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,eAAe,CAAa;gBAExB,QAAQ,EAAE,MAAM;IAO5B;;;;OAIG;IACH,IAAI,CAAC,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,IAAI;IAapC,GAAG,IAAI,MAAM;IAcb,IAAI,IAAI,MAAM;IAId,SAAS,IAAI,MAAM;IAInB,kBAAkB,IAAI,MAAM;IAI5B,IAAI,IAAI,MAAM;IAId,KAAK,IAAI,IAAI;IAIb,OAAO,IAAI,OAAO;IAIlB,WAAW,IAAI,MAAM;IAIrB,OAAO,CAAC,SAAS;IAuBjB,OAAO,CAAC,WAAW;CAiCpB"}
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
import { DistanceMetric } from './HNSWIndex';
|
|
2
|
+
/**
|
|
3
|
+
* Configuration for adding vectors to a collection.
|
|
4
|
+
*/
|
|
5
|
+
export interface AddConfig {
|
|
6
|
+
/** Unique string identifiers for each vector */
|
|
7
|
+
ids: string[];
|
|
8
|
+
/** Array of vectors to add (must match collection dimension) */
|
|
9
|
+
vectors: number[][];
|
|
10
|
+
/** Optional metadata for each vector (same length as ids/vectors) */
|
|
11
|
+
metadata?: Array<Record<string, any>>;
|
|
12
|
+
}
|
|
13
|
+
/**
|
|
14
|
+
* Configuration for querying a collection.
|
|
15
|
+
*
|
|
16
|
+
* @example
|
|
17
|
+
* ```typescript
|
|
18
|
+
* // Simple query
|
|
19
|
+
* const results = await collection.query({
|
|
20
|
+
* queryVector: [0.1, 0.2, 0.3],
|
|
21
|
+
* k: 10
|
|
22
|
+
* });
|
|
23
|
+
*
|
|
24
|
+
* // Query with metadata filter
|
|
25
|
+
* const results = await collection.query({
|
|
26
|
+
* queryVector: [0.1, 0.2, 0.3],
|
|
27
|
+
* k: 10,
|
|
28
|
+
* filter: {
|
|
29
|
+
* category: 'science', // Exact match
|
|
30
|
+
* year: { $gte: 2020 }, // Greater than or equal
|
|
31
|
+
* status: { $in: ['active', 'pending'] } // In array
|
|
32
|
+
* }
|
|
33
|
+
* });
|
|
34
|
+
* ```
|
|
35
|
+
*/
|
|
36
|
+
export interface QueryConfig {
|
|
37
|
+
/** The query vector (must match collection dimension) */
|
|
38
|
+
queryVector: number[];
|
|
39
|
+
/** Number of nearest neighbors to return */
|
|
40
|
+
k: number;
|
|
41
|
+
/**
|
|
42
|
+
* Optional metadata filter. Supports MongoDB-style operators:
|
|
43
|
+
* - Simple equality: `{ field: value }`
|
|
44
|
+
* - `$gt`: Greater than `{ field: { $gt: 5 } }`
|
|
45
|
+
* - `$gte`: Greater than or equal `{ field: { $gte: 5 } }`
|
|
46
|
+
* - `$lt`: Less than `{ field: { $lt: 10 } }`
|
|
47
|
+
* - `$lte`: Less than or equal `{ field: { $lte: 10 } }`
|
|
48
|
+
* - `$ne`: Not equal `{ field: { $ne: 'excluded' } }`
|
|
49
|
+
* - `$in`: In array `{ field: { $in: ['a', 'b', 'c'] } }`
|
|
50
|
+
* - `$nin`: Not in array `{ field: { $nin: ['x', 'y'] } }`
|
|
51
|
+
*/
|
|
52
|
+
filter?: Record<string, any>;
|
|
53
|
+
/** Search effort parameter (higher = better recall, slower). Default: max(k*2, 50) */
|
|
54
|
+
efSearch?: number;
|
|
55
|
+
}
|
|
56
|
+
export interface QueryResult {
|
|
57
|
+
ids: string[];
|
|
58
|
+
distances: number[];
|
|
59
|
+
metadata: Array<Record<string, any>>;
|
|
60
|
+
}
|
|
61
|
+
export declare class Collection {
|
|
62
|
+
private name;
|
|
63
|
+
private dimension;
|
|
64
|
+
private metric;
|
|
65
|
+
private M;
|
|
66
|
+
private efConstruction;
|
|
67
|
+
private indexPath;
|
|
68
|
+
private metaPath;
|
|
69
|
+
private deletedPath;
|
|
70
|
+
private hnsw;
|
|
71
|
+
private idMap;
|
|
72
|
+
private idReverseMap;
|
|
73
|
+
private metadata;
|
|
74
|
+
private deletedIds;
|
|
75
|
+
constructor(name: string, config: {
|
|
76
|
+
dimension: number;
|
|
77
|
+
metric?: DistanceMetric;
|
|
78
|
+
M?: number;
|
|
79
|
+
efConstruction?: number;
|
|
80
|
+
}, collectionPath: string);
|
|
81
|
+
init(): Promise<void>;
|
|
82
|
+
private loadFromDisk;
|
|
83
|
+
add(config: AddConfig): Promise<void>;
|
|
84
|
+
query(config: QueryConfig): Promise<QueryResult>;
|
|
85
|
+
/**
|
|
86
|
+
* Batch query for multiple vectors at once.
|
|
87
|
+
* More efficient than calling query() multiple times.
|
|
88
|
+
*
|
|
89
|
+
* @param configs Array of query configurations
|
|
90
|
+
* @returns Array of query results, one per query
|
|
91
|
+
*/
|
|
92
|
+
queryBatch(configs: QueryConfig[]): Promise<QueryResult[]>;
|
|
93
|
+
/**
|
|
94
|
+
* Brute-force KNN search for validation and correctness checking
|
|
95
|
+
* This checks all vectors and returns the true k nearest neighbors
|
|
96
|
+
*/
|
|
97
|
+
queryBruteForce(config: QueryConfig): Promise<QueryResult>;
|
|
98
|
+
/**
|
|
99
|
+
* Insert vectors. Despite the name, existing IDs are not updated in place (see the note below).
|
|
100
|
+
*
|
|
101
|
+
* NOTE: HNSW indices don't support efficient in-place updates.
|
|
102
|
+
* Duplicate IDs will throw an error. For updates, rebuild the index
|
|
103
|
+
* without the old vectors and add the new ones.
|
|
104
|
+
*
|
|
105
|
+
* @throws Error if any ID already exists
|
|
106
|
+
*/
|
|
107
|
+
upsert(config: AddConfig): Promise<void>;
|
|
108
|
+
/**
|
|
109
|
+
* Returns the number of active (non-deleted) vectors in the collection.
|
|
110
|
+
*/
|
|
111
|
+
count(): number;
|
|
112
|
+
/**
|
|
113
|
+
* Returns the total number of vectors including deleted (tombstoned) ones.
|
|
114
|
+
* Use this to determine when compaction might be beneficial.
|
|
115
|
+
*/
|
|
116
|
+
countWithDeleted(): number;
|
|
117
|
+
/**
|
|
118
|
+
* Returns the number of deleted (tombstoned) vectors.
|
|
119
|
+
*/
|
|
120
|
+
deletedCount(): number;
|
|
121
|
+
/**
|
|
122
|
+
* Mark a vector as deleted (tombstone deletion).
|
|
123
|
+
* The vector remains in the index but is excluded from search results.
|
|
124
|
+
* Use compact() to permanently remove deleted vectors and reclaim space.
|
|
125
|
+
*
|
|
126
|
+
* @param id The string ID of the vector to delete
|
|
127
|
+
* @returns true if the vector was deleted, false if it didn't exist or was already deleted
|
|
128
|
+
*/
|
|
129
|
+
delete(id: string): boolean;
|
|
130
|
+
/**
|
|
131
|
+
* Mark multiple vectors as deleted (tombstone deletion).
|
|
132
|
+
*
|
|
133
|
+
* @param ids Array of string IDs to delete
|
|
134
|
+
* @returns Number of vectors that were successfully deleted
|
|
135
|
+
*/
|
|
136
|
+
deleteBatch(ids: string[]): number;
|
|
137
|
+
/**
|
|
138
|
+
* Check if a vector exists and is not deleted.
|
|
139
|
+
*/
|
|
140
|
+
has(id: string): boolean;
|
|
141
|
+
/**
|
|
142
|
+
* Check if a vector was deleted (tombstoned).
|
|
143
|
+
*/
|
|
144
|
+
isDeleted(id: string): boolean;
|
|
145
|
+
saveToDisk(): Promise<void>;
|
|
146
|
+
private matchesFilter;
|
|
147
|
+
/**
|
|
148
|
+
* Compact the collection by rebuilding the index without deleted vectors.
|
|
149
|
+
* This permanently removes tombstoned vectors and reclaims space.
|
|
150
|
+
*
|
|
151
|
+
* @returns Number of vectors removed during compaction
|
|
152
|
+
*/
|
|
153
|
+
compact(): Promise<number>;
|
|
154
|
+
destroy(): Promise<void>;
|
|
155
|
+
}
|
|
156
|
+
//# sourceMappingURL=Collection.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"Collection.d.ts","sourceRoot":"","sources":["../src/Collection.ts"],"names":[],"mappings":"AAAA,OAAO,EAAa,cAAc,EAAE,MAAM,aAAa,CAAC;AAIxD;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,gDAAgD;IAChD,GAAG,EAAE,MAAM,EAAE,CAAC;IACd,gEAAgE;IAChE,OAAO,EAAE,MAAM,EAAE,EAAE,CAAC;IACpB,qEAAqE;IACrE,QAAQ,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,CAAC;CACvC;AAED;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,MAAM,WAAW,WAAW;IAC1B,yDAAyD;IACzD,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,4CAA4C;IAC5C,CAAC,EAAE,MAAM,CAAC;IACV;;;;;;;;;;OAUG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC7B,sFAAsF;IACtF,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,WAAW;IAC1B,GAAG,EAAE,MAAM,EAAE,CAAC;IACd,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,QAAQ,EAAE,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,CAAC;CACtC;AAED,qBAAa,UAAU;IACrB,OAAO,CAAC,IAAI,CAAS;IACrB,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,MAAM,CAAiB;IAC/B,OAAO,CAAC,CAAC,CAAS;IAClB,OAAO,CAAC,cAAc,CAAS;IAC/B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,IAAI,CAAY;IACxB,OAAO,CAAC,KAAK,CAAsB;IACnC,OAAO,CAAC,YAAY,CAAsB;IAC1C,OAAO,CAAC,QAAQ,CAAmC;IACnD,OAAO,CAAC,UAAU,CAAc;gBAEpB,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,MAAM,CAAC,EAAE,cAAc,CAAC;QAAC,CAAC,CAAC,EAAE,MAAM,CAAC;QAAC,cAAc,CAAC,EAAE,MAAM,CAAA;KAAE,EAAE,cAAc,EAAE,MAAM;IAwBvI,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;YAKb,YAAY;IA8DpB,GAAG,CAAC,MAAM,EAAE,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC;IAyDrC,KAAK,CAAC,MAAM,EAAE,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;IAuEtD;;;;;;OAMG;IACG,UAAU,CAAC,OAAO,EAAE,WAAW,EAAE,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;IA2FhE;;;OAGG;IACG,eAAe,CAAC,MAAM,EAAE,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;IA0DhE;;;;;;;;OAQG;IACG,MAAM,CAAC,MAAM,EAAE,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC;IAqB9C;;OAEG;IACH,KAAK,IAAI,MAAM;IAIf;;;OAGG;IACH,gBAAgB,IAAI,MAAM;IAI1B;;OAEG;IACH,YAAY,IAAI,MAAM;IAItB;;;;;;;OAOG;IACH,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO;IAS3B;;;;;OAKG;IACH,WAAW,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,MAAM;IAQlC;;OAEG;IACH,GAAG,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO;IAMxB;;OAEG;IACH,SAAS,CAAC,EAAE,E
AAE,MAAM,GAAG,OAAO;IAMxB,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IA6BjC,OAAO,CAAC,aAAa;IAsBrB;;;;;OAKG;IACG,OAAO,IAAI,OAAO,CAAC,MAAM,CAAC;IA6D1B,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAe/B"}
|