vectlite 0.1.8 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +98 -2
- package/index.d.ts +15 -1
- package/index.js +26 -3
- package/native/Cargo.toml +1 -1
- package/native/src/lib.rs +247 -21
- package/native/vectlite-core/Cargo.toml +1 -1
- package/native/vectlite-core/src/lib.rs +842 -27
- package/native/vectlite-core/src/quantization.rs +1087 -0
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/vectlite.node +0 -0
- package/prebuilds/darwin-x64/vectlite.node +0 -0
- package/prebuilds/linux-x64-gnu/vectlite.node +0 -0
- package/prebuilds/win32-x64-msvc/vectlite.node +0 -0
package/README.md
CHANGED
|
@@ -31,8 +31,11 @@ db.upsert('doc2', embedding2, { source: 'notes', title: 'Billing' })
|
|
|
31
31
|
// Search with filters
|
|
32
32
|
const results = db.search(embeddingQuery, { k: 5, filter: { source: 'blog' } })
|
|
33
33
|
|
|
34
|
+
// Query-free inspection
|
|
35
|
+
console.log(db.count({ filter: { source: 'blog' } }))
|
|
36
|
+
|
|
34
37
|
// Clean up
|
|
35
|
-
db.
|
|
38
|
+
db.close()
|
|
36
39
|
```
|
|
37
40
|
|
|
38
41
|
## Features
|
|
@@ -43,6 +46,7 @@ db.compact()
|
|
|
43
46
|
- **Dense vectors** -- cosine similarity with automatic HNSW indexing for large collections
|
|
44
47
|
- **Sparse vectors** -- BM25-scored inverted index for keyword retrieval
|
|
45
48
|
- **Hybrid search** -- dense + sparse fusion with linear or RRF strategies
|
|
49
|
+
- **Vector quantization** -- scalar (int8, 4x), binary (32x), and product quantization (PQ) with 2-stage rescoring
|
|
46
50
|
- **Rich metadata** -- string, number, boolean, null, array, and nested object values
|
|
47
51
|
- **Crash-safe WAL** -- writes land in a write-ahead log first, then checkpoint with `compact()`
|
|
48
52
|
- **Transactions** -- atomic batched writes with `db.transaction()`
|
|
@@ -62,9 +66,13 @@ db.compact()
|
|
|
62
66
|
|
|
63
67
|
- **Physical collections** -- `vectlite.openStore()` manages a directory of independent databases
|
|
64
68
|
- **Bulk ingestion** -- `bulkIngest()` with deferred index rebuilds for fast imports
|
|
69
|
+
- **Listing & filtered counts** -- `list()` and `count({ namespace, filter })` without a vector query
|
|
70
|
+
- **Delete by filter** -- `deleteByFilter()` for bulk deletion by metadata filter
|
|
65
71
|
- **Snapshots** -- `db.snapshot(path)` creates a self-contained copy
|
|
66
72
|
- **Backup / Restore** -- `db.backup(dir)` and `vectlite.restore(dir, path)` for full roundtrips
|
|
67
73
|
- **Read-only mode** -- `vectlite.open(path, { readOnly: true })` for safe concurrent readers
|
|
74
|
+
- **Explicit close** -- `db.close()` to release locks deterministically
|
|
75
|
+
- **Lock timeouts** -- `lockTimeout` for bounded lock acquisition waits
|
|
68
76
|
|
|
69
77
|
## Usage
|
|
70
78
|
|
|
@@ -145,11 +153,23 @@ const restored = vectlite.restore('/backups/full/', 'restored.vdb')
|
|
|
145
153
|
### Read-Only Mode
|
|
146
154
|
|
|
147
155
|
```js
|
|
148
|
-
const ro = vectlite.open('knowledge.vdb', { readOnly: true })
|
|
156
|
+
const ro = vectlite.open('knowledge.vdb', { readOnly: true, lockTimeout: 5 })
|
|
149
157
|
const results = ro.search(query, { k: 5 }) // Reads work
|
|
150
158
|
ro.upsert(...) // Throws VectLiteError
|
|
151
159
|
```
|
|
152
160
|
|
|
161
|
+
### Listing, Counting, and Lifecycle
|
|
162
|
+
|
|
163
|
+
```js
|
|
164
|
+
const db = vectlite.open('knowledge.vdb', { dimension: 384, lockTimeout: 5 })
|
|
165
|
+
|
|
166
|
+
const records = db.list({ namespace: 'docs', filter: { stale: false }, limit: 20 })
|
|
167
|
+
const count = db.count({ namespace: 'docs', filter: { source: 'blog' } })
|
|
168
|
+
const deleted = db.deleteByFilter({ stale: true }, { namespace: 'docs' })
|
|
169
|
+
|
|
170
|
+
db.close()
|
|
171
|
+
```
|
|
172
|
+
|
|
153
173
|
### Search Diagnostics
|
|
154
174
|
|
|
155
175
|
```js
|
|
@@ -164,6 +184,82 @@ console.log(outcome.stats.used_ann) // true
|
|
|
164
184
|
console.log(outcome.results[0].explain) // Detailed scoring breakdown
|
|
165
185
|
```
|
|
166
186
|
|
|
187
|
+
### Vector Quantization
|
|
188
|
+
|
|
189
|
+
Reduce memory usage and accelerate search with quantized vectors. All methods use a 2-stage pipeline: fast quantized candidate selection followed by exact float32 rescoring.
|
|
190
|
+
|
|
191
|
+
```js
|
|
192
|
+
// Scalar quantization (int8) -- 4x memory reduction, minimal recall loss
|
|
193
|
+
db.enableQuantization('scalar')
|
|
194
|
+
|
|
195
|
+
// Binary quantization -- 32x memory reduction, best for normalized embeddings
|
|
196
|
+
db.enableQuantization('binary', JSON.stringify({ rescoreMultiplier: 10 }))
|
|
197
|
+
|
|
198
|
+
// Product quantization -- configurable compression for very large datasets
|
|
199
|
+
db.enableQuantization('product', JSON.stringify({ numSubVectors: 16, numCentroids: 256 }))
|
|
200
|
+
|
|
201
|
+
// Search works exactly the same -- quantization accelerates it transparently
|
|
202
|
+
const results = db.search(queryEmbedding, { k: 10 })
|
|
203
|
+
|
|
204
|
+
// Check quantization status
|
|
205
|
+
console.log(db.isQuantized) // true
|
|
206
|
+
console.log(db.quantizationMethod) // "scalar", "binary", or "product"
|
|
207
|
+
|
|
208
|
+
// Disable quantization
|
|
209
|
+
db.disableQuantization()
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
Quantization parameters persist across reopens in a `.vdb.quant` sidecar file. The quantized index auto-rebuilds on inserts and upserts.
|
|
213
|
+
|
|
214
|
+
## Database Methods Reference
|
|
215
|
+
|
|
216
|
+
### Write Methods
|
|
217
|
+
|
|
218
|
+
| Method | Description |
|
|
219
|
+
|---|---|
|
|
220
|
+
| `db.upsert(id, vector, metadata, options)` | Insert or update a single record |
|
|
221
|
+
| `db.insert(id, vector, metadata, options)` | Insert a record (throws on duplicate id) |
|
|
222
|
+
| `db.upsertMany(records, { namespace })` | Upsert a batch of records |
|
|
223
|
+
| `db.insertMany(records, { namespace })` | Insert a batch |
|
|
224
|
+
| `db.bulkIngest(records, { namespace, batchSize })` | Fastest bulk import with batched WAL writes |
|
|
225
|
+
| `db.delete(id, { namespace })` | Delete a single record |
|
|
226
|
+
| `db.deleteMany(ids, { namespace })` | Delete multiple records by id |
|
|
227
|
+
| `db.deleteByFilter(filter, { namespace })` | Delete all records matching a filter |
|
|
228
|
+
|
|
229
|
+
### Read Methods
|
|
230
|
+
|
|
231
|
+
| Method | Description |
|
|
232
|
+
|---|---|
|
|
233
|
+
| `db.get(id, { namespace })` | Get a single record by id |
|
|
234
|
+
| `db.search(query, options)` | Search and return a list of results |
|
|
235
|
+
| `db.searchWithStats(query, options)` | Search with detailed performance stats |
|
|
236
|
+
| `db.count({ namespace, filter })` | Count records, optionally scoped by namespace/filter |
|
|
237
|
+
| `db.list({ namespace, filter, limit, offset })` | List records without issuing a vector query |
|
|
238
|
+
| `db.namespaces()` | List all namespaces |
|
|
239
|
+
| `db.dimension` | Vector dimension (property) |
|
|
240
|
+
| `db.path` | Database file path (property) |
|
|
241
|
+
| `db.readOnly` | Whether the database is read-only (property) |
|
|
242
|
+
|
|
243
|
+
### Quantization Methods
|
|
244
|
+
|
|
245
|
+
| Method | Description |
|
|
246
|
+
|---|---|
|
|
247
|
+
| `db.enableQuantization(method, optionsJson)` | Enable quantization (`'scalar'`, `'binary'`, or `'product'`) |
|
|
248
|
+
| `db.disableQuantization()` | Disable quantization and remove persisted parameters |
|
|
249
|
+
| `db.isQuantized` | Whether quantization is enabled (property) |
|
|
250
|
+
| `db.quantizationMethod` | Active method name or `null` (property) |
|
|
251
|
+
|
|
252
|
+
### Maintenance Methods
|
|
253
|
+
|
|
254
|
+
| Method | Description |
|
|
255
|
+
|---|---|
|
|
256
|
+
| `db.compact()` | Fold WAL into snapshot and persist ANN indexes |
|
|
257
|
+
| `db.flush()` | Alias for `compact()` |
|
|
258
|
+
| `db.snapshot(dest)` | Create a self-contained `.vdb` copy |
|
|
259
|
+
| `db.backup(destDir)` | Full backup including ANN sidecar files |
|
|
260
|
+
| `db.transaction()` | Begin an atomic transaction |
|
|
261
|
+
| `db.close()` | Flush pending state, release the file lock, and invalidate the handle |
|
|
262
|
+
|
|
167
263
|
## Filter Operators
|
|
168
264
|
|
|
169
265
|
| Operator | Example | Description |
|
package/index.d.ts
CHANGED
|
@@ -82,6 +82,16 @@ export interface WriteOptions {
|
|
|
82
82
|
vectors?: NamedVectors | null
|
|
83
83
|
}
|
|
84
84
|
|
|
85
|
+
/** Options accepted by `Database.count()`. */
export interface CountOptions {
  /** Namespace to restrict the operation to; `null` or omitted is forwarded to the native layer as `null`. */
  namespace?: string | null
  /** Metadata filter; only matching records are considered. */
  filter?: Filter | null
}
|
|
89
|
+
|
|
90
|
+
/** Options accepted by `Database.list()`: the `CountOptions` scoping fields plus paging. */
export interface ListOptions extends CountOptions {
  /** Page size handed to the native `list` call; `null` or omitted is forwarded as `null`. */
  limit?: number | null
  /** Paging offset handed to the native `list` call; `null` or omitted is forwarded as `null`. */
  offset?: number | null
}
|
|
94
|
+
|
|
85
95
|
export interface BulkIngestOptions {
|
|
86
96
|
namespace?: string | null
|
|
87
97
|
batchSize?: number
|
|
@@ -108,6 +118,7 @@ export interface SearchOptions {
|
|
|
108
118
|
/** Options accepted by `open(path, options)`. */
export interface OpenOptions {
  /** Vector dimension; `null` or omitted is forwarded to the native layer as `null`. */
  dimension?: number | null
  /** Open the database read-only; treated as `false` when omitted. */
  readOnly?: boolean
  /**
   * Bound on how long to wait when acquiring the database lock.
   * NOTE(review): the unit is not stated here; README examples pass `5` -- presumably seconds, confirm against the core.
   */
  lockTimeout?: number | null
}
|
|
112
123
|
|
|
113
124
|
export class VectLiteError extends Error {}
|
|
@@ -130,8 +141,10 @@ export class Database {
|
|
|
130
141
|
readonly dimension: number
|
|
131
142
|
readonly readOnly: boolean
|
|
132
143
|
|
|
133
|
-
count(): number
|
|
144
|
+
count(options?: CountOptions): number
|
|
134
145
|
namespaces(): string[]
|
|
146
|
+
close(): void
|
|
147
|
+
list(options?: ListOptions): Record[]
|
|
135
148
|
transaction(): Transaction
|
|
136
149
|
insert(id: string, vector: number[], metadata?: Metadata | null, options?: WriteOptions): void
|
|
137
150
|
upsert(id: string, vector: number[], metadata?: Metadata | null, options?: WriteOptions): void
|
|
@@ -141,6 +154,7 @@ export class Database {
|
|
|
141
154
|
get(id: string, options?: { namespace?: string | null }): Record | null
|
|
142
155
|
delete(id: string, options?: { namespace?: string | null }): boolean
|
|
143
156
|
deleteMany(ids: string[], options?: { namespace?: string | null }): number
|
|
157
|
+
deleteByFilter(filter: Filter, options?: { namespace?: string | null }): number
|
|
144
158
|
flush(): void
|
|
145
159
|
compact(): void
|
|
146
160
|
snapshot(dest: string): void
|
package/index.js
CHANGED
|
@@ -209,14 +209,31 @@ class Database {
|
|
|
209
209
|
return wrapError(() => this._native.readOnly)
|
|
210
210
|
}
|
|
211
211
|
|
|
212
|
-
count() {
|
|
213
|
-
return wrapError(() => this._native.count())
|
|
212
|
+
count(options = {}) {
|
|
213
|
+
return wrapError(() => this._native.count(options.namespace ?? null, encode(options.filter)))
|
|
214
214
|
}
|
|
215
215
|
|
|
216
216
|
namespaces() {
|
|
217
217
|
return wrapError(() => this._native.namespaces())
|
|
218
218
|
}
|
|
219
219
|
|
|
220
|
+
close() {
|
|
221
|
+
return wrapError(() => this._native.close())
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
list(options = {}) {
|
|
225
|
+
return wrapError(() =>
|
|
226
|
+
decode(
|
|
227
|
+
this._native.list(
|
|
228
|
+
options.namespace ?? null,
|
|
229
|
+
encode(options.filter),
|
|
230
|
+
options.limit ?? null,
|
|
231
|
+
options.offset ?? null,
|
|
232
|
+
),
|
|
233
|
+
),
|
|
234
|
+
)
|
|
235
|
+
}
|
|
236
|
+
|
|
220
237
|
transaction() {
|
|
221
238
|
return wrapError(() => new Transaction(this._native.transaction()))
|
|
222
239
|
}
|
|
@@ -261,6 +278,10 @@ class Database {
|
|
|
261
278
|
return wrapError(() => this._native.deleteMany(ids, options.namespace ?? null))
|
|
262
279
|
}
|
|
263
280
|
|
|
281
|
+
deleteByFilter(filter, options = {}) {
|
|
282
|
+
return wrapError(() => this._native.deleteByFilter(encode(filter), options.namespace ?? null))
|
|
283
|
+
}
|
|
284
|
+
|
|
264
285
|
flush() {
|
|
265
286
|
return wrapError(() => this._native.flush())
|
|
266
287
|
}
|
|
@@ -323,7 +344,9 @@ class Store {
|
|
|
323
344
|
}
|
|
324
345
|
|
|
325
346
|
/**
 * Open a database file and wrap the native handle in a `Database`.
 *
 * Defaults applied before calling into native code: `dimension` -> null,
 * `readOnly` -> false, `lockTimeout` -> null.
 *
 * @param {string} path
 * @param {{ dimension?: number | null, readOnly?: boolean, lockTimeout?: number | null }} [options]
 */
function open(path, options = {}) {
  return wrapError(() => {
    const dimension = options.dimension ?? null
    const readOnly = options.readOnly ?? false
    const lockTimeout = options.lockTimeout ?? null
    return new Database(native.open(path, dimension, readOnly, lockTimeout))
  })
}
|
|
328
351
|
|
|
329
352
|
function openStore(root) {
|
package/native/Cargo.toml
CHANGED
package/native/src/lib.rs
CHANGED
|
@@ -6,6 +6,10 @@ use napi::Error as NapiError;
|
|
|
6
6
|
use napi::bindgen_prelude::*;
|
|
7
7
|
use napi_derive::napi;
|
|
8
8
|
use serde_json::{Map, Number, Value, json};
|
|
9
|
+
use vectlite::quantization::{
|
|
10
|
+
BinaryQuantizationConfig, ProductQuantizationConfig, QuantizationConfig,
|
|
11
|
+
ScalarQuantizationConfig,
|
|
12
|
+
};
|
|
9
13
|
use vectlite::{
|
|
10
14
|
Database as CoreDatabase, FusionStrategy, HybridSearchOptions, Metadata, MetadataFilter,
|
|
11
15
|
MetadataValue, NamedVectors, Record, SearchOutcome, SearchResult, SparseVector,
|
|
@@ -133,9 +137,21 @@ impl NativeDatabase {
|
|
|
133
137
|
}
|
|
134
138
|
|
|
135
139
|
#[napi]
|
|
136
|
-
pub fn count(&self) -> Result<u32> {
|
|
140
|
+
pub fn count(&self, namespace: Option<String>, filter_json: Option<String>) -> Result<u32> {
|
|
141
|
+
let filter = filter_json
|
|
142
|
+
.as_ref()
|
|
143
|
+
.map(|json_str| {
|
|
144
|
+
let value: serde_json::Value = serde_json::from_str(json_str)
|
|
145
|
+
.map_err(|e| err(format!("invalid filter JSON: {e}")))?;
|
|
146
|
+
json_to_filter(&value)
|
|
147
|
+
})
|
|
148
|
+
.transpose()?;
|
|
149
|
+
if namespace.is_none() && filter.is_none() {
|
|
150
|
+
let database = self.read()?;
|
|
151
|
+
return Ok(database.len() as u32);
|
|
152
|
+
}
|
|
137
153
|
let database = self.read()?;
|
|
138
|
-
Ok(database.
|
|
154
|
+
Ok(database.count_filtered(namespace.as_deref(), filter.as_ref()) as u32)
|
|
139
155
|
}
|
|
140
156
|
|
|
141
157
|
#[napi]
|
|
@@ -145,11 +161,63 @@ impl NativeDatabase {
|
|
|
145
161
|
}
|
|
146
162
|
|
|
147
163
|
#[napi]
|
|
148
|
-
pub fn
|
|
149
|
-
|
|
164
|
+
pub fn close(&self) -> Result<()> {
|
|
165
|
+
let mut database = self.write()?;
|
|
166
|
+
database.close().map_err(to_napi_error)
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
#[napi]
|
|
170
|
+
pub fn list(
|
|
171
|
+
&self,
|
|
172
|
+
namespace: Option<String>,
|
|
173
|
+
filter_json: Option<String>,
|
|
174
|
+
limit: Option<u32>,
|
|
175
|
+
offset: Option<u32>,
|
|
176
|
+
) -> Result<String> {
|
|
177
|
+
let filter = filter_json
|
|
178
|
+
.as_ref()
|
|
179
|
+
.map(|json_str| {
|
|
180
|
+
let value: serde_json::Value = serde_json::from_str(json_str)
|
|
181
|
+
.map_err(|e| err(format!("invalid filter JSON: {e}")))?;
|
|
182
|
+
json_to_filter(&value)
|
|
183
|
+
})
|
|
184
|
+
.transpose()?;
|
|
185
|
+
let records = {
|
|
186
|
+
let database = self.read()?;
|
|
187
|
+
database
|
|
188
|
+
.list(
|
|
189
|
+
namespace.as_deref(),
|
|
190
|
+
filter.as_ref(),
|
|
191
|
+
limit.unwrap_or(0) as usize,
|
|
192
|
+
offset.unwrap_or(0) as usize,
|
|
193
|
+
)
|
|
194
|
+
.into_iter()
|
|
195
|
+
.cloned()
|
|
196
|
+
.collect::<Vec<_>>()
|
|
197
|
+
};
|
|
198
|
+
let json_records: Vec<Value> = records.iter().map(record_to_json).collect();
|
|
199
|
+
stringify_value(Value::Array(json_records))
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
#[napi(js_name = "deleteByFilter")]
|
|
203
|
+
pub fn delete_by_filter(&self, filter_json: String, namespace: Option<String>) -> Result<u32> {
|
|
204
|
+
let value: serde_json::Value = serde_json::from_str(&filter_json)
|
|
205
|
+
.map_err(|e| err(format!("invalid filter JSON: {e}")))?;
|
|
206
|
+
let filter = json_to_filter(&value)?;
|
|
207
|
+
let mut database = self.write_open()?;
|
|
208
|
+
database
|
|
209
|
+
.delete_by_filter(namespace.as_deref(), &filter)
|
|
210
|
+
.map(|count| count as u32)
|
|
211
|
+
.map_err(to_napi_error)
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
#[napi]
|
|
215
|
+
pub fn transaction(&self) -> Result<NativeTransaction> {
|
|
216
|
+
drop(self.read()?);
|
|
217
|
+
Ok(NativeTransaction {
|
|
150
218
|
inner: Arc::clone(&self.inner),
|
|
151
219
|
staged: Mutex::new(TransactionState::default()),
|
|
152
|
-
}
|
|
220
|
+
})
|
|
153
221
|
}
|
|
154
222
|
|
|
155
223
|
#[napi]
|
|
@@ -166,7 +234,7 @@ impl NativeDatabase {
|
|
|
166
234
|
let sparse = parse_sparse_json(sparse_json)?;
|
|
167
235
|
let vectors = parse_named_vectors_json(vectors_json)?;
|
|
168
236
|
let vector = js_vector_to_core(vector, "vector")?;
|
|
169
|
-
let mut database = self.
|
|
237
|
+
let mut database = self.write_open()?;
|
|
170
238
|
database
|
|
171
239
|
.insert_with_vectors_in_namespace(
|
|
172
240
|
namespace.unwrap_or_default(),
|
|
@@ -193,7 +261,7 @@ impl NativeDatabase {
|
|
|
193
261
|
let sparse = parse_sparse_json(sparse_json)?;
|
|
194
262
|
let vectors = parse_named_vectors_json(vectors_json)?;
|
|
195
263
|
let vector = js_vector_to_core(vector, "vector")?;
|
|
196
|
-
let mut database = self.
|
|
264
|
+
let mut database = self.write_open()?;
|
|
197
265
|
database
|
|
198
266
|
.upsert_with_vectors_in_namespace(
|
|
199
267
|
namespace.unwrap_or_default(),
|
|
@@ -209,7 +277,7 @@ impl NativeDatabase {
|
|
|
209
277
|
#[napi(js_name = "insertMany")]
|
|
210
278
|
pub fn insert_many(&self, records_json: String, namespace: Option<String>) -> Result<u32> {
|
|
211
279
|
let records = parse_record_batch_json(&records_json, namespace.as_deref())?;
|
|
212
|
-
let mut database = self.
|
|
280
|
+
let mut database = self.write_open()?;
|
|
213
281
|
database
|
|
214
282
|
.insert_many(records)
|
|
215
283
|
.map(|count| count as u32)
|
|
@@ -219,7 +287,7 @@ impl NativeDatabase {
|
|
|
219
287
|
#[napi(js_name = "upsertMany")]
|
|
220
288
|
pub fn upsert_many(&self, records_json: String, namespace: Option<String>) -> Result<u32> {
|
|
221
289
|
let records = parse_record_batch_json(&records_json, namespace.as_deref())?;
|
|
222
|
-
let mut database = self.
|
|
290
|
+
let mut database = self.write_open()?;
|
|
223
291
|
database
|
|
224
292
|
.upsert_many(records)
|
|
225
293
|
.map(|count| count as u32)
|
|
@@ -234,7 +302,7 @@ impl NativeDatabase {
|
|
|
234
302
|
batch_size: u32,
|
|
235
303
|
) -> Result<u32> {
|
|
236
304
|
let records = parse_record_batch_json(&records_json, namespace.as_deref())?;
|
|
237
|
-
let mut database = self.
|
|
305
|
+
let mut database = self.write_open()?;
|
|
238
306
|
database
|
|
239
307
|
.bulk_ingest(records, batch_size as usize)
|
|
240
308
|
.map(|count| count as u32)
|
|
@@ -257,7 +325,7 @@ impl NativeDatabase {
|
|
|
257
325
|
|
|
258
326
|
#[napi]
|
|
259
327
|
pub fn delete(&self, id: String, namespace: Option<String>) -> Result<bool> {
|
|
260
|
-
let mut database = self.
|
|
328
|
+
let mut database = self.write_open()?;
|
|
261
329
|
database
|
|
262
330
|
.delete_in_namespace(&namespace.unwrap_or_default(), &id)
|
|
263
331
|
.map_err(to_napi_error)
|
|
@@ -265,7 +333,7 @@ impl NativeDatabase {
|
|
|
265
333
|
|
|
266
334
|
#[napi(js_name = "deleteMany")]
|
|
267
335
|
pub fn delete_many(&self, ids: Vec<String>, namespace: Option<String>) -> Result<u32> {
|
|
268
|
-
let mut database = self.
|
|
336
|
+
let mut database = self.write_open()?;
|
|
269
337
|
database
|
|
270
338
|
.delete_many_in_namespace(&namespace.unwrap_or_default(), ids)
|
|
271
339
|
.map(|count| count as u32)
|
|
@@ -274,16 +342,68 @@ impl NativeDatabase {
|
|
|
274
342
|
|
|
275
343
|
#[napi]
|
|
276
344
|
pub fn flush(&self) -> Result<()> {
|
|
277
|
-
let mut database = self.
|
|
345
|
+
let mut database = self.write_open()?;
|
|
278
346
|
database.flush().map_err(to_napi_error)
|
|
279
347
|
}
|
|
280
348
|
|
|
281
349
|
#[napi]
|
|
282
350
|
pub fn compact(&self) -> Result<()> {
|
|
283
|
-
let mut database = self.
|
|
351
|
+
let mut database = self.write_open()?;
|
|
284
352
|
database.compact().map_err(to_napi_error)
|
|
285
353
|
}
|
|
286
354
|
|
|
355
|
+
// -------------------------------------------------------------------
|
|
356
|
+
// Quantization
|
|
357
|
+
// -------------------------------------------------------------------
|
|
358
|
+
|
|
359
|
+
/// Enable quantization on the database.
|
|
360
|
+
/// `method`: "scalar", "binary", or "product"
|
|
361
|
+
/// `options_json`: JSON with optional keys: rescore_multiplier, num_sub_vectors, num_centroids, training_iterations
|
|
362
|
+
#[napi(js_name = "enableQuantization")]
|
|
363
|
+
pub fn enable_quantization(
|
|
364
|
+
&self,
|
|
365
|
+
method: Option<String>,
|
|
366
|
+
options_json: Option<String>,
|
|
367
|
+
) -> Result<()> {
|
|
368
|
+
let method = method.as_deref().unwrap_or("scalar");
|
|
369
|
+
let (rescore_multiplier, num_sub_vectors, num_centroids, training_iterations) =
|
|
370
|
+
parse_quantization_options(options_json.as_deref())?;
|
|
371
|
+
let config = build_quantization_config(
|
|
372
|
+
method,
|
|
373
|
+
rescore_multiplier,
|
|
374
|
+
num_sub_vectors,
|
|
375
|
+
num_centroids,
|
|
376
|
+
training_iterations,
|
|
377
|
+
)?;
|
|
378
|
+
let mut database = self.write_open()?;
|
|
379
|
+
database.enable_quantization(config).map_err(to_napi_error)
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
/// Disable quantization and remove persisted parameters.
|
|
383
|
+
#[napi(js_name = "disableQuantization")]
|
|
384
|
+
pub fn disable_quantization(&self) -> Result<()> {
|
|
385
|
+
let mut database = self.write_open()?;
|
|
386
|
+
database.disable_quantization().map_err(to_napi_error)
|
|
387
|
+
}
|
|
388
|
+
|
|
389
|
+
/// Returns true if quantization is enabled.
|
|
390
|
+
#[napi(getter, js_name = "isQuantized")]
|
|
391
|
+
pub fn is_quantized(&self) -> Result<bool> {
|
|
392
|
+
let database = self.read()?;
|
|
393
|
+
Ok(database.is_quantized())
|
|
394
|
+
}
|
|
395
|
+
|
|
396
|
+
/// Returns the quantization method name if enabled, else null.
|
|
397
|
+
#[napi(getter, js_name = "quantizationMethod")]
|
|
398
|
+
pub fn quantization_method(&self) -> Result<Option<String>> {
|
|
399
|
+
let database = self.read()?;
|
|
400
|
+
Ok(database.quantization_config().map(|config| match config {
|
|
401
|
+
QuantizationConfig::Scalar(_) => "scalar".to_owned(),
|
|
402
|
+
QuantizationConfig::Binary(_) => "binary".to_owned(),
|
|
403
|
+
QuantizationConfig::Product(_) => "product".to_owned(),
|
|
404
|
+
}))
|
|
405
|
+
}
|
|
406
|
+
|
|
287
407
|
#[napi]
|
|
288
408
|
pub fn snapshot(&self, dest: String) -> Result<()> {
|
|
289
409
|
let database = self.read()?;
|
|
@@ -478,9 +598,14 @@ impl NativeTransaction {
|
|
|
478
598
|
|
|
479
599
|
impl NativeDatabase {
|
|
480
600
|
fn read(&self) -> Result<RwLockReadGuard<'_, CoreDatabase>> {
|
|
481
|
-
self
|
|
601
|
+
let database = self
|
|
602
|
+
.inner
|
|
482
603
|
.read()
|
|
483
|
-
.map_err(|_| err("database read lock poisoned"))
|
|
604
|
+
.map_err(|_| err("database read lock poisoned"))?;
|
|
605
|
+
if database.is_closed() {
|
|
606
|
+
return Err(to_napi_error(closed_database_error()));
|
|
607
|
+
}
|
|
608
|
+
Ok(database)
|
|
484
609
|
}
|
|
485
610
|
|
|
486
611
|
fn write(&self) -> Result<RwLockWriteGuard<'_, CoreDatabase>> {
|
|
@@ -489,6 +614,14 @@ impl NativeDatabase {
|
|
|
489
614
|
.map_err(|_| err("database write lock poisoned"))
|
|
490
615
|
}
|
|
491
616
|
|
|
617
|
+
fn write_open(&self) -> Result<RwLockWriteGuard<'_, CoreDatabase>> {
|
|
618
|
+
let database = self.write()?;
|
|
619
|
+
if database.is_closed() {
|
|
620
|
+
return Err(to_napi_error(closed_database_error()));
|
|
621
|
+
}
|
|
622
|
+
Ok(database)
|
|
623
|
+
}
|
|
624
|
+
|
|
492
625
|
fn execute_search(
|
|
493
626
|
&self,
|
|
494
627
|
query: Option<Vec<f32>>,
|
|
@@ -522,18 +655,40 @@ impl NativeDatabase {
|
|
|
522
655
|
}
|
|
523
656
|
|
|
524
657
|
#[napi]
|
|
525
|
-
pub fn open(
|
|
658
|
+
pub fn open(
|
|
659
|
+
path: String,
|
|
660
|
+
dimension: Option<u32>,
|
|
661
|
+
read_only: bool,
|
|
662
|
+
lock_timeout: Option<f64>,
|
|
663
|
+
) -> Result<NativeDatabase> {
|
|
526
664
|
let database = if read_only {
|
|
527
665
|
if !Path::new(&path).exists() {
|
|
528
666
|
return Err(err("cannot open non-existent database in read-only mode"));
|
|
529
667
|
}
|
|
530
|
-
|
|
668
|
+
match lock_timeout {
|
|
669
|
+
Some(timeout) => CoreDatabase::open_read_only_with_timeout(&path, Some(timeout))
|
|
670
|
+
.map_err(to_napi_error)?,
|
|
671
|
+
None => CoreDatabase::open_read_only(&path).map_err(to_napi_error)?,
|
|
672
|
+
}
|
|
531
673
|
} else if Path::new(&path).exists() {
|
|
532
|
-
match dimension {
|
|
533
|
-
Some(dimension) => {
|
|
674
|
+
match (dimension, lock_timeout) {
|
|
675
|
+
(Some(dimension), Some(timeout)) => {
|
|
676
|
+
let db = CoreDatabase::open_with_timeout(&path, timeout).map_err(to_napi_error)?;
|
|
677
|
+
if db.dimension() != dimension as usize {
|
|
678
|
+
return Err(to_napi_error(vectlite::VectLiteError::DimensionMismatch {
|
|
679
|
+
expected: db.dimension(),
|
|
680
|
+
found: dimension as usize,
|
|
681
|
+
}));
|
|
682
|
+
}
|
|
683
|
+
db
|
|
684
|
+
}
|
|
685
|
+
(Some(dimension), None) => {
|
|
534
686
|
CoreDatabase::open_or_create(&path, dimension as usize).map_err(to_napi_error)?
|
|
535
687
|
}
|
|
536
|
-
None
|
|
688
|
+
(None, Some(timeout)) => {
|
|
689
|
+
CoreDatabase::open_with_timeout(&path, timeout).map_err(to_napi_error)?
|
|
690
|
+
}
|
|
691
|
+
(None, None) => CoreDatabase::open(&path).map_err(to_napi_error)?,
|
|
537
692
|
}
|
|
538
693
|
} else {
|
|
539
694
|
let Some(dimension) = dimension else {
|
|
@@ -1212,3 +1367,74 @@ fn err(message: impl Into<String>) -> NapiError {
|
|
|
1212
1367
|
fn to_napi_error(error: vectlite::VectLiteError) -> NapiError {
|
|
1213
1368
|
err(error.to_string())
|
|
1214
1369
|
}
|
|
1370
|
+
|
|
1371
|
+
/// Builds the error reported when an operation is attempted on a closed database.
fn closed_database_error() -> vectlite::VectLiteError {
    let message = String::from("database is closed");
    vectlite::VectLiteError::InvalidFormat(message)
}
|
|
1374
|
+
|
|
1375
|
+
fn parse_quantization_options(
|
|
1376
|
+
options_json: Option<&str>,
|
|
1377
|
+
) -> Result<(Option<usize>, Option<usize>, Option<usize>, Option<usize>)> {
|
|
1378
|
+
let Some(json_str) = options_json else {
|
|
1379
|
+
return Ok((None, None, None, None));
|
|
1380
|
+
};
|
|
1381
|
+
let value: Value = serde_json::from_str(json_str)
|
|
1382
|
+
.map_err(|e| err(format!("invalid quantization options JSON: {e}")))?;
|
|
1383
|
+
let obj = value
|
|
1384
|
+
.as_object()
|
|
1385
|
+
.ok_or_else(|| err("quantization options must be a JSON object"))?;
|
|
1386
|
+
|
|
1387
|
+
let rescore_multiplier = obj
|
|
1388
|
+
.get("rescoreMultiplier")
|
|
1389
|
+
.or_else(|| obj.get("rescore_multiplier"))
|
|
1390
|
+
.and_then(|v| v.as_u64())
|
|
1391
|
+
.map(|v| v as usize);
|
|
1392
|
+
let num_sub_vectors = obj
|
|
1393
|
+
.get("numSubVectors")
|
|
1394
|
+
.or_else(|| obj.get("num_sub_vectors"))
|
|
1395
|
+
.and_then(|v| v.as_u64())
|
|
1396
|
+
.map(|v| v as usize);
|
|
1397
|
+
let num_centroids = obj
|
|
1398
|
+
.get("numCentroids")
|
|
1399
|
+
.or_else(|| obj.get("num_centroids"))
|
|
1400
|
+
.and_then(|v| v.as_u64())
|
|
1401
|
+
.map(|v| v as usize);
|
|
1402
|
+
let training_iterations = obj
|
|
1403
|
+
.get("trainingIterations")
|
|
1404
|
+
.or_else(|| obj.get("training_iterations"))
|
|
1405
|
+
.and_then(|v| v.as_u64())
|
|
1406
|
+
.map(|v| v as usize);
|
|
1407
|
+
|
|
1408
|
+
Ok((
|
|
1409
|
+
rescore_multiplier,
|
|
1410
|
+
num_sub_vectors,
|
|
1411
|
+
num_centroids,
|
|
1412
|
+
training_iterations,
|
|
1413
|
+
))
|
|
1414
|
+
}
|
|
1415
|
+
|
|
1416
|
+
fn build_quantization_config(
|
|
1417
|
+
method: &str,
|
|
1418
|
+
rescore_multiplier: Option<usize>,
|
|
1419
|
+
num_sub_vectors: Option<usize>,
|
|
1420
|
+
num_centroids: Option<usize>,
|
|
1421
|
+
training_iterations: Option<usize>,
|
|
1422
|
+
) -> Result<QuantizationConfig> {
|
|
1423
|
+
match method {
|
|
1424
|
+
"scalar" | "int8" => Ok(QuantizationConfig::Scalar(ScalarQuantizationConfig {
|
|
1425
|
+
rescore_multiplier: rescore_multiplier.unwrap_or(5),
|
|
1426
|
+
})),
|
|
1427
|
+
"binary" => Ok(QuantizationConfig::Binary(BinaryQuantizationConfig {
|
|
1428
|
+
rescore_multiplier: rescore_multiplier.unwrap_or(10),
|
|
1429
|
+
})),
|
|
1430
|
+
"product" | "pq" => Ok(QuantizationConfig::Product(ProductQuantizationConfig {
|
|
1431
|
+
num_sub_vectors: num_sub_vectors.unwrap_or(16),
|
|
1432
|
+
num_centroids: num_centroids.unwrap_or(256),
|
|
1433
|
+
training_iterations: training_iterations.unwrap_or(20),
|
|
1434
|
+
rescore_multiplier: rescore_multiplier.unwrap_or(10),
|
|
1435
|
+
})),
|
|
1436
|
+
other => Err(err(format!(
|
|
1437
|
+
"unknown quantization method '{other}'. Expected: 'scalar', 'binary', or 'product'"
|
|
1438
|
+
))),
|
|
1439
|
+
}
|
|
1440
|
+
}
|