vectlite 0.1.11 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +230 -1
- package/index.d.ts +55 -0
- package/index.js +171 -12
- package/native/Cargo.toml +1 -1
- package/native/src/lib.rs +733 -25
- package/native/vectlite-core/Cargo.toml +2 -1
- package/native/vectlite-core/src/lib.rs +6092 -1990
- package/native/vectlite-core/src/quantization.rs +1587 -0
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/vectlite.node +0 -0
- package/prebuilds/darwin-x64/vectlite.node +0 -0
- package/prebuilds/linux-x64-gnu/vectlite.node +0 -0
- package/prebuilds/win32-x64-msvc/vectlite.node +0 -0
package/native/src/lib.rs
CHANGED
|
@@ -6,10 +6,15 @@ use napi::Error as NapiError;
|
|
|
6
6
|
use napi::bindgen_prelude::*;
|
|
7
7
|
use napi_derive::napi;
|
|
8
8
|
use serde_json::{Map, Number, Value, json};
|
|
9
|
+
use vectlite::quantization::{
|
|
10
|
+
BinaryQuantizationConfig, MultiVectorQuantizationConfig, ProductQuantizationConfig,
|
|
11
|
+
QuantizationConfig, ScalarQuantizationConfig, TwoBitQuantizationConfig,
|
|
12
|
+
};
|
|
9
13
|
use vectlite::{
|
|
10
|
-
Database as CoreDatabase, FusionStrategy, HybridSearchOptions, Metadata,
|
|
11
|
-
|
|
12
|
-
Store as CoreStore,
|
|
14
|
+
Database as CoreDatabase, DistanceMetric, FusionStrategy, HybridSearchOptions, Metadata,
|
|
15
|
+
MetadataFilter, MetadataValue, MultiVectorSearchOptions, MultiVectors, NamedVectors,
|
|
16
|
+
PayloadIndexType, Record, SearchOutcome, SearchResult, SparseVector, Store as CoreStore,
|
|
17
|
+
WriteOperation,
|
|
13
18
|
};
|
|
14
19
|
|
|
15
20
|
#[napi(js_name = "NativeDatabase")]
|
|
@@ -126,6 +131,12 @@ impl NativeDatabase {
|
|
|
126
131
|
Ok(database.dimension() as u32)
|
|
127
132
|
}
|
|
128
133
|
|
|
134
|
+
#[napi(getter)]
|
|
135
|
+
pub fn metric(&self) -> Result<String> {
|
|
136
|
+
let database = self.read()?;
|
|
137
|
+
Ok(database.metric().name().to_owned())
|
|
138
|
+
}
|
|
139
|
+
|
|
129
140
|
#[napi(getter, js_name = "readOnly")]
|
|
130
141
|
pub fn read_only(&self) -> Result<bool> {
|
|
131
142
|
let database = self.read()?;
|
|
@@ -142,10 +153,6 @@ impl NativeDatabase {
|
|
|
142
153
|
json_to_filter(&value)
|
|
143
154
|
})
|
|
144
155
|
.transpose()?;
|
|
145
|
-
if namespace.is_none() && filter.is_none() {
|
|
146
|
-
let database = self.read()?;
|
|
147
|
-
return Ok(database.len() as u32);
|
|
148
|
-
}
|
|
149
156
|
let database = self.read()?;
|
|
150
157
|
Ok(database.count_filtered(namespace.as_deref(), filter.as_ref()) as u32)
|
|
151
158
|
}
|
|
@@ -195,6 +202,38 @@ impl NativeDatabase {
|
|
|
195
202
|
stringify_value(Value::Array(json_records))
|
|
196
203
|
}
|
|
197
204
|
|
|
205
|
+
#[napi(js_name = "listCursor")]
|
|
206
|
+
pub fn list_cursor(
|
|
207
|
+
&self,
|
|
208
|
+
namespace: Option<String>,
|
|
209
|
+
filter_json: Option<String>,
|
|
210
|
+
limit: Option<u32>,
|
|
211
|
+
cursor: Option<String>,
|
|
212
|
+
) -> Result<String> {
|
|
213
|
+
let filter = filter_json
|
|
214
|
+
.as_ref()
|
|
215
|
+
.map(|json_str| {
|
|
216
|
+
let value: serde_json::Value = serde_json::from_str(json_str)
|
|
217
|
+
.map_err(|e| err(format!("invalid filter JSON: {e}")))?;
|
|
218
|
+
json_to_filter(&value)
|
|
219
|
+
})
|
|
220
|
+
.transpose()?;
|
|
221
|
+
let database = self.read()?;
|
|
222
|
+
let (records, next_cursor) = database.list_cursor(
|
|
223
|
+
namespace.as_deref(),
|
|
224
|
+
filter.as_ref(),
|
|
225
|
+
limit.unwrap_or(0) as usize,
|
|
226
|
+
cursor.as_deref(),
|
|
227
|
+
);
|
|
228
|
+
let records: Vec<Record> = records.into_iter().cloned().collect();
|
|
229
|
+
let json_records: Vec<Value> = records.iter().map(record_to_json).collect();
|
|
230
|
+
let result = serde_json::json!({
|
|
231
|
+
"records": json_records,
|
|
232
|
+
"cursor": next_cursor,
|
|
233
|
+
});
|
|
234
|
+
stringify_value(result)
|
|
235
|
+
}
|
|
236
|
+
|
|
198
237
|
#[napi(js_name = "deleteByFilter")]
|
|
199
238
|
pub fn delete_by_filter(&self, filter_json: String, namespace: Option<String>) -> Result<u32> {
|
|
200
239
|
let value: serde_json::Value = serde_json::from_str(&filter_json)
|
|
@@ -207,6 +246,70 @@ impl NativeDatabase {
|
|
|
207
246
|
.map_err(to_napi_error)
|
|
208
247
|
}
|
|
209
248
|
|
|
249
|
+
#[napi]
|
|
250
|
+
pub fn update_metadata(
|
|
251
|
+
&self,
|
|
252
|
+
id: String,
|
|
253
|
+
metadata_json: String,
|
|
254
|
+
namespace: Option<String>,
|
|
255
|
+
) -> Result<bool> {
|
|
256
|
+
let patch = parse_metadata_json(Some(metadata_json))?;
|
|
257
|
+
let mut database = self.write_open()?;
|
|
258
|
+
database
|
|
259
|
+
.update_metadata_in_namespace(namespace.unwrap_or_default(), &id, patch)
|
|
260
|
+
.map_err(to_napi_error)
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
// -------------------------------------------------------------------
|
|
264
|
+
// TTL / Expiry
|
|
265
|
+
// -------------------------------------------------------------------
|
|
266
|
+
|
|
267
|
+
#[napi(js_name = "setTtl")]
|
|
268
|
+
pub fn set_ttl(&self, id: String, ttl: f64, namespace: Option<String>) -> Result<bool> {
|
|
269
|
+
let mut database = self.write_open()?;
|
|
270
|
+
database
|
|
271
|
+
.set_ttl_in_namespace(&namespace.unwrap_or_default(), &id, ttl)
|
|
272
|
+
.map_err(to_napi_error)
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
#[napi(js_name = "clearTtl")]
|
|
276
|
+
pub fn clear_ttl(&self, id: String, namespace: Option<String>) -> Result<bool> {
|
|
277
|
+
let mut database = self.write_open()?;
|
|
278
|
+
database
|
|
279
|
+
.clear_ttl_in_namespace(&namespace.unwrap_or_default(), &id)
|
|
280
|
+
.map_err(to_napi_error)
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
// -------------------------------------------------------------------
|
|
284
|
+
// Payload Indexes
|
|
285
|
+
// -------------------------------------------------------------------
|
|
286
|
+
|
|
287
|
+
#[napi(js_name = "createIndex")]
|
|
288
|
+
pub fn create_index(&self, field: String, index_type: String) -> Result<bool> {
|
|
289
|
+
let ty = parse_payload_index_type(&index_type)?;
|
|
290
|
+
let mut database = self.write_open()?;
|
|
291
|
+
database.create_index(&field, ty).map_err(to_napi_error)
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
#[napi(js_name = "dropIndex")]
|
|
295
|
+
pub fn drop_index(&self, field: String) -> Result<bool> {
|
|
296
|
+
let mut database = self.write_open()?;
|
|
297
|
+
database.drop_index(&field).map_err(to_napi_error)
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
#[napi(js_name = "listIndexes")]
|
|
301
|
+
pub fn list_indexes(&self) -> Result<String> {
|
|
302
|
+
let database = self.read()?;
|
|
303
|
+
let indexes = database.list_indexes();
|
|
304
|
+
let arr: Vec<Value> = indexes
|
|
305
|
+
.into_iter()
|
|
306
|
+
.map(|(field, index_type)| {
|
|
307
|
+
json!({ "field": field, "type": index_type.name() })
|
|
308
|
+
})
|
|
309
|
+
.collect();
|
|
310
|
+
serde_json::to_string(&arr).map_err(|e| err(format!("JSON serialize: {e}")))
|
|
311
|
+
}
|
|
312
|
+
|
|
210
313
|
#[napi]
|
|
211
314
|
pub fn transaction(&self) -> Result<NativeTransaction> {
|
|
212
315
|
drop(self.read()?);
|
|
@@ -225,21 +328,27 @@ impl NativeDatabase {
|
|
|
225
328
|
namespace: Option<String>,
|
|
226
329
|
sparse_json: Option<String>,
|
|
227
330
|
vectors_json: Option<String>,
|
|
331
|
+
ttl: Option<f64>,
|
|
228
332
|
) -> Result<()> {
|
|
229
333
|
let metadata = parse_metadata_json(metadata_json)?;
|
|
230
334
|
let sparse = parse_sparse_json(sparse_json)?;
|
|
231
335
|
let vectors = parse_named_vectors_json(vectors_json)?;
|
|
232
336
|
let vector = js_vector_to_core(vector, "vector")?;
|
|
337
|
+
let expires_at = ttl_to_expires_at(ttl)?;
|
|
338
|
+
let record = Record {
|
|
339
|
+
namespace: namespace.unwrap_or_default(),
|
|
340
|
+
id,
|
|
341
|
+
vector,
|
|
342
|
+
vectors,
|
|
343
|
+
sparse,
|
|
344
|
+
metadata,
|
|
345
|
+
multi_vectors: MultiVectors::new(),
|
|
346
|
+
expires_at,
|
|
347
|
+
};
|
|
233
348
|
let mut database = self.write_open()?;
|
|
234
349
|
database
|
|
235
|
-
.
|
|
236
|
-
|
|
237
|
-
&id,
|
|
238
|
-
vector,
|
|
239
|
-
vectors,
|
|
240
|
-
sparse,
|
|
241
|
-
metadata,
|
|
242
|
-
)
|
|
350
|
+
.insert_many(std::iter::once(record))
|
|
351
|
+
.map(|_| ())
|
|
243
352
|
.map_err(to_napi_error)
|
|
244
353
|
}
|
|
245
354
|
|
|
@@ -252,21 +361,27 @@ impl NativeDatabase {
|
|
|
252
361
|
namespace: Option<String>,
|
|
253
362
|
sparse_json: Option<String>,
|
|
254
363
|
vectors_json: Option<String>,
|
|
364
|
+
ttl: Option<f64>,
|
|
255
365
|
) -> Result<()> {
|
|
256
366
|
let metadata = parse_metadata_json(metadata_json)?;
|
|
257
367
|
let sparse = parse_sparse_json(sparse_json)?;
|
|
258
368
|
let vectors = parse_named_vectors_json(vectors_json)?;
|
|
259
369
|
let vector = js_vector_to_core(vector, "vector")?;
|
|
370
|
+
let expires_at = ttl_to_expires_at(ttl)?;
|
|
371
|
+
let record = Record {
|
|
372
|
+
namespace: namespace.unwrap_or_default(),
|
|
373
|
+
id,
|
|
374
|
+
vector,
|
|
375
|
+
vectors,
|
|
376
|
+
sparse,
|
|
377
|
+
metadata,
|
|
378
|
+
multi_vectors: MultiVectors::new(),
|
|
379
|
+
expires_at,
|
|
380
|
+
};
|
|
260
381
|
let mut database = self.write_open()?;
|
|
261
382
|
database
|
|
262
|
-
.
|
|
263
|
-
|
|
264
|
-
&id,
|
|
265
|
-
vector,
|
|
266
|
-
vectors,
|
|
267
|
-
sparse,
|
|
268
|
-
metadata,
|
|
269
|
-
)
|
|
383
|
+
.upsert_many(std::iter::once(record))
|
|
384
|
+
.map(|_| ())
|
|
270
385
|
.map_err(to_napi_error)
|
|
271
386
|
}
|
|
272
387
|
|
|
@@ -348,6 +463,242 @@ impl NativeDatabase {
|
|
|
348
463
|
database.compact().map_err(to_napi_error)
|
|
349
464
|
}
|
|
350
465
|
|
|
466
|
+
// -------------------------------------------------------------------
|
|
467
|
+
// Quantization
|
|
468
|
+
// -------------------------------------------------------------------
|
|
469
|
+
|
|
470
|
+
/// Enable quantization on the database.
|
|
471
|
+
/// `method`: "scalar", "binary", or "product"
|
|
472
|
+
/// `options_json`: JSON with optional keys: rescore_multiplier, num_sub_vectors, num_centroids, training_iterations
|
|
473
|
+
#[napi(js_name = "enableQuantization")]
|
|
474
|
+
pub fn enable_quantization(
|
|
475
|
+
&self,
|
|
476
|
+
method: Option<String>,
|
|
477
|
+
options_json: Option<String>,
|
|
478
|
+
) -> Result<()> {
|
|
479
|
+
let method = method.as_deref().unwrap_or("scalar");
|
|
480
|
+
let (rescore_multiplier, num_sub_vectors, num_centroids, training_iterations) =
|
|
481
|
+
parse_quantization_options(options_json.as_deref())?;
|
|
482
|
+
let config = build_quantization_config(
|
|
483
|
+
method,
|
|
484
|
+
rescore_multiplier,
|
|
485
|
+
num_sub_vectors,
|
|
486
|
+
num_centroids,
|
|
487
|
+
training_iterations,
|
|
488
|
+
)?;
|
|
489
|
+
let mut database = self.write_open()?;
|
|
490
|
+
database.enable_quantization(config).map_err(to_napi_error)
|
|
491
|
+
}
|
|
492
|
+
|
|
493
|
+
/// Disable quantization and remove persisted parameters.
|
|
494
|
+
#[napi(js_name = "disableQuantization")]
|
|
495
|
+
pub fn disable_quantization(&self) -> Result<()> {
|
|
496
|
+
let mut database = self.write_open()?;
|
|
497
|
+
database.disable_quantization().map_err(to_napi_error)
|
|
498
|
+
}
|
|
499
|
+
|
|
500
|
+
/// Returns true if quantization is enabled.
|
|
501
|
+
#[napi(getter, js_name = "isQuantized")]
|
|
502
|
+
pub fn is_quantized(&self) -> Result<bool> {
|
|
503
|
+
let database = self.read()?;
|
|
504
|
+
Ok(database.is_quantized())
|
|
505
|
+
}
|
|
506
|
+
|
|
507
|
+
/// Returns the quantization method name if enabled, else null.
|
|
508
|
+
#[napi(getter, js_name = "quantizationMethod")]
|
|
509
|
+
pub fn quantization_method(&self) -> Result<Option<String>> {
|
|
510
|
+
let database = self.read()?;
|
|
511
|
+
Ok(database.quantization_config().map(|config| match config {
|
|
512
|
+
QuantizationConfig::Scalar(_) => "scalar".to_owned(),
|
|
513
|
+
QuantizationConfig::Binary(_) => "binary".to_owned(),
|
|
514
|
+
QuantizationConfig::Product(_) => "product".to_owned(),
|
|
515
|
+
}))
|
|
516
|
+
}
|
|
517
|
+
|
|
518
|
+
// ---- Multi-vector / ColBERT-style late interaction ----
|
|
519
|
+
|
|
520
|
+
/// Upsert a record with multi-vector token embeddings (ColBERT-style).
|
|
521
|
+
///
|
|
522
|
+
/// `multi_vectors_json` is a JSON string mapping space names to arrays of
|
|
523
|
+
/// token vectors, e.g. `{"colbert": [[0.1, 0.2], [0.3, 0.4]]}`.
|
|
524
|
+
#[napi(js_name = "upsertMultiVectors")]
|
|
525
|
+
pub fn upsert_multi_vectors(
|
|
526
|
+
&self,
|
|
527
|
+
id: String,
|
|
528
|
+
vector: Vec<f64>,
|
|
529
|
+
multi_vectors_json: String,
|
|
530
|
+
options_json: Option<String>,
|
|
531
|
+
) -> Result<()> {
|
|
532
|
+
let vector: Vec<f32> = vector.iter().map(|&v| v as f32).collect();
|
|
533
|
+
let mv_value: Value = serde_json::from_str(&multi_vectors_json)
|
|
534
|
+
.map_err(|e| to_napi_error(vectlite::VectLiteError::InvalidFormat(e.to_string())))?;
|
|
535
|
+
let mv = json_to_multi_vectors(&mv_value)?;
|
|
536
|
+
|
|
537
|
+
let (metadata, namespace) = if let Some(opts) = options_json {
|
|
538
|
+
let opts: Value = serde_json::from_str(&opts)
|
|
539
|
+
.map_err(|e| to_napi_error(vectlite::VectLiteError::InvalidFormat(e.to_string())))?;
|
|
540
|
+
let metadata = opts
|
|
541
|
+
.get("metadata")
|
|
542
|
+
.map(|v| json_to_metadata(v))
|
|
543
|
+
.transpose()?
|
|
544
|
+
.unwrap_or_default();
|
|
545
|
+
let namespace = opts
|
|
546
|
+
.get("namespace")
|
|
547
|
+
.and_then(|v| v.as_str())
|
|
548
|
+
.unwrap_or("")
|
|
549
|
+
.to_string();
|
|
550
|
+
(metadata, namespace)
|
|
551
|
+
} else {
|
|
552
|
+
(Metadata::new(), String::new())
|
|
553
|
+
};
|
|
554
|
+
|
|
555
|
+
let mut database = self.write_open()?;
|
|
556
|
+
database
|
|
557
|
+
.upsert_multi_vectors_in_namespace(namespace, id, vector, metadata, mv)
|
|
558
|
+
.map_err(to_napi_error)
|
|
559
|
+
}
|
|
560
|
+
|
|
561
|
+
/// Search using multi-vector late interaction (MaxSim) scoring.
|
|
562
|
+
///
|
|
563
|
+
/// Returns a JSON array of results with id, score, namespace, and metadata.
|
|
564
|
+
#[napi(js_name = "searchMultiVector")]
|
|
565
|
+
pub fn search_multi_vector(
|
|
566
|
+
&self,
|
|
567
|
+
space: String,
|
|
568
|
+
query_tokens_json: String,
|
|
569
|
+
options_json: Option<String>,
|
|
570
|
+
) -> Result<String> {
|
|
571
|
+
let qt_value: Value = serde_json::from_str(&query_tokens_json)
|
|
572
|
+
.map_err(|e| to_napi_error(vectlite::VectLiteError::InvalidFormat(e.to_string())))?;
|
|
573
|
+
let qt_arr = qt_value
|
|
574
|
+
.as_array()
|
|
575
|
+
.ok_or_else(|| to_napi_error(vectlite::VectLiteError::InvalidFormat(
|
|
576
|
+
"query_tokens must be a JSON array of arrays".to_owned(),
|
|
577
|
+
)))?;
|
|
578
|
+
let query_tokens: Vec<Vec<f32>> = qt_arr
|
|
579
|
+
.iter()
|
|
580
|
+
.map(|v| {
|
|
581
|
+
v.as_array()
|
|
582
|
+
.ok_or_else(|| {
|
|
583
|
+
to_napi_error(vectlite::VectLiteError::InvalidFormat(
|
|
584
|
+
"each query token must be an array of numbers".to_owned(),
|
|
585
|
+
))
|
|
586
|
+
})?
|
|
587
|
+
.iter()
|
|
588
|
+
.map(|n| {
|
|
589
|
+
n.as_f64()
|
|
590
|
+
.map(|f| f as f32)
|
|
591
|
+
.ok_or_else(|| {
|
|
592
|
+
to_napi_error(vectlite::VectLiteError::InvalidFormat(
|
|
593
|
+
"token values must be numbers".to_owned(),
|
|
594
|
+
))
|
|
595
|
+
})
|
|
596
|
+
})
|
|
597
|
+
.collect::<Result<Vec<f32>>>()
|
|
598
|
+
})
|
|
599
|
+
.collect::<Result<Vec<Vec<f32>>>>()?;
|
|
600
|
+
|
|
601
|
+
let (top_k, filter, namespace) = if let Some(opts) = options_json {
|
|
602
|
+
let opts: Value = serde_json::from_str(&opts)
|
|
603
|
+
.map_err(|e| to_napi_error(vectlite::VectLiteError::InvalidFormat(e.to_string())))?;
|
|
604
|
+
let top_k = opts.get("k").and_then(|v| v.as_u64()).unwrap_or(10) as usize;
|
|
605
|
+
let filter = opts
|
|
606
|
+
.get("filter")
|
|
607
|
+
.map(|v| json_to_filter(v))
|
|
608
|
+
.transpose()?;
|
|
609
|
+
let namespace = opts
|
|
610
|
+
.get("namespace")
|
|
611
|
+
.and_then(|v| v.as_str())
|
|
612
|
+
.map(String::from);
|
|
613
|
+
(top_k, filter, namespace)
|
|
614
|
+
} else {
|
|
615
|
+
(10, None, None)
|
|
616
|
+
};
|
|
617
|
+
|
|
618
|
+
let options = MultiVectorSearchOptions {
|
|
619
|
+
top_k,
|
|
620
|
+
filter,
|
|
621
|
+
namespace,
|
|
622
|
+
};
|
|
623
|
+
|
|
624
|
+
let database = self.read()?;
|
|
625
|
+
let results = database
|
|
626
|
+
.search_multi_vector(&space, &query_tokens, options)
|
|
627
|
+
.map_err(to_napi_error)?;
|
|
628
|
+
|
|
629
|
+
let json_results: Vec<Value> = results
|
|
630
|
+
.into_iter()
|
|
631
|
+
.map(|r| {
|
|
632
|
+
json!({
|
|
633
|
+
"id": r.id,
|
|
634
|
+
"score": r.score,
|
|
635
|
+
"namespace": r.namespace,
|
|
636
|
+
"metadata": metadata_to_json(&r.metadata),
|
|
637
|
+
})
|
|
638
|
+
})
|
|
639
|
+
.collect();
|
|
640
|
+
|
|
641
|
+
serde_json::to_string(&json_results)
|
|
642
|
+
.map_err(|e| to_napi_error(vectlite::VectLiteError::InvalidFormat(e.to_string())))
|
|
643
|
+
}
|
|
644
|
+
|
|
645
|
+
/// Enable 2-bit quantization for a multi-vector space.
|
|
646
|
+
#[napi(js_name = "enableMultiVectorQuantization")]
|
|
647
|
+
pub fn enable_multi_vector_quantization(
|
|
648
|
+
&self,
|
|
649
|
+
space: String,
|
|
650
|
+
options_json: Option<String>,
|
|
651
|
+
) -> Result<()> {
|
|
652
|
+
let (method, rescore_multiplier) = if let Some(opts) = options_json {
|
|
653
|
+
let opts: Value = serde_json::from_str(&opts)
|
|
654
|
+
.map_err(|e| to_napi_error(vectlite::VectLiteError::InvalidFormat(e.to_string())))?;
|
|
655
|
+
let method = opts
|
|
656
|
+
.get("method")
|
|
657
|
+
.and_then(|v| v.as_str())
|
|
658
|
+
.unwrap_or("two_bit")
|
|
659
|
+
.to_string();
|
|
660
|
+
let rescore = opts
|
|
661
|
+
.get("rescoreMultiplier")
|
|
662
|
+
.and_then(|v| v.as_u64())
|
|
663
|
+
.map(|v| v as usize);
|
|
664
|
+
(method, rescore)
|
|
665
|
+
} else {
|
|
666
|
+
("two_bit".to_string(), None)
|
|
667
|
+
};
|
|
668
|
+
|
|
669
|
+
let config = match method.as_str() {
|
|
670
|
+
"two_bit" => MultiVectorQuantizationConfig::TwoBit(TwoBitQuantizationConfig {
|
|
671
|
+
rescore_multiplier: rescore_multiplier.unwrap_or(4),
|
|
672
|
+
}),
|
|
673
|
+
other => {
|
|
674
|
+
return Err(to_napi_error(vectlite::VectLiteError::InvalidFormat(
|
|
675
|
+
format!("unknown multi-vector quantization method: {other}. Supported: two_bit"),
|
|
676
|
+
)));
|
|
677
|
+
}
|
|
678
|
+
};
|
|
679
|
+
|
|
680
|
+
let mut database = self.write_open()?;
|
|
681
|
+
database
|
|
682
|
+
.enable_multi_vector_quantization(&space, config)
|
|
683
|
+
.map_err(to_napi_error)
|
|
684
|
+
}
|
|
685
|
+
|
|
686
|
+
/// Disable multi-vector quantization for a space.
|
|
687
|
+
#[napi(js_name = "disableMultiVectorQuantization")]
|
|
688
|
+
pub fn disable_multi_vector_quantization(&self, space: String) -> Result<()> {
|
|
689
|
+
let mut database = self.write_open()?;
|
|
690
|
+
database
|
|
691
|
+
.disable_multi_vector_quantization(&space)
|
|
692
|
+
.map_err(to_napi_error)
|
|
693
|
+
}
|
|
694
|
+
|
|
695
|
+
/// Returns true if multi-vector quantization is enabled for the given space.
|
|
696
|
+
#[napi(js_name = "isMultiVectorQuantized")]
|
|
697
|
+
pub fn is_multi_vector_quantized(&self, space: String) -> Result<bool> {
|
|
698
|
+
let database = self.read()?;
|
|
699
|
+
Ok(database.is_multi_vector_quantized(&space))
|
|
700
|
+
}
|
|
701
|
+
|
|
351
702
|
#[napi]
|
|
352
703
|
pub fn snapshot(&self, dest: String) -> Result<()> {
|
|
353
704
|
let database = self.read()?;
|
|
@@ -393,6 +744,206 @@ impl NativeDatabase {
|
|
|
393
744
|
}
|
|
394
745
|
}
|
|
395
746
|
|
|
747
|
+
// -------------------------------------------------------------------
|
|
748
|
+
// Async tasks (run on libuv threadpool via napi::Task)
|
|
749
|
+
// -------------------------------------------------------------------
|
|
750
|
+
|
|
751
|
+
pub struct SearchTask {
|
|
752
|
+
db: Arc<RwLock<CoreDatabase>>,
|
|
753
|
+
query: Option<Vec<f32>>,
|
|
754
|
+
request: SearchRequest,
|
|
755
|
+
with_stats: bool,
|
|
756
|
+
}
|
|
757
|
+
|
|
758
|
+
impl napi::Task for SearchTask {
|
|
759
|
+
type Output = String;
|
|
760
|
+
type JsValue = String;
|
|
761
|
+
|
|
762
|
+
fn compute(&mut self) -> Result<Self::Output> {
|
|
763
|
+
let database = self
|
|
764
|
+
.db
|
|
765
|
+
.read()
|
|
766
|
+
.map_err(|e| err(format!("lock poisoned: {e}")))?;
|
|
767
|
+
let sparse_ref = if self.request.sparse.is_empty() {
|
|
768
|
+
None
|
|
769
|
+
} else {
|
|
770
|
+
Some(&self.request.sparse)
|
|
771
|
+
};
|
|
772
|
+
let outcome = if self.request.all_namespaces {
|
|
773
|
+
database
|
|
774
|
+
.hybrid_search_all_namespaces_with_stats(
|
|
775
|
+
self.query.as_deref(),
|
|
776
|
+
sparse_ref,
|
|
777
|
+
self.request.options.clone(),
|
|
778
|
+
)
|
|
779
|
+
.map_err(to_napi_error)?
|
|
780
|
+
} else {
|
|
781
|
+
database
|
|
782
|
+
.hybrid_search_in_namespace_with_stats(
|
|
783
|
+
&self.request.namespace,
|
|
784
|
+
self.query.as_deref(),
|
|
785
|
+
sparse_ref,
|
|
786
|
+
self.request.options.clone(),
|
|
787
|
+
)
|
|
788
|
+
.map_err(to_napi_error)?
|
|
789
|
+
};
|
|
790
|
+
if self.with_stats {
|
|
791
|
+
stringify_value(search_outcome_to_json(
|
|
792
|
+
&outcome,
|
|
793
|
+
self.request.explain,
|
|
794
|
+
&self.request.fusion_name,
|
|
795
|
+
))
|
|
796
|
+
} else {
|
|
797
|
+
stringify_value(search_results_to_json(
|
|
798
|
+
&outcome.results,
|
|
799
|
+
self.request.explain,
|
|
800
|
+
&self.request.fusion_name,
|
|
801
|
+
))
|
|
802
|
+
}
|
|
803
|
+
}
|
|
804
|
+
|
|
805
|
+
fn resolve(&mut self, _env: napi::Env, output: Self::Output) -> Result<Self::JsValue> {
|
|
806
|
+
Ok(output)
|
|
807
|
+
}
|
|
808
|
+
}
|
|
809
|
+
|
|
810
|
+
pub struct FlushTask {
|
|
811
|
+
db: Arc<RwLock<CoreDatabase>>,
|
|
812
|
+
}
|
|
813
|
+
|
|
814
|
+
impl napi::Task for FlushTask {
|
|
815
|
+
type Output = ();
|
|
816
|
+
type JsValue = ();
|
|
817
|
+
|
|
818
|
+
fn compute(&mut self) -> Result<Self::Output> {
|
|
819
|
+
let mut database = self
|
|
820
|
+
.db
|
|
821
|
+
.write()
|
|
822
|
+
.map_err(|e| err(format!("lock poisoned: {e}")))?;
|
|
823
|
+
database.flush().map_err(to_napi_error)
|
|
824
|
+
}
|
|
825
|
+
|
|
826
|
+
fn resolve(&mut self, _env: napi::Env, _output: Self::Output) -> Result<Self::JsValue> {
|
|
827
|
+
Ok(())
|
|
828
|
+
}
|
|
829
|
+
}
|
|
830
|
+
|
|
831
|
+
pub struct CompactTask {
|
|
832
|
+
db: Arc<RwLock<CoreDatabase>>,
|
|
833
|
+
}
|
|
834
|
+
|
|
835
|
+
impl napi::Task for CompactTask {
|
|
836
|
+
type Output = ();
|
|
837
|
+
type JsValue = ();
|
|
838
|
+
|
|
839
|
+
fn compute(&mut self) -> Result<Self::Output> {
|
|
840
|
+
let mut database = self
|
|
841
|
+
.db
|
|
842
|
+
.write()
|
|
843
|
+
.map_err(|e| err(format!("lock poisoned: {e}")))?;
|
|
844
|
+
database.compact().map_err(to_napi_error)
|
|
845
|
+
}
|
|
846
|
+
|
|
847
|
+
fn resolve(&mut self, _env: napi::Env, _output: Self::Output) -> Result<Self::JsValue> {
|
|
848
|
+
Ok(())
|
|
849
|
+
}
|
|
850
|
+
}
|
|
851
|
+
|
|
852
|
+
pub struct BulkIngestTask {
|
|
853
|
+
db: Arc<RwLock<CoreDatabase>>,
|
|
854
|
+
records: Vec<Record>,
|
|
855
|
+
batch_size: usize,
|
|
856
|
+
}
|
|
857
|
+
|
|
858
|
+
impl napi::Task for BulkIngestTask {
|
|
859
|
+
type Output = u32;
|
|
860
|
+
type JsValue = u32;
|
|
861
|
+
|
|
862
|
+
fn compute(&mut self) -> Result<Self::Output> {
|
|
863
|
+
let records = std::mem::take(&mut self.records);
|
|
864
|
+
let mut database = self
|
|
865
|
+
.db
|
|
866
|
+
.write()
|
|
867
|
+
.map_err(|e| err(format!("lock poisoned: {e}")))?;
|
|
868
|
+
database
|
|
869
|
+
.bulk_ingest(records, self.batch_size)
|
|
870
|
+
.map(|count| count as u32)
|
|
871
|
+
.map_err(to_napi_error)
|
|
872
|
+
}
|
|
873
|
+
|
|
874
|
+
fn resolve(&mut self, _env: napi::Env, output: Self::Output) -> Result<Self::JsValue> {
|
|
875
|
+
Ok(output)
|
|
876
|
+
}
|
|
877
|
+
}
|
|
878
|
+
|
|
879
|
+
#[napi]
|
|
880
|
+
impl NativeDatabase {
|
|
881
|
+
#[napi(js_name = "searchAsync")]
|
|
882
|
+
pub fn search_async(
|
|
883
|
+
&self,
|
|
884
|
+
query: Option<Vec<f64>>,
|
|
885
|
+
options_json: Option<String>,
|
|
886
|
+
) -> Result<AsyncTask<SearchTask>> {
|
|
887
|
+
let request = parse_search_request(options_json)?;
|
|
888
|
+
let query = query
|
|
889
|
+
.map(|vector| js_vector_to_core(vector, "query vector"))
|
|
890
|
+
.transpose()?;
|
|
891
|
+
Ok(AsyncTask::new(SearchTask {
|
|
892
|
+
db: self.inner.clone(),
|
|
893
|
+
query,
|
|
894
|
+
request,
|
|
895
|
+
with_stats: false,
|
|
896
|
+
}))
|
|
897
|
+
}
|
|
898
|
+
|
|
899
|
+
#[napi(js_name = "searchWithStatsAsync")]
|
|
900
|
+
pub fn search_with_stats_async(
|
|
901
|
+
&self,
|
|
902
|
+
query: Option<Vec<f64>>,
|
|
903
|
+
options_json: Option<String>,
|
|
904
|
+
) -> Result<AsyncTask<SearchTask>> {
|
|
905
|
+
let request = parse_search_request(options_json)?;
|
|
906
|
+
let query = query
|
|
907
|
+
.map(|vector| js_vector_to_core(vector, "query vector"))
|
|
908
|
+
.transpose()?;
|
|
909
|
+
Ok(AsyncTask::new(SearchTask {
|
|
910
|
+
db: self.inner.clone(),
|
|
911
|
+
query,
|
|
912
|
+
request,
|
|
913
|
+
with_stats: true,
|
|
914
|
+
}))
|
|
915
|
+
}
|
|
916
|
+
|
|
917
|
+
#[napi(js_name = "flushAsync")]
|
|
918
|
+
pub fn flush_async(&self) -> AsyncTask<FlushTask> {
|
|
919
|
+
AsyncTask::new(FlushTask {
|
|
920
|
+
db: self.inner.clone(),
|
|
921
|
+
})
|
|
922
|
+
}
|
|
923
|
+
|
|
924
|
+
#[napi(js_name = "compactAsync")]
|
|
925
|
+
pub fn compact_async(&self) -> AsyncTask<CompactTask> {
|
|
926
|
+
AsyncTask::new(CompactTask {
|
|
927
|
+
db: self.inner.clone(),
|
|
928
|
+
})
|
|
929
|
+
}
|
|
930
|
+
|
|
931
|
+
#[napi(js_name = "bulkIngestAsync")]
|
|
932
|
+
pub fn bulk_ingest_async(
|
|
933
|
+
&self,
|
|
934
|
+
records_json: String,
|
|
935
|
+
namespace: Option<String>,
|
|
936
|
+
batch_size: u32,
|
|
937
|
+
) -> Result<AsyncTask<BulkIngestTask>> {
|
|
938
|
+
let records = parse_record_batch_json(&records_json, namespace.as_deref())?;
|
|
939
|
+
Ok(AsyncTask::new(BulkIngestTask {
|
|
940
|
+
db: self.inner.clone(),
|
|
941
|
+
records,
|
|
942
|
+
batch_size: batch_size as usize,
|
|
943
|
+
}))
|
|
944
|
+
}
|
|
945
|
+
}
|
|
946
|
+
|
|
396
947
|
#[napi]
|
|
397
948
|
impl NativeTransaction {
|
|
398
949
|
#[napi]
|
|
@@ -410,11 +961,13 @@ impl NativeTransaction {
|
|
|
410
961
|
namespace: Option<String>,
|
|
411
962
|
sparse_json: Option<String>,
|
|
412
963
|
vectors_json: Option<String>,
|
|
964
|
+
ttl: Option<f64>,
|
|
413
965
|
) -> Result<()> {
|
|
414
966
|
let metadata = parse_metadata_json(metadata_json)?;
|
|
415
967
|
let sparse = parse_sparse_json(sparse_json)?;
|
|
416
968
|
let vectors = parse_named_vectors_json(vectors_json)?;
|
|
417
969
|
let vector = js_vector_to_core(vector, "vector")?;
|
|
970
|
+
let expires_at = ttl_to_expires_at(ttl)?;
|
|
418
971
|
self.stage(WriteOperation::Insert(Record {
|
|
419
972
|
namespace: namespace.unwrap_or_default(),
|
|
420
973
|
id,
|
|
@@ -422,6 +975,8 @@ impl NativeTransaction {
|
|
|
422
975
|
vectors,
|
|
423
976
|
sparse,
|
|
424
977
|
metadata,
|
|
978
|
+
multi_vectors: MultiVectors::new(),
|
|
979
|
+
expires_at,
|
|
425
980
|
}))
|
|
426
981
|
}
|
|
427
982
|
|
|
@@ -434,11 +989,13 @@ impl NativeTransaction {
|
|
|
434
989
|
namespace: Option<String>,
|
|
435
990
|
sparse_json: Option<String>,
|
|
436
991
|
vectors_json: Option<String>,
|
|
992
|
+
ttl: Option<f64>,
|
|
437
993
|
) -> Result<()> {
|
|
438
994
|
let metadata = parse_metadata_json(metadata_json)?;
|
|
439
995
|
let sparse = parse_sparse_json(sparse_json)?;
|
|
440
996
|
let vectors = parse_named_vectors_json(vectors_json)?;
|
|
441
997
|
let vector = js_vector_to_core(vector, "vector")?;
|
|
998
|
+
let expires_at = ttl_to_expires_at(ttl)?;
|
|
442
999
|
self.stage(WriteOperation::Upsert(Record {
|
|
443
1000
|
namespace: namespace.unwrap_or_default(),
|
|
444
1001
|
id,
|
|
@@ -446,6 +1003,8 @@ impl NativeTransaction {
|
|
|
446
1003
|
vectors,
|
|
447
1004
|
sparse,
|
|
448
1005
|
metadata,
|
|
1006
|
+
multi_vectors: MultiVectors::new(),
|
|
1007
|
+
expires_at,
|
|
449
1008
|
}))
|
|
450
1009
|
}
|
|
451
1010
|
|
|
@@ -604,7 +1163,13 @@ pub fn open(
|
|
|
604
1163
|
dimension: Option<u32>,
|
|
605
1164
|
read_only: bool,
|
|
606
1165
|
lock_timeout: Option<f64>,
|
|
1166
|
+
metric: Option<String>,
|
|
607
1167
|
) -> Result<NativeDatabase> {
|
|
1168
|
+
let parsed_metric = match metric.as_deref() {
|
|
1169
|
+
Some(name) => DistanceMetric::from_name(name).map_err(to_napi_error)?,
|
|
1170
|
+
None => DistanceMetric::Cosine,
|
|
1171
|
+
};
|
|
1172
|
+
|
|
608
1173
|
let database = if read_only {
|
|
609
1174
|
if !Path::new(&path).exists() {
|
|
610
1175
|
return Err(err("cannot open non-existent database in read-only mode"));
|
|
@@ -627,7 +1192,8 @@ pub fn open(
|
|
|
627
1192
|
db
|
|
628
1193
|
}
|
|
629
1194
|
(Some(dimension), None) => {
|
|
630
|
-
CoreDatabase::
|
|
1195
|
+
CoreDatabase::open_or_create_with_metric(&path, dimension as usize, parsed_metric)
|
|
1196
|
+
.map_err(to_napi_error)?
|
|
631
1197
|
}
|
|
632
1198
|
(None, Some(timeout)) => {
|
|
633
1199
|
CoreDatabase::open_with_timeout(&path, timeout).map_err(to_napi_error)?
|
|
@@ -638,7 +1204,8 @@ pub fn open(
|
|
|
638
1204
|
let Some(dimension) = dimension else {
|
|
639
1205
|
return Err(err("dimension is required when creating a new database"));
|
|
640
1206
|
};
|
|
641
|
-
CoreDatabase::
|
|
1207
|
+
CoreDatabase::create_with_metric(&path, dimension as usize, parsed_metric)
|
|
1208
|
+
.map_err(to_napi_error)?
|
|
642
1209
|
};
|
|
643
1210
|
|
|
644
1211
|
Ok(NativeDatabase {
|
|
@@ -682,6 +1249,7 @@ fn parse_search_request(options_json: Option<String>) -> Result<SearchRequest> {
|
|
|
682
1249
|
let fetch_k = get_usize(object, "fetchK")?.unwrap_or(0);
|
|
683
1250
|
let mmr_lambda = get_optional_f32(object, "mmrLambda")?;
|
|
684
1251
|
let vector_name = get_string(object, "vectorName")?;
|
|
1252
|
+
let truncate_dim = get_usize(object, "truncateDim")?;
|
|
685
1253
|
let fusion_name = get_string(object, "fusion")?.unwrap_or_else(|| "linear".to_owned());
|
|
686
1254
|
let rrf_k = get_usize(object, "rrfK")?.unwrap_or(60);
|
|
687
1255
|
let explain = get_bool(object, "explain")?.unwrap_or(false);
|
|
@@ -705,6 +1273,7 @@ fn parse_search_request(options_json: Option<String>) -> Result<SearchRequest> {
|
|
|
705
1273
|
mmr_lambda,
|
|
706
1274
|
vector_name,
|
|
707
1275
|
fusion: parse_fusion(&fusion_name, rrf_k)?,
|
|
1276
|
+
truncate_dim,
|
|
708
1277
|
multi_vector_queries: query_vectors,
|
|
709
1278
|
},
|
|
710
1279
|
})
|
|
@@ -829,6 +1398,39 @@ fn json_to_named_vectors(value: &Value) -> Result<NamedVectors> {
|
|
|
829
1398
|
Ok(vectors)
|
|
830
1399
|
}
|
|
831
1400
|
|
|
1401
|
+
fn json_to_multi_vectors(value: &Value) -> Result<MultiVectors> {
|
|
1402
|
+
let object = value
|
|
1403
|
+
.as_object()
|
|
1404
|
+
.ok_or_else(|| err("multi_vectors must be a JSON object"))?;
|
|
1405
|
+
let mut multi_vectors = MultiVectors::new();
|
|
1406
|
+
for (name, token_array) in object {
|
|
1407
|
+
if name.is_empty() {
|
|
1408
|
+
return Err(err("multi-vector space names must not be empty"));
|
|
1409
|
+
}
|
|
1410
|
+
let arr = token_array
|
|
1411
|
+
.as_array()
|
|
1412
|
+
.ok_or_else(|| err("multi-vector space value must be an array of arrays"))?;
|
|
1413
|
+
let mut token_vectors = Vec::with_capacity(arr.len());
|
|
1414
|
+
for item in arr {
|
|
1415
|
+
token_vectors.push(value_to_vector(item, "multi-vector token")?);
|
|
1416
|
+
}
|
|
1417
|
+
multi_vectors.insert(name.clone(), token_vectors);
|
|
1418
|
+
}
|
|
1419
|
+
Ok(multi_vectors)
|
|
1420
|
+
}
|
|
1421
|
+
|
|
1422
|
+
fn multi_vectors_to_json(mv: &MultiVectors) -> Value {
|
|
1423
|
+
let mut map = Map::new();
|
|
1424
|
+
for (name, token_vectors) in mv {
|
|
1425
|
+
let arr: Vec<Value> = token_vectors
|
|
1426
|
+
.iter()
|
|
1427
|
+
.map(|v| Value::Array(v.iter().map(|&f| json!(f)).collect()))
|
|
1428
|
+
.collect();
|
|
1429
|
+
map.insert(name.clone(), Value::Array(arr));
|
|
1430
|
+
}
|
|
1431
|
+
Value::Object(map)
|
|
1432
|
+
}
|
|
1433
|
+
|
|
832
1434
|
fn json_to_filter(value: &Value) -> Result<MetadataFilter> {
|
|
833
1435
|
let object = value
|
|
834
1436
|
.as_object()
|
|
@@ -1013,6 +1615,18 @@ fn json_to_record(object: &Map<String, Value>, default_namespace: Option<&str>)
|
|
|
1013
1615
|
.transpose()?
|
|
1014
1616
|
.unwrap_or_default();
|
|
1015
1617
|
|
|
1618
|
+
let multi_vectors = object
|
|
1619
|
+
.get("multi_vectors")
|
|
1620
|
+
.or_else(|| object.get("multiVectors"))
|
|
1621
|
+
.map(json_to_multi_vectors)
|
|
1622
|
+
.transpose()?
|
|
1623
|
+
.unwrap_or_default();
|
|
1624
|
+
|
|
1625
|
+
let ttl = object
|
|
1626
|
+
.get("ttl")
|
|
1627
|
+
.and_then(|v| v.as_f64());
|
|
1628
|
+
let expires_at = ttl_to_expires_at(ttl)?;
|
|
1629
|
+
|
|
1016
1630
|
Ok(Record {
|
|
1017
1631
|
namespace,
|
|
1018
1632
|
id: value_to_string(id)?,
|
|
@@ -1020,6 +1634,8 @@ fn json_to_record(object: &Map<String, Value>, default_namespace: Option<&str>)
|
|
|
1020
1634
|
vectors,
|
|
1021
1635
|
sparse,
|
|
1022
1636
|
metadata,
|
|
1637
|
+
multi_vectors,
|
|
1638
|
+
expires_at,
|
|
1023
1639
|
})
|
|
1024
1640
|
}
|
|
1025
1641
|
|
|
@@ -1031,6 +1647,8 @@ fn record_to_json(record: &Record) -> Value {
|
|
|
1031
1647
|
"vectors": named_vectors_to_json(&record.vectors),
|
|
1032
1648
|
"sparse": sparse_to_json(&record.sparse),
|
|
1033
1649
|
"metadata": metadata_to_json(&record.metadata),
|
|
1650
|
+
"multi_vectors": multi_vectors_to_json(&record.multi_vectors),
|
|
1651
|
+
"expires_at": record.expires_at,
|
|
1034
1652
|
})
|
|
1035
1653
|
}
|
|
1036
1654
|
|
|
@@ -1132,6 +1750,8 @@ fn search_stats_to_json(stats: &vectlite::SearchStats) -> Value {
|
|
|
1132
1750
|
"ann_loaded_from_disk": stats.ann_loaded_from_disk,
|
|
1133
1751
|
"wal_entries_replayed": stats.wal_entries_replayed,
|
|
1134
1752
|
"fusion": stats.fusion,
|
|
1753
|
+
"effective_dimension": stats.effective_dimension,
|
|
1754
|
+
"matryoshka_truncated": stats.matryoshka_truncated,
|
|
1135
1755
|
"rerank_applied": false,
|
|
1136
1756
|
"rerank_count": 0,
|
|
1137
1757
|
"timings": {
|
|
@@ -1286,6 +1906,23 @@ fn value_to_f32(value: &Value, label: &str) -> Result<f32> {
|
|
|
1286
1906
|
.ok_or_else(|| err(format!("{label} must contain numeric values")))
|
|
1287
1907
|
}
|
|
1288
1908
|
|
|
1909
|
+
/// Convert an optional TTL (seconds from now) to an absolute `expires_at` timestamp.
|
|
1910
|
+
fn ttl_to_expires_at(ttl: Option<f64>) -> Result<Option<f64>> {
|
|
1911
|
+
match ttl {
|
|
1912
|
+
None => Ok(None),
|
|
1913
|
+
Some(t) if t < 0.0 || t.is_nan() => {
|
|
1914
|
+
Err(err("ttl must be a non-negative finite number"))
|
|
1915
|
+
}
|
|
1916
|
+
Some(t) => {
|
|
1917
|
+
let now = std::time::SystemTime::now()
|
|
1918
|
+
.duration_since(std::time::UNIX_EPOCH)
|
|
1919
|
+
.unwrap_or_default()
|
|
1920
|
+
.as_secs_f64();
|
|
1921
|
+
Ok(Some(now + t))
|
|
1922
|
+
}
|
|
1923
|
+
}
|
|
1924
|
+
}
|
|
1925
|
+
|
|
1289
1926
|
fn js_vector_to_core(values: Vec<f64>, label: &str) -> Result<Vec<f32>> {
|
|
1290
1927
|
let mut vector = Vec::with_capacity(values.len());
|
|
1291
1928
|
for value in values {
|
|
@@ -1315,3 +1952,74 @@ fn to_napi_error(error: vectlite::VectLiteError) -> NapiError {
|
|
|
1315
1952
|
/// Builds the `InvalidFormat` error that carries the message
/// "database is closed".
fn closed_database_error() -> vectlite::VectLiteError {
    let message = String::from("database is closed");
    vectlite::VectLiteError::InvalidFormat(message)
}
|
|
1955
|
+
|
|
1956
|
+
fn parse_payload_index_type(name: &str) -> Result<PayloadIndexType> {
|
|
1957
|
+
PayloadIndexType::from_name(name).map_err(to_napi_error)
|
|
1958
|
+
}
|
|
1959
|
+
|
|
1960
|
+
fn parse_quantization_options(
|
|
1961
|
+
options_json: Option<&str>,
|
|
1962
|
+
) -> Result<(Option<usize>, Option<usize>, Option<usize>, Option<usize>)> {
|
|
1963
|
+
let Some(json_str) = options_json else {
|
|
1964
|
+
return Ok((None, None, None, None));
|
|
1965
|
+
};
|
|
1966
|
+
let value: Value = serde_json::from_str(json_str)
|
|
1967
|
+
.map_err(|e| err(format!("invalid quantization options JSON: {e}")))?;
|
|
1968
|
+
let obj = value
|
|
1969
|
+
.as_object()
|
|
1970
|
+
.ok_or_else(|| err("quantization options must be a JSON object"))?;
|
|
1971
|
+
|
|
1972
|
+
let rescore_multiplier = obj
|
|
1973
|
+
.get("rescoreMultiplier")
|
|
1974
|
+
.or_else(|| obj.get("rescore_multiplier"))
|
|
1975
|
+
.and_then(|v| v.as_u64())
|
|
1976
|
+
.map(|v| v as usize);
|
|
1977
|
+
let num_sub_vectors = obj
|
|
1978
|
+
.get("numSubVectors")
|
|
1979
|
+
.or_else(|| obj.get("num_sub_vectors"))
|
|
1980
|
+
.and_then(|v| v.as_u64())
|
|
1981
|
+
.map(|v| v as usize);
|
|
1982
|
+
let num_centroids = obj
|
|
1983
|
+
.get("numCentroids")
|
|
1984
|
+
.or_else(|| obj.get("num_centroids"))
|
|
1985
|
+
.and_then(|v| v.as_u64())
|
|
1986
|
+
.map(|v| v as usize);
|
|
1987
|
+
let training_iterations = obj
|
|
1988
|
+
.get("trainingIterations")
|
|
1989
|
+
.or_else(|| obj.get("training_iterations"))
|
|
1990
|
+
.and_then(|v| v.as_u64())
|
|
1991
|
+
.map(|v| v as usize);
|
|
1992
|
+
|
|
1993
|
+
Ok((
|
|
1994
|
+
rescore_multiplier,
|
|
1995
|
+
num_sub_vectors,
|
|
1996
|
+
num_centroids,
|
|
1997
|
+
training_iterations,
|
|
1998
|
+
))
|
|
1999
|
+
}
|
|
2000
|
+
|
|
2001
|
+
fn build_quantization_config(
|
|
2002
|
+
method: &str,
|
|
2003
|
+
rescore_multiplier: Option<usize>,
|
|
2004
|
+
num_sub_vectors: Option<usize>,
|
|
2005
|
+
num_centroids: Option<usize>,
|
|
2006
|
+
training_iterations: Option<usize>,
|
|
2007
|
+
) -> Result<QuantizationConfig> {
|
|
2008
|
+
match method {
|
|
2009
|
+
"scalar" | "int8" => Ok(QuantizationConfig::Scalar(ScalarQuantizationConfig {
|
|
2010
|
+
rescore_multiplier: rescore_multiplier.unwrap_or(5),
|
|
2011
|
+
})),
|
|
2012
|
+
"binary" => Ok(QuantizationConfig::Binary(BinaryQuantizationConfig {
|
|
2013
|
+
rescore_multiplier: rescore_multiplier.unwrap_or(10),
|
|
2014
|
+
})),
|
|
2015
|
+
"product" | "pq" => Ok(QuantizationConfig::Product(ProductQuantizationConfig {
|
|
2016
|
+
num_sub_vectors: num_sub_vectors.unwrap_or(16),
|
|
2017
|
+
num_centroids: num_centroids.unwrap_or(256),
|
|
2018
|
+
training_iterations: training_iterations.unwrap_or(20),
|
|
2019
|
+
rescore_multiplier: rescore_multiplier.unwrap_or(10),
|
|
2020
|
+
})),
|
|
2021
|
+
other => Err(err(format!(
|
|
2022
|
+
"unknown quantization method '{other}'. Expected: 'scalar', 'binary', or 'product'"
|
|
2023
|
+
))),
|
|
2024
|
+
}
|
|
2025
|
+
}
|