vectlite 0.1.12 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +193 -1
- package/index.d.ts +55 -0
- package/index.js +171 -12
- package/native/Cargo.toml +1 -1
- package/native/src/lib.rs +612 -27
- package/native/vectlite-core/Cargo.toml +2 -1
- package/native/vectlite-core/src/lib.rs +3871 -183
- package/native/vectlite-core/src/quantization.rs +500 -0
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/vectlite.node +0 -0
- package/prebuilds/darwin-x64/vectlite.node +0 -0
- package/prebuilds/linux-x64-gnu/vectlite.node +0 -0
- package/prebuilds/win32-x64-msvc/vectlite.node +0 -0
package/native/src/lib.rs
CHANGED
|
@@ -7,13 +7,14 @@ use napi::bindgen_prelude::*;
|
|
|
7
7
|
use napi_derive::napi;
|
|
8
8
|
use serde_json::{Map, Number, Value, json};
|
|
9
9
|
use vectlite::quantization::{
|
|
10
|
-
BinaryQuantizationConfig,
|
|
11
|
-
ScalarQuantizationConfig,
|
|
10
|
+
BinaryQuantizationConfig, MultiVectorQuantizationConfig, ProductQuantizationConfig,
|
|
11
|
+
QuantizationConfig, ScalarQuantizationConfig, TwoBitQuantizationConfig,
|
|
12
12
|
};
|
|
13
13
|
use vectlite::{
|
|
14
|
-
Database as CoreDatabase, FusionStrategy, HybridSearchOptions, Metadata,
|
|
15
|
-
|
|
16
|
-
Store as CoreStore,
|
|
14
|
+
Database as CoreDatabase, DistanceMetric, FusionStrategy, HybridSearchOptions, Metadata,
|
|
15
|
+
MetadataFilter, MetadataValue, MultiVectorSearchOptions, MultiVectors, NamedVectors,
|
|
16
|
+
PayloadIndexType, Record, SearchOutcome, SearchResult, SparseVector, Store as CoreStore,
|
|
17
|
+
WriteOperation,
|
|
17
18
|
};
|
|
18
19
|
|
|
19
20
|
#[napi(js_name = "NativeDatabase")]
|
|
@@ -130,6 +131,12 @@ impl NativeDatabase {
|
|
|
130
131
|
Ok(database.dimension() as u32)
|
|
131
132
|
}
|
|
132
133
|
|
|
134
|
+
#[napi(getter)]
|
|
135
|
+
pub fn metric(&self) -> Result<String> {
|
|
136
|
+
let database = self.read()?;
|
|
137
|
+
Ok(database.metric().name().to_owned())
|
|
138
|
+
}
|
|
139
|
+
|
|
133
140
|
#[napi(getter, js_name = "readOnly")]
|
|
134
141
|
pub fn read_only(&self) -> Result<bool> {
|
|
135
142
|
let database = self.read()?;
|
|
@@ -146,10 +153,6 @@ impl NativeDatabase {
|
|
|
146
153
|
json_to_filter(&value)
|
|
147
154
|
})
|
|
148
155
|
.transpose()?;
|
|
149
|
-
if namespace.is_none() && filter.is_none() {
|
|
150
|
-
let database = self.read()?;
|
|
151
|
-
return Ok(database.len() as u32);
|
|
152
|
-
}
|
|
153
156
|
let database = self.read()?;
|
|
154
157
|
Ok(database.count_filtered(namespace.as_deref(), filter.as_ref()) as u32)
|
|
155
158
|
}
|
|
@@ -199,6 +202,38 @@ impl NativeDatabase {
|
|
|
199
202
|
stringify_value(Value::Array(json_records))
|
|
200
203
|
}
|
|
201
204
|
|
|
205
|
+
#[napi(js_name = "listCursor")]
|
|
206
|
+
pub fn list_cursor(
|
|
207
|
+
&self,
|
|
208
|
+
namespace: Option<String>,
|
|
209
|
+
filter_json: Option<String>,
|
|
210
|
+
limit: Option<u32>,
|
|
211
|
+
cursor: Option<String>,
|
|
212
|
+
) -> Result<String> {
|
|
213
|
+
let filter = filter_json
|
|
214
|
+
.as_ref()
|
|
215
|
+
.map(|json_str| {
|
|
216
|
+
let value: serde_json::Value = serde_json::from_str(json_str)
|
|
217
|
+
.map_err(|e| err(format!("invalid filter JSON: {e}")))?;
|
|
218
|
+
json_to_filter(&value)
|
|
219
|
+
})
|
|
220
|
+
.transpose()?;
|
|
221
|
+
let database = self.read()?;
|
|
222
|
+
let (records, next_cursor) = database.list_cursor(
|
|
223
|
+
namespace.as_deref(),
|
|
224
|
+
filter.as_ref(),
|
|
225
|
+
limit.unwrap_or(0) as usize,
|
|
226
|
+
cursor.as_deref(),
|
|
227
|
+
);
|
|
228
|
+
let records: Vec<Record> = records.into_iter().cloned().collect();
|
|
229
|
+
let json_records: Vec<Value> = records.iter().map(record_to_json).collect();
|
|
230
|
+
let result = serde_json::json!({
|
|
231
|
+
"records": json_records,
|
|
232
|
+
"cursor": next_cursor,
|
|
233
|
+
});
|
|
234
|
+
stringify_value(result)
|
|
235
|
+
}
|
|
236
|
+
|
|
202
237
|
#[napi(js_name = "deleteByFilter")]
|
|
203
238
|
pub fn delete_by_filter(&self, filter_json: String, namespace: Option<String>) -> Result<u32> {
|
|
204
239
|
let value: serde_json::Value = serde_json::from_str(&filter_json)
|
|
@@ -211,6 +246,70 @@ impl NativeDatabase {
|
|
|
211
246
|
.map_err(to_napi_error)
|
|
212
247
|
}
|
|
213
248
|
|
|
249
|
+
#[napi]
|
|
250
|
+
pub fn update_metadata(
|
|
251
|
+
&self,
|
|
252
|
+
id: String,
|
|
253
|
+
metadata_json: String,
|
|
254
|
+
namespace: Option<String>,
|
|
255
|
+
) -> Result<bool> {
|
|
256
|
+
let patch = parse_metadata_json(Some(metadata_json))?;
|
|
257
|
+
let mut database = self.write_open()?;
|
|
258
|
+
database
|
|
259
|
+
.update_metadata_in_namespace(namespace.unwrap_or_default(), &id, patch)
|
|
260
|
+
.map_err(to_napi_error)
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
// -------------------------------------------------------------------
|
|
264
|
+
// TTL / Expiry
|
|
265
|
+
// -------------------------------------------------------------------
|
|
266
|
+
|
|
267
|
+
#[napi(js_name = "setTtl")]
|
|
268
|
+
pub fn set_ttl(&self, id: String, ttl: f64, namespace: Option<String>) -> Result<bool> {
|
|
269
|
+
let mut database = self.write_open()?;
|
|
270
|
+
database
|
|
271
|
+
.set_ttl_in_namespace(&namespace.unwrap_or_default(), &id, ttl)
|
|
272
|
+
.map_err(to_napi_error)
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
#[napi(js_name = "clearTtl")]
|
|
276
|
+
pub fn clear_ttl(&self, id: String, namespace: Option<String>) -> Result<bool> {
|
|
277
|
+
let mut database = self.write_open()?;
|
|
278
|
+
database
|
|
279
|
+
.clear_ttl_in_namespace(&namespace.unwrap_or_default(), &id)
|
|
280
|
+
.map_err(to_napi_error)
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
// -------------------------------------------------------------------
|
|
284
|
+
// Payload Indexes
|
|
285
|
+
// -------------------------------------------------------------------
|
|
286
|
+
|
|
287
|
+
#[napi(js_name = "createIndex")]
|
|
288
|
+
pub fn create_index(&self, field: String, index_type: String) -> Result<bool> {
|
|
289
|
+
let ty = parse_payload_index_type(&index_type)?;
|
|
290
|
+
let mut database = self.write_open()?;
|
|
291
|
+
database.create_index(&field, ty).map_err(to_napi_error)
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
#[napi(js_name = "dropIndex")]
|
|
295
|
+
pub fn drop_index(&self, field: String) -> Result<bool> {
|
|
296
|
+
let mut database = self.write_open()?;
|
|
297
|
+
database.drop_index(&field).map_err(to_napi_error)
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
#[napi(js_name = "listIndexes")]
|
|
301
|
+
pub fn list_indexes(&self) -> Result<String> {
|
|
302
|
+
let database = self.read()?;
|
|
303
|
+
let indexes = database.list_indexes();
|
|
304
|
+
let arr: Vec<Value> = indexes
|
|
305
|
+
.into_iter()
|
|
306
|
+
.map(|(field, index_type)| {
|
|
307
|
+
json!({ "field": field, "type": index_type.name() })
|
|
308
|
+
})
|
|
309
|
+
.collect();
|
|
310
|
+
serde_json::to_string(&arr).map_err(|e| err(format!("JSON serialize: {e}")))
|
|
311
|
+
}
|
|
312
|
+
|
|
214
313
|
#[napi]
|
|
215
314
|
pub fn transaction(&self) -> Result<NativeTransaction> {
|
|
216
315
|
drop(self.read()?);
|
|
@@ -229,21 +328,27 @@ impl NativeDatabase {
|
|
|
229
328
|
namespace: Option<String>,
|
|
230
329
|
sparse_json: Option<String>,
|
|
231
330
|
vectors_json: Option<String>,
|
|
331
|
+
ttl: Option<f64>,
|
|
232
332
|
) -> Result<()> {
|
|
233
333
|
let metadata = parse_metadata_json(metadata_json)?;
|
|
234
334
|
let sparse = parse_sparse_json(sparse_json)?;
|
|
235
335
|
let vectors = parse_named_vectors_json(vectors_json)?;
|
|
236
336
|
let vector = js_vector_to_core(vector, "vector")?;
|
|
337
|
+
let expires_at = ttl_to_expires_at(ttl)?;
|
|
338
|
+
let record = Record {
|
|
339
|
+
namespace: namespace.unwrap_or_default(),
|
|
340
|
+
id,
|
|
341
|
+
vector,
|
|
342
|
+
vectors,
|
|
343
|
+
sparse,
|
|
344
|
+
metadata,
|
|
345
|
+
multi_vectors: MultiVectors::new(),
|
|
346
|
+
expires_at,
|
|
347
|
+
};
|
|
237
348
|
let mut database = self.write_open()?;
|
|
238
349
|
database
|
|
239
|
-
.
|
|
240
|
-
|
|
241
|
-
&id,
|
|
242
|
-
vector,
|
|
243
|
-
vectors,
|
|
244
|
-
sparse,
|
|
245
|
-
metadata,
|
|
246
|
-
)
|
|
350
|
+
.insert_many(std::iter::once(record))
|
|
351
|
+
.map(|_| ())
|
|
247
352
|
.map_err(to_napi_error)
|
|
248
353
|
}
|
|
249
354
|
|
|
@@ -256,21 +361,27 @@ impl NativeDatabase {
|
|
|
256
361
|
namespace: Option<String>,
|
|
257
362
|
sparse_json: Option<String>,
|
|
258
363
|
vectors_json: Option<String>,
|
|
364
|
+
ttl: Option<f64>,
|
|
259
365
|
) -> Result<()> {
|
|
260
366
|
let metadata = parse_metadata_json(metadata_json)?;
|
|
261
367
|
let sparse = parse_sparse_json(sparse_json)?;
|
|
262
368
|
let vectors = parse_named_vectors_json(vectors_json)?;
|
|
263
369
|
let vector = js_vector_to_core(vector, "vector")?;
|
|
370
|
+
let expires_at = ttl_to_expires_at(ttl)?;
|
|
371
|
+
let record = Record {
|
|
372
|
+
namespace: namespace.unwrap_or_default(),
|
|
373
|
+
id,
|
|
374
|
+
vector,
|
|
375
|
+
vectors,
|
|
376
|
+
sparse,
|
|
377
|
+
metadata,
|
|
378
|
+
multi_vectors: MultiVectors::new(),
|
|
379
|
+
expires_at,
|
|
380
|
+
};
|
|
264
381
|
let mut database = self.write_open()?;
|
|
265
382
|
database
|
|
266
|
-
.
|
|
267
|
-
|
|
268
|
-
&id,
|
|
269
|
-
vector,
|
|
270
|
-
vectors,
|
|
271
|
-
sparse,
|
|
272
|
-
metadata,
|
|
273
|
-
)
|
|
383
|
+
.upsert_many(std::iter::once(record))
|
|
384
|
+
.map(|_| ())
|
|
274
385
|
.map_err(to_napi_error)
|
|
275
386
|
}
|
|
276
387
|
|
|
@@ -404,6 +515,190 @@ impl NativeDatabase {
|
|
|
404
515
|
}))
|
|
405
516
|
}
|
|
406
517
|
|
|
518
|
+
// ---- Multi-vector / ColBERT-style late interaction ----
|
|
519
|
+
|
|
520
|
+
/// Upsert a record with multi-vector token embeddings (ColBERT-style).
|
|
521
|
+
///
|
|
522
|
+
/// `multi_vectors_json` is a JSON string mapping space names to arrays of
|
|
523
|
+
/// token vectors, e.g. `{"colbert": [[0.1, 0.2], [0.3, 0.4]]}`.
|
|
524
|
+
#[napi(js_name = "upsertMultiVectors")]
|
|
525
|
+
pub fn upsert_multi_vectors(
|
|
526
|
+
&self,
|
|
527
|
+
id: String,
|
|
528
|
+
vector: Vec<f64>,
|
|
529
|
+
multi_vectors_json: String,
|
|
530
|
+
options_json: Option<String>,
|
|
531
|
+
) -> Result<()> {
|
|
532
|
+
let vector: Vec<f32> = vector.iter().map(|&v| v as f32).collect();
|
|
533
|
+
let mv_value: Value = serde_json::from_str(&multi_vectors_json)
|
|
534
|
+
.map_err(|e| to_napi_error(vectlite::VectLiteError::InvalidFormat(e.to_string())))?;
|
|
535
|
+
let mv = json_to_multi_vectors(&mv_value)?;
|
|
536
|
+
|
|
537
|
+
let (metadata, namespace) = if let Some(opts) = options_json {
|
|
538
|
+
let opts: Value = serde_json::from_str(&opts)
|
|
539
|
+
.map_err(|e| to_napi_error(vectlite::VectLiteError::InvalidFormat(e.to_string())))?;
|
|
540
|
+
let metadata = opts
|
|
541
|
+
.get("metadata")
|
|
542
|
+
.map(|v| json_to_metadata(v))
|
|
543
|
+
.transpose()?
|
|
544
|
+
.unwrap_or_default();
|
|
545
|
+
let namespace = opts
|
|
546
|
+
.get("namespace")
|
|
547
|
+
.and_then(|v| v.as_str())
|
|
548
|
+
.unwrap_or("")
|
|
549
|
+
.to_string();
|
|
550
|
+
(metadata, namespace)
|
|
551
|
+
} else {
|
|
552
|
+
(Metadata::new(), String::new())
|
|
553
|
+
};
|
|
554
|
+
|
|
555
|
+
let mut database = self.write_open()?;
|
|
556
|
+
database
|
|
557
|
+
.upsert_multi_vectors_in_namespace(namespace, id, vector, metadata, mv)
|
|
558
|
+
.map_err(to_napi_error)
|
|
559
|
+
}
|
|
560
|
+
|
|
561
|
+
/// Search using multi-vector late interaction (MaxSim) scoring.
|
|
562
|
+
///
|
|
563
|
+
/// Returns a JSON array of results with id, score, namespace, and metadata.
|
|
564
|
+
#[napi(js_name = "searchMultiVector")]
|
|
565
|
+
pub fn search_multi_vector(
|
|
566
|
+
&self,
|
|
567
|
+
space: String,
|
|
568
|
+
query_tokens_json: String,
|
|
569
|
+
options_json: Option<String>,
|
|
570
|
+
) -> Result<String> {
|
|
571
|
+
let qt_value: Value = serde_json::from_str(&query_tokens_json)
|
|
572
|
+
.map_err(|e| to_napi_error(vectlite::VectLiteError::InvalidFormat(e.to_string())))?;
|
|
573
|
+
let qt_arr = qt_value
|
|
574
|
+
.as_array()
|
|
575
|
+
.ok_or_else(|| to_napi_error(vectlite::VectLiteError::InvalidFormat(
|
|
576
|
+
"query_tokens must be a JSON array of arrays".to_owned(),
|
|
577
|
+
)))?;
|
|
578
|
+
let query_tokens: Vec<Vec<f32>> = qt_arr
|
|
579
|
+
.iter()
|
|
580
|
+
.map(|v| {
|
|
581
|
+
v.as_array()
|
|
582
|
+
.ok_or_else(|| {
|
|
583
|
+
to_napi_error(vectlite::VectLiteError::InvalidFormat(
|
|
584
|
+
"each query token must be an array of numbers".to_owned(),
|
|
585
|
+
))
|
|
586
|
+
})?
|
|
587
|
+
.iter()
|
|
588
|
+
.map(|n| {
|
|
589
|
+
n.as_f64()
|
|
590
|
+
.map(|f| f as f32)
|
|
591
|
+
.ok_or_else(|| {
|
|
592
|
+
to_napi_error(vectlite::VectLiteError::InvalidFormat(
|
|
593
|
+
"token values must be numbers".to_owned(),
|
|
594
|
+
))
|
|
595
|
+
})
|
|
596
|
+
})
|
|
597
|
+
.collect::<Result<Vec<f32>>>()
|
|
598
|
+
})
|
|
599
|
+
.collect::<Result<Vec<Vec<f32>>>>()?;
|
|
600
|
+
|
|
601
|
+
let (top_k, filter, namespace) = if let Some(opts) = options_json {
|
|
602
|
+
let opts: Value = serde_json::from_str(&opts)
|
|
603
|
+
.map_err(|e| to_napi_error(vectlite::VectLiteError::InvalidFormat(e.to_string())))?;
|
|
604
|
+
let top_k = opts.get("k").and_then(|v| v.as_u64()).unwrap_or(10) as usize;
|
|
605
|
+
let filter = opts
|
|
606
|
+
.get("filter")
|
|
607
|
+
.map(|v| json_to_filter(v))
|
|
608
|
+
.transpose()?;
|
|
609
|
+
let namespace = opts
|
|
610
|
+
.get("namespace")
|
|
611
|
+
.and_then(|v| v.as_str())
|
|
612
|
+
.map(String::from);
|
|
613
|
+
(top_k, filter, namespace)
|
|
614
|
+
} else {
|
|
615
|
+
(10, None, None)
|
|
616
|
+
};
|
|
617
|
+
|
|
618
|
+
let options = MultiVectorSearchOptions {
|
|
619
|
+
top_k,
|
|
620
|
+
filter,
|
|
621
|
+
namespace,
|
|
622
|
+
};
|
|
623
|
+
|
|
624
|
+
let database = self.read()?;
|
|
625
|
+
let results = database
|
|
626
|
+
.search_multi_vector(&space, &query_tokens, options)
|
|
627
|
+
.map_err(to_napi_error)?;
|
|
628
|
+
|
|
629
|
+
let json_results: Vec<Value> = results
|
|
630
|
+
.into_iter()
|
|
631
|
+
.map(|r| {
|
|
632
|
+
json!({
|
|
633
|
+
"id": r.id,
|
|
634
|
+
"score": r.score,
|
|
635
|
+
"namespace": r.namespace,
|
|
636
|
+
"metadata": metadata_to_json(&r.metadata),
|
|
637
|
+
})
|
|
638
|
+
})
|
|
639
|
+
.collect();
|
|
640
|
+
|
|
641
|
+
serde_json::to_string(&json_results)
|
|
642
|
+
.map_err(|e| to_napi_error(vectlite::VectLiteError::InvalidFormat(e.to_string())))
|
|
643
|
+
}
|
|
644
|
+
|
|
645
|
+
/// Enable 2-bit quantization for a multi-vector space.
|
|
646
|
+
#[napi(js_name = "enableMultiVectorQuantization")]
|
|
647
|
+
pub fn enable_multi_vector_quantization(
|
|
648
|
+
&self,
|
|
649
|
+
space: String,
|
|
650
|
+
options_json: Option<String>,
|
|
651
|
+
) -> Result<()> {
|
|
652
|
+
let (method, rescore_multiplier) = if let Some(opts) = options_json {
|
|
653
|
+
let opts: Value = serde_json::from_str(&opts)
|
|
654
|
+
.map_err(|e| to_napi_error(vectlite::VectLiteError::InvalidFormat(e.to_string())))?;
|
|
655
|
+
let method = opts
|
|
656
|
+
.get("method")
|
|
657
|
+
.and_then(|v| v.as_str())
|
|
658
|
+
.unwrap_or("two_bit")
|
|
659
|
+
.to_string();
|
|
660
|
+
let rescore = opts
|
|
661
|
+
.get("rescoreMultiplier")
|
|
662
|
+
.and_then(|v| v.as_u64())
|
|
663
|
+
.map(|v| v as usize);
|
|
664
|
+
(method, rescore)
|
|
665
|
+
} else {
|
|
666
|
+
("two_bit".to_string(), None)
|
|
667
|
+
};
|
|
668
|
+
|
|
669
|
+
let config = match method.as_str() {
|
|
670
|
+
"two_bit" => MultiVectorQuantizationConfig::TwoBit(TwoBitQuantizationConfig {
|
|
671
|
+
rescore_multiplier: rescore_multiplier.unwrap_or(4),
|
|
672
|
+
}),
|
|
673
|
+
other => {
|
|
674
|
+
return Err(to_napi_error(vectlite::VectLiteError::InvalidFormat(
|
|
675
|
+
format!("unknown multi-vector quantization method: {other}. Supported: two_bit"),
|
|
676
|
+
)));
|
|
677
|
+
}
|
|
678
|
+
};
|
|
679
|
+
|
|
680
|
+
let mut database = self.write_open()?;
|
|
681
|
+
database
|
|
682
|
+
.enable_multi_vector_quantization(&space, config)
|
|
683
|
+
.map_err(to_napi_error)
|
|
684
|
+
}
|
|
685
|
+
|
|
686
|
+
/// Disable multi-vector quantization for a space.
|
|
687
|
+
#[napi(js_name = "disableMultiVectorQuantization")]
|
|
688
|
+
pub fn disable_multi_vector_quantization(&self, space: String) -> Result<()> {
|
|
689
|
+
let mut database = self.write_open()?;
|
|
690
|
+
database
|
|
691
|
+
.disable_multi_vector_quantization(&space)
|
|
692
|
+
.map_err(to_napi_error)
|
|
693
|
+
}
|
|
694
|
+
|
|
695
|
+
/// Returns true if multi-vector quantization is enabled for the given space.
|
|
696
|
+
#[napi(js_name = "isMultiVectorQuantized")]
|
|
697
|
+
pub fn is_multi_vector_quantized(&self, space: String) -> Result<bool> {
|
|
698
|
+
let database = self.read()?;
|
|
699
|
+
Ok(database.is_multi_vector_quantized(&space))
|
|
700
|
+
}
|
|
701
|
+
|
|
407
702
|
#[napi]
|
|
408
703
|
pub fn snapshot(&self, dest: String) -> Result<()> {
|
|
409
704
|
let database = self.read()?;
|
|
@@ -449,6 +744,206 @@ impl NativeDatabase {
|
|
|
449
744
|
}
|
|
450
745
|
}
|
|
451
746
|
|
|
747
|
+
// -------------------------------------------------------------------
|
|
748
|
+
// Async tasks (run on libuv threadpool via napi::Task)
|
|
749
|
+
// -------------------------------------------------------------------
|
|
750
|
+
|
|
751
|
+
pub struct SearchTask {
|
|
752
|
+
db: Arc<RwLock<CoreDatabase>>,
|
|
753
|
+
query: Option<Vec<f32>>,
|
|
754
|
+
request: SearchRequest,
|
|
755
|
+
with_stats: bool,
|
|
756
|
+
}
|
|
757
|
+
|
|
758
|
+
impl napi::Task for SearchTask {
|
|
759
|
+
type Output = String;
|
|
760
|
+
type JsValue = String;
|
|
761
|
+
|
|
762
|
+
fn compute(&mut self) -> Result<Self::Output> {
|
|
763
|
+
let database = self
|
|
764
|
+
.db
|
|
765
|
+
.read()
|
|
766
|
+
.map_err(|e| err(format!("lock poisoned: {e}")))?;
|
|
767
|
+
let sparse_ref = if self.request.sparse.is_empty() {
|
|
768
|
+
None
|
|
769
|
+
} else {
|
|
770
|
+
Some(&self.request.sparse)
|
|
771
|
+
};
|
|
772
|
+
let outcome = if self.request.all_namespaces {
|
|
773
|
+
database
|
|
774
|
+
.hybrid_search_all_namespaces_with_stats(
|
|
775
|
+
self.query.as_deref(),
|
|
776
|
+
sparse_ref,
|
|
777
|
+
self.request.options.clone(),
|
|
778
|
+
)
|
|
779
|
+
.map_err(to_napi_error)?
|
|
780
|
+
} else {
|
|
781
|
+
database
|
|
782
|
+
.hybrid_search_in_namespace_with_stats(
|
|
783
|
+
&self.request.namespace,
|
|
784
|
+
self.query.as_deref(),
|
|
785
|
+
sparse_ref,
|
|
786
|
+
self.request.options.clone(),
|
|
787
|
+
)
|
|
788
|
+
.map_err(to_napi_error)?
|
|
789
|
+
};
|
|
790
|
+
if self.with_stats {
|
|
791
|
+
stringify_value(search_outcome_to_json(
|
|
792
|
+
&outcome,
|
|
793
|
+
self.request.explain,
|
|
794
|
+
&self.request.fusion_name,
|
|
795
|
+
))
|
|
796
|
+
} else {
|
|
797
|
+
stringify_value(search_results_to_json(
|
|
798
|
+
&outcome.results,
|
|
799
|
+
self.request.explain,
|
|
800
|
+
&self.request.fusion_name,
|
|
801
|
+
))
|
|
802
|
+
}
|
|
803
|
+
}
|
|
804
|
+
|
|
805
|
+
fn resolve(&mut self, _env: napi::Env, output: Self::Output) -> Result<Self::JsValue> {
|
|
806
|
+
Ok(output)
|
|
807
|
+
}
|
|
808
|
+
}
|
|
809
|
+
|
|
810
|
+
pub struct FlushTask {
|
|
811
|
+
db: Arc<RwLock<CoreDatabase>>,
|
|
812
|
+
}
|
|
813
|
+
|
|
814
|
+
impl napi::Task for FlushTask {
|
|
815
|
+
type Output = ();
|
|
816
|
+
type JsValue = ();
|
|
817
|
+
|
|
818
|
+
fn compute(&mut self) -> Result<Self::Output> {
|
|
819
|
+
let mut database = self
|
|
820
|
+
.db
|
|
821
|
+
.write()
|
|
822
|
+
.map_err(|e| err(format!("lock poisoned: {e}")))?;
|
|
823
|
+
database.flush().map_err(to_napi_error)
|
|
824
|
+
}
|
|
825
|
+
|
|
826
|
+
fn resolve(&mut self, _env: napi::Env, _output: Self::Output) -> Result<Self::JsValue> {
|
|
827
|
+
Ok(())
|
|
828
|
+
}
|
|
829
|
+
}
|
|
830
|
+
|
|
831
|
+
pub struct CompactTask {
|
|
832
|
+
db: Arc<RwLock<CoreDatabase>>,
|
|
833
|
+
}
|
|
834
|
+
|
|
835
|
+
impl napi::Task for CompactTask {
|
|
836
|
+
type Output = ();
|
|
837
|
+
type JsValue = ();
|
|
838
|
+
|
|
839
|
+
fn compute(&mut self) -> Result<Self::Output> {
|
|
840
|
+
let mut database = self
|
|
841
|
+
.db
|
|
842
|
+
.write()
|
|
843
|
+
.map_err(|e| err(format!("lock poisoned: {e}")))?;
|
|
844
|
+
database.compact().map_err(to_napi_error)
|
|
845
|
+
}
|
|
846
|
+
|
|
847
|
+
fn resolve(&mut self, _env: napi::Env, _output: Self::Output) -> Result<Self::JsValue> {
|
|
848
|
+
Ok(())
|
|
849
|
+
}
|
|
850
|
+
}
|
|
851
|
+
|
|
852
|
+
pub struct BulkIngestTask {
|
|
853
|
+
db: Arc<RwLock<CoreDatabase>>,
|
|
854
|
+
records: Vec<Record>,
|
|
855
|
+
batch_size: usize,
|
|
856
|
+
}
|
|
857
|
+
|
|
858
|
+
impl napi::Task for BulkIngestTask {
|
|
859
|
+
type Output = u32;
|
|
860
|
+
type JsValue = u32;
|
|
861
|
+
|
|
862
|
+
fn compute(&mut self) -> Result<Self::Output> {
|
|
863
|
+
let records = std::mem::take(&mut self.records);
|
|
864
|
+
let mut database = self
|
|
865
|
+
.db
|
|
866
|
+
.write()
|
|
867
|
+
.map_err(|e| err(format!("lock poisoned: {e}")))?;
|
|
868
|
+
database
|
|
869
|
+
.bulk_ingest(records, self.batch_size)
|
|
870
|
+
.map(|count| count as u32)
|
|
871
|
+
.map_err(to_napi_error)
|
|
872
|
+
}
|
|
873
|
+
|
|
874
|
+
fn resolve(&mut self, _env: napi::Env, output: Self::Output) -> Result<Self::JsValue> {
|
|
875
|
+
Ok(output)
|
|
876
|
+
}
|
|
877
|
+
}
|
|
878
|
+
|
|
879
|
+
#[napi]
|
|
880
|
+
impl NativeDatabase {
|
|
881
|
+
#[napi(js_name = "searchAsync")]
|
|
882
|
+
pub fn search_async(
|
|
883
|
+
&self,
|
|
884
|
+
query: Option<Vec<f64>>,
|
|
885
|
+
options_json: Option<String>,
|
|
886
|
+
) -> Result<AsyncTask<SearchTask>> {
|
|
887
|
+
let request = parse_search_request(options_json)?;
|
|
888
|
+
let query = query
|
|
889
|
+
.map(|vector| js_vector_to_core(vector, "query vector"))
|
|
890
|
+
.transpose()?;
|
|
891
|
+
Ok(AsyncTask::new(SearchTask {
|
|
892
|
+
db: self.inner.clone(),
|
|
893
|
+
query,
|
|
894
|
+
request,
|
|
895
|
+
with_stats: false,
|
|
896
|
+
}))
|
|
897
|
+
}
|
|
898
|
+
|
|
899
|
+
#[napi(js_name = "searchWithStatsAsync")]
|
|
900
|
+
pub fn search_with_stats_async(
|
|
901
|
+
&self,
|
|
902
|
+
query: Option<Vec<f64>>,
|
|
903
|
+
options_json: Option<String>,
|
|
904
|
+
) -> Result<AsyncTask<SearchTask>> {
|
|
905
|
+
let request = parse_search_request(options_json)?;
|
|
906
|
+
let query = query
|
|
907
|
+
.map(|vector| js_vector_to_core(vector, "query vector"))
|
|
908
|
+
.transpose()?;
|
|
909
|
+
Ok(AsyncTask::new(SearchTask {
|
|
910
|
+
db: self.inner.clone(),
|
|
911
|
+
query,
|
|
912
|
+
request,
|
|
913
|
+
with_stats: true,
|
|
914
|
+
}))
|
|
915
|
+
}
|
|
916
|
+
|
|
917
|
+
#[napi(js_name = "flushAsync")]
|
|
918
|
+
pub fn flush_async(&self) -> AsyncTask<FlushTask> {
|
|
919
|
+
AsyncTask::new(FlushTask {
|
|
920
|
+
db: self.inner.clone(),
|
|
921
|
+
})
|
|
922
|
+
}
|
|
923
|
+
|
|
924
|
+
#[napi(js_name = "compactAsync")]
|
|
925
|
+
pub fn compact_async(&self) -> AsyncTask<CompactTask> {
|
|
926
|
+
AsyncTask::new(CompactTask {
|
|
927
|
+
db: self.inner.clone(),
|
|
928
|
+
})
|
|
929
|
+
}
|
|
930
|
+
|
|
931
|
+
#[napi(js_name = "bulkIngestAsync")]
|
|
932
|
+
pub fn bulk_ingest_async(
|
|
933
|
+
&self,
|
|
934
|
+
records_json: String,
|
|
935
|
+
namespace: Option<String>,
|
|
936
|
+
batch_size: u32,
|
|
937
|
+
) -> Result<AsyncTask<BulkIngestTask>> {
|
|
938
|
+
let records = parse_record_batch_json(&records_json, namespace.as_deref())?;
|
|
939
|
+
Ok(AsyncTask::new(BulkIngestTask {
|
|
940
|
+
db: self.inner.clone(),
|
|
941
|
+
records,
|
|
942
|
+
batch_size: batch_size as usize,
|
|
943
|
+
}))
|
|
944
|
+
}
|
|
945
|
+
}
|
|
946
|
+
|
|
452
947
|
#[napi]
|
|
453
948
|
impl NativeTransaction {
|
|
454
949
|
#[napi]
|
|
@@ -466,11 +961,13 @@ impl NativeTransaction {
|
|
|
466
961
|
namespace: Option<String>,
|
|
467
962
|
sparse_json: Option<String>,
|
|
468
963
|
vectors_json: Option<String>,
|
|
964
|
+
ttl: Option<f64>,
|
|
469
965
|
) -> Result<()> {
|
|
470
966
|
let metadata = parse_metadata_json(metadata_json)?;
|
|
471
967
|
let sparse = parse_sparse_json(sparse_json)?;
|
|
472
968
|
let vectors = parse_named_vectors_json(vectors_json)?;
|
|
473
969
|
let vector = js_vector_to_core(vector, "vector")?;
|
|
970
|
+
let expires_at = ttl_to_expires_at(ttl)?;
|
|
474
971
|
self.stage(WriteOperation::Insert(Record {
|
|
475
972
|
namespace: namespace.unwrap_or_default(),
|
|
476
973
|
id,
|
|
@@ -478,6 +975,8 @@ impl NativeTransaction {
|
|
|
478
975
|
vectors,
|
|
479
976
|
sparse,
|
|
480
977
|
metadata,
|
|
978
|
+
multi_vectors: MultiVectors::new(),
|
|
979
|
+
expires_at,
|
|
481
980
|
}))
|
|
482
981
|
}
|
|
483
982
|
|
|
@@ -490,11 +989,13 @@ impl NativeTransaction {
|
|
|
490
989
|
namespace: Option<String>,
|
|
491
990
|
sparse_json: Option<String>,
|
|
492
991
|
vectors_json: Option<String>,
|
|
992
|
+
ttl: Option<f64>,
|
|
493
993
|
) -> Result<()> {
|
|
494
994
|
let metadata = parse_metadata_json(metadata_json)?;
|
|
495
995
|
let sparse = parse_sparse_json(sparse_json)?;
|
|
496
996
|
let vectors = parse_named_vectors_json(vectors_json)?;
|
|
497
997
|
let vector = js_vector_to_core(vector, "vector")?;
|
|
998
|
+
let expires_at = ttl_to_expires_at(ttl)?;
|
|
498
999
|
self.stage(WriteOperation::Upsert(Record {
|
|
499
1000
|
namespace: namespace.unwrap_or_default(),
|
|
500
1001
|
id,
|
|
@@ -502,6 +1003,8 @@ impl NativeTransaction {
|
|
|
502
1003
|
vectors,
|
|
503
1004
|
sparse,
|
|
504
1005
|
metadata,
|
|
1006
|
+
multi_vectors: MultiVectors::new(),
|
|
1007
|
+
expires_at,
|
|
505
1008
|
}))
|
|
506
1009
|
}
|
|
507
1010
|
|
|
@@ -660,7 +1163,13 @@ pub fn open(
|
|
|
660
1163
|
dimension: Option<u32>,
|
|
661
1164
|
read_only: bool,
|
|
662
1165
|
lock_timeout: Option<f64>,
|
|
1166
|
+
metric: Option<String>,
|
|
663
1167
|
) -> Result<NativeDatabase> {
|
|
1168
|
+
let parsed_metric = match metric.as_deref() {
|
|
1169
|
+
Some(name) => DistanceMetric::from_name(name).map_err(to_napi_error)?,
|
|
1170
|
+
None => DistanceMetric::Cosine,
|
|
1171
|
+
};
|
|
1172
|
+
|
|
664
1173
|
let database = if read_only {
|
|
665
1174
|
if !Path::new(&path).exists() {
|
|
666
1175
|
return Err(err("cannot open non-existent database in read-only mode"));
|
|
@@ -683,7 +1192,8 @@ pub fn open(
|
|
|
683
1192
|
db
|
|
684
1193
|
}
|
|
685
1194
|
(Some(dimension), None) => {
|
|
686
|
-
CoreDatabase::
|
|
1195
|
+
CoreDatabase::open_or_create_with_metric(&path, dimension as usize, parsed_metric)
|
|
1196
|
+
.map_err(to_napi_error)?
|
|
687
1197
|
}
|
|
688
1198
|
(None, Some(timeout)) => {
|
|
689
1199
|
CoreDatabase::open_with_timeout(&path, timeout).map_err(to_napi_error)?
|
|
@@ -694,7 +1204,8 @@ pub fn open(
|
|
|
694
1204
|
let Some(dimension) = dimension else {
|
|
695
1205
|
return Err(err("dimension is required when creating a new database"));
|
|
696
1206
|
};
|
|
697
|
-
CoreDatabase::
|
|
1207
|
+
CoreDatabase::create_with_metric(&path, dimension as usize, parsed_metric)
|
|
1208
|
+
.map_err(to_napi_error)?
|
|
698
1209
|
};
|
|
699
1210
|
|
|
700
1211
|
Ok(NativeDatabase {
|
|
@@ -738,6 +1249,7 @@ fn parse_search_request(options_json: Option<String>) -> Result<SearchRequest> {
|
|
|
738
1249
|
let fetch_k = get_usize(object, "fetchK")?.unwrap_or(0);
|
|
739
1250
|
let mmr_lambda = get_optional_f32(object, "mmrLambda")?;
|
|
740
1251
|
let vector_name = get_string(object, "vectorName")?;
|
|
1252
|
+
let truncate_dim = get_usize(object, "truncateDim")?;
|
|
741
1253
|
let fusion_name = get_string(object, "fusion")?.unwrap_or_else(|| "linear".to_owned());
|
|
742
1254
|
let rrf_k = get_usize(object, "rrfK")?.unwrap_or(60);
|
|
743
1255
|
let explain = get_bool(object, "explain")?.unwrap_or(false);
|
|
@@ -761,6 +1273,7 @@ fn parse_search_request(options_json: Option<String>) -> Result<SearchRequest> {
|
|
|
761
1273
|
mmr_lambda,
|
|
762
1274
|
vector_name,
|
|
763
1275
|
fusion: parse_fusion(&fusion_name, rrf_k)?,
|
|
1276
|
+
truncate_dim,
|
|
764
1277
|
multi_vector_queries: query_vectors,
|
|
765
1278
|
},
|
|
766
1279
|
})
|
|
@@ -885,6 +1398,39 @@ fn json_to_named_vectors(value: &Value) -> Result<NamedVectors> {
|
|
|
885
1398
|
Ok(vectors)
|
|
886
1399
|
}
|
|
887
1400
|
|
|
1401
|
+
fn json_to_multi_vectors(value: &Value) -> Result<MultiVectors> {
|
|
1402
|
+
let object = value
|
|
1403
|
+
.as_object()
|
|
1404
|
+
.ok_or_else(|| err("multi_vectors must be a JSON object"))?;
|
|
1405
|
+
let mut multi_vectors = MultiVectors::new();
|
|
1406
|
+
for (name, token_array) in object {
|
|
1407
|
+
if name.is_empty() {
|
|
1408
|
+
return Err(err("multi-vector space names must not be empty"));
|
|
1409
|
+
}
|
|
1410
|
+
let arr = token_array
|
|
1411
|
+
.as_array()
|
|
1412
|
+
.ok_or_else(|| err("multi-vector space value must be an array of arrays"))?;
|
|
1413
|
+
let mut token_vectors = Vec::with_capacity(arr.len());
|
|
1414
|
+
for item in arr {
|
|
1415
|
+
token_vectors.push(value_to_vector(item, "multi-vector token")?);
|
|
1416
|
+
}
|
|
1417
|
+
multi_vectors.insert(name.clone(), token_vectors);
|
|
1418
|
+
}
|
|
1419
|
+
Ok(multi_vectors)
|
|
1420
|
+
}
|
|
1421
|
+
|
|
1422
|
+
fn multi_vectors_to_json(mv: &MultiVectors) -> Value {
|
|
1423
|
+
let mut map = Map::new();
|
|
1424
|
+
for (name, token_vectors) in mv {
|
|
1425
|
+
let arr: Vec<Value> = token_vectors
|
|
1426
|
+
.iter()
|
|
1427
|
+
.map(|v| Value::Array(v.iter().map(|&f| json!(f)).collect()))
|
|
1428
|
+
.collect();
|
|
1429
|
+
map.insert(name.clone(), Value::Array(arr));
|
|
1430
|
+
}
|
|
1431
|
+
Value::Object(map)
|
|
1432
|
+
}
|
|
1433
|
+
|
|
888
1434
|
fn json_to_filter(value: &Value) -> Result<MetadataFilter> {
|
|
889
1435
|
let object = value
|
|
890
1436
|
.as_object()
|
|
@@ -1069,6 +1615,18 @@ fn json_to_record(object: &Map<String, Value>, default_namespace: Option<&str>)
|
|
|
1069
1615
|
.transpose()?
|
|
1070
1616
|
.unwrap_or_default();
|
|
1071
1617
|
|
|
1618
|
+
let multi_vectors = object
|
|
1619
|
+
.get("multi_vectors")
|
|
1620
|
+
.or_else(|| object.get("multiVectors"))
|
|
1621
|
+
.map(json_to_multi_vectors)
|
|
1622
|
+
.transpose()?
|
|
1623
|
+
.unwrap_or_default();
|
|
1624
|
+
|
|
1625
|
+
let ttl = object
|
|
1626
|
+
.get("ttl")
|
|
1627
|
+
.and_then(|v| v.as_f64());
|
|
1628
|
+
let expires_at = ttl_to_expires_at(ttl)?;
|
|
1629
|
+
|
|
1072
1630
|
Ok(Record {
|
|
1073
1631
|
namespace,
|
|
1074
1632
|
id: value_to_string(id)?,
|
|
@@ -1076,6 +1634,8 @@ fn json_to_record(object: &Map<String, Value>, default_namespace: Option<&str>)
|
|
|
1076
1634
|
vectors,
|
|
1077
1635
|
sparse,
|
|
1078
1636
|
metadata,
|
|
1637
|
+
multi_vectors,
|
|
1638
|
+
expires_at,
|
|
1079
1639
|
})
|
|
1080
1640
|
}
|
|
1081
1641
|
|
|
@@ -1087,6 +1647,8 @@ fn record_to_json(record: &Record) -> Value {
|
|
|
1087
1647
|
"vectors": named_vectors_to_json(&record.vectors),
|
|
1088
1648
|
"sparse": sparse_to_json(&record.sparse),
|
|
1089
1649
|
"metadata": metadata_to_json(&record.metadata),
|
|
1650
|
+
"multi_vectors": multi_vectors_to_json(&record.multi_vectors),
|
|
1651
|
+
"expires_at": record.expires_at,
|
|
1090
1652
|
})
|
|
1091
1653
|
}
|
|
1092
1654
|
|
|
@@ -1188,6 +1750,8 @@ fn search_stats_to_json(stats: &vectlite::SearchStats) -> Value {
|
|
|
1188
1750
|
"ann_loaded_from_disk": stats.ann_loaded_from_disk,
|
|
1189
1751
|
"wal_entries_replayed": stats.wal_entries_replayed,
|
|
1190
1752
|
"fusion": stats.fusion,
|
|
1753
|
+
"effective_dimension": stats.effective_dimension,
|
|
1754
|
+
"matryoshka_truncated": stats.matryoshka_truncated,
|
|
1191
1755
|
"rerank_applied": false,
|
|
1192
1756
|
"rerank_count": 0,
|
|
1193
1757
|
"timings": {
|
|
@@ -1342,6 +1906,23 @@ fn value_to_f32(value: &Value, label: &str) -> Result<f32> {
|
|
|
1342
1906
|
.ok_or_else(|| err(format!("{label} must contain numeric values")))
|
|
1343
1907
|
}
|
|
1344
1908
|
|
|
1909
|
+
/// Convert an optional TTL (seconds from now) to an absolute `expires_at` timestamp.
|
|
1910
|
+
fn ttl_to_expires_at(ttl: Option<f64>) -> Result<Option<f64>> {
|
|
1911
|
+
match ttl {
|
|
1912
|
+
None => Ok(None),
|
|
1913
|
+
Some(t) if t < 0.0 || t.is_nan() => {
|
|
1914
|
+
Err(err("ttl must be a non-negative finite number"))
|
|
1915
|
+
}
|
|
1916
|
+
Some(t) => {
|
|
1917
|
+
let now = std::time::SystemTime::now()
|
|
1918
|
+
.duration_since(std::time::UNIX_EPOCH)
|
|
1919
|
+
.unwrap_or_default()
|
|
1920
|
+
.as_secs_f64();
|
|
1921
|
+
Ok(Some(now + t))
|
|
1922
|
+
}
|
|
1923
|
+
}
|
|
1924
|
+
}
|
|
1925
|
+
|
|
1345
1926
|
fn js_vector_to_core(values: Vec<f64>, label: &str) -> Result<Vec<f32>> {
|
|
1346
1927
|
let mut vector = Vec::with_capacity(values.len());
|
|
1347
1928
|
for value in values {
|
|
@@ -1372,6 +1953,10 @@ fn closed_database_error() -> vectlite::VectLiteError {
|
|
|
1372
1953
|
vectlite::VectLiteError::InvalidFormat("database is closed".to_owned())
|
|
1373
1954
|
}
|
|
1374
1955
|
|
|
1956
|
+
fn parse_payload_index_type(name: &str) -> Result<PayloadIndexType> {
|
|
1957
|
+
PayloadIndexType::from_name(name).map_err(to_napi_error)
|
|
1958
|
+
}
|
|
1959
|
+
|
|
1375
1960
|
fn parse_quantization_options(
|
|
1376
1961
|
options_json: Option<&str>,
|
|
1377
1962
|
) -> Result<(Option<usize>, Option<usize>, Option<usize>, Option<usize>)> {
|