vectlite 0.9.2 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +38 -4
- package/index.d.ts +28 -0
- package/index.js +37 -2
- package/native/Cargo.toml +1 -1
- package/native/src/lib.rs +109 -10
- package/native/vectlite-core/Cargo.toml +1 -1
- package/native/vectlite-core/src/lib.rs +227 -12
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/vectlite.node +0 -0
- package/prebuilds/darwin-x64/vectlite.node +0 -0
- package/prebuilds/linux-x64-gnu/vectlite.node +0 -0
- package/prebuilds/win32-x64-msvc/vectlite.node +0 -0
package/README.md
CHANGED
|
@@ -68,7 +68,7 @@ db.close()
|
|
|
68
68
|
### Data Management
|
|
69
69
|
|
|
70
70
|
- **Physical collections** -- `vectlite.openStore()` manages a directory of independent databases
|
|
71
|
-
- **Bulk ingestion** -- `bulkIngest()` with
|
|
71
|
+
- **Bulk ingestion** -- `bulkIngest()` with Rayon-parallel HNSW build, coalesced WAL fsync, and tunable `m` / `efConstruction` / `efSearch`
|
|
72
72
|
- **Listing & filtered counts** -- `list()` and `count({ namespace, filter })` without a vector query
|
|
73
73
|
- **Delete by filter** -- `deleteByFilter()` for bulk deletion by metadata filter
|
|
74
74
|
- **Partial metadata updates** -- `updateMetadata()` merges a patch without re-writing the vector or rebuilding indexes
|
|
@@ -260,7 +260,7 @@ console.log(db.quantizationMethod) // "scalar", "binary", or "product"
|
|
|
260
260
|
db.disableQuantization()
|
|
261
261
|
```
|
|
262
262
|
|
|
263
|
-
`rescoreMultiplier` controls the number of quantized candidates rescored with exact float32 scoring: `k * rescoreMultiplier`, capped at the collection size. Increase it to trade latency for recall.
|
|
263
|
+
`rescoreMultiplier` (default **10**) controls the number of quantized candidates rescored with exact float32 scoring: `k * rescoreMultiplier`, capped at the collection size. Increase it to trade latency for recall.
|
|
264
264
|
|
|
265
265
|
For PQ, `numSubVectors` must divide the database dimension. If omitted, Vectlite chooses a compatible default; use `db.validNumSubVectors()` to inspect all valid values.
|
|
266
266
|
|
|
@@ -345,6 +345,37 @@ await db.compactAsync()
|
|
|
345
345
|
const count = await db.bulkIngestAsync(records, { batchSize: 5000 })
|
|
346
346
|
```
|
|
347
347
|
|
|
348
|
+
### Tuning the HNSW index
|
|
349
|
+
|
|
350
|
+
`bulkIngest()` and `bulkIngestAsync()` accept optional HNSW parameters that
|
|
351
|
+
control the recall/latency trade-off and trigger Rayon-backed parallel graph
|
|
352
|
+
construction once the dataset reaches `parallelInsertThreshold` vectors (default 256):
|
|
353
|
+
|
|
354
|
+
```js
|
|
355
|
+
// Higher recall, slightly slower build/search
|
|
356
|
+
db.bulkIngest(records, {
|
|
357
|
+
batchSize: 5000,
|
|
358
|
+
m: 32, // max bidirectional links per node (default 16)
|
|
359
|
+
efConstruction: 400, // build-time search width (default 200)
|
|
360
|
+
efSearch: 200, // query-time search width (default: auto)
|
|
361
|
+
})
|
|
362
|
+
|
|
363
|
+
// Faster build/search, lower recall
|
|
364
|
+
db.bulkIngest(records, { m: 8, efConstruction: 100, efSearch: 40 })
|
|
365
|
+
```
|
|
366
|
+
|
|
367
|
+
The same parameters can be changed at any time without re-ingesting:
|
|
368
|
+
|
|
369
|
+
```js
|
|
370
|
+
db.setIndexConfig({ m: 32, efConstruction: 400 }) // rebuilds the ANN graph
|
|
371
|
+
db.setEfSearch(200) // query-time only, no rebuild
|
|
372
|
+
console.log(db.indexConfig())
|
|
373
|
+
// { m: 32, ef_construction: 400, ef_search: 200, parallel_insert_threshold: 256 }
|
|
374
|
+
```
|
|
375
|
+
|
|
376
|
+
Use higher `m` / `efConstruction` / `efSearch` to push Recall@10 toward `1.0`;
|
|
377
|
+
use lower values when latency or memory matter more than recall.
|
|
378
|
+
|
|
348
379
|
### OpenTelemetry Integration
|
|
349
380
|
|
|
350
381
|
vectlite ships with optional OpenTelemetry tracing. When enabled, every search
|
|
@@ -398,7 +429,10 @@ before re-throwing.
|
|
|
398
429
|
| `db.insert(id, vector, metadata, options)` | Insert a record (throws on duplicate id) |
|
|
399
430
|
| `db.upsertMany(records, { namespace })` | Upsert a batch of records |
|
|
400
431
|
| `db.insertMany(records, { namespace })` | Insert a batch |
|
|
401
|
-
| `db.bulkIngest(records, { namespace, batchSize })` | Fastest bulk import with
|
|
432
|
+
| `db.bulkIngest(records, { namespace, batchSize, m, efConstruction, efSearch, parallelInsertThreshold })` | Fastest bulk import with coalesced WAL fsync and Rayon-parallel HNSW build |
|
|
433
|
+
| `db.setIndexConfig({ m, efConstruction, efSearch, parallelInsertThreshold })` | Update HNSW parameters; rebuilds the ANN graph if `m`/`efConstruction` changed |
|
|
434
|
+
| `db.setEfSearch(efSearch)` | Adjust query-time HNSW search width without rebuilding |
|
|
435
|
+
| `db.indexConfig()` | Return the current HNSW configuration |
|
|
402
436
|
| `db.delete(id, { namespace })` | Delete a single record |
|
|
403
437
|
| `db.deleteMany(ids, { namespace })` | Delete multiple records by id |
|
|
404
438
|
| `db.deleteByFilter(filter, { namespace })` | Delete all records matching a filter |
|
|
@@ -462,7 +496,7 @@ before re-throwing.
|
|
|
462
496
|
| `db.searchWithStatsAsync(query, options)` | Non-blocking search with stats (returns Promise) |
|
|
463
497
|
| `db.flushAsync()` | Non-blocking flush/compact (returns Promise) |
|
|
464
498
|
| `db.compactAsync()` | Non-blocking compact (returns Promise) |
|
|
465
|
-
| `db.bulkIngestAsync(records, options)` | Non-blocking bulk import (returns Promise) |
|
|
499
|
+
| `db.bulkIngestAsync(records, options)` | Non-blocking bulk import (returns Promise); accepts the same HNSW tuning options as `bulkIngest` |
|
|
466
500
|
|
|
467
501
|
## Filter Operators
|
|
468
502
|
|
package/index.d.ts
CHANGED
|
@@ -110,6 +110,28 @@ export interface ListCursorResult {
|
|
|
110
110
|
export interface BulkIngestOptions {
|
|
111
111
|
namespace?: string | null
|
|
112
112
|
batchSize?: number
|
|
113
|
+
/** Max bidirectional links per HNSW node (default 16). */
|
|
114
|
+
m?: number | null
|
|
115
|
+
/** Build-time search width (default 200). Higher = better recall, slower build. */
|
|
116
|
+
efConstruction?: number | null
|
|
117
|
+
/** Query-time search width. Omitted or `null` keeps the current setting (auto by default); call `setEfSearch(null)` to revert to auto. */
|
|
118
|
+
efSearch?: number | null
|
|
119
|
+
/** Minimum dataset size to engage Rayon-parallel HNSW insertion (default 256). */
|
|
120
|
+
parallelInsertThreshold?: number | null
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
export interface IndexConfig {
|
|
124
|
+
m: number
|
|
125
|
+
ef_construction: number
|
|
126
|
+
ef_search: number | null
|
|
127
|
+
parallel_insert_threshold: number
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
export interface SetIndexConfigOptions {
|
|
131
|
+
m?: number | null
|
|
132
|
+
efConstruction?: number | null
|
|
133
|
+
efSearch?: number | null
|
|
134
|
+
parallelInsertThreshold?: number | null
|
|
113
135
|
}
|
|
114
136
|
|
|
115
137
|
export interface SearchOptions {
|
|
@@ -212,6 +234,12 @@ export class Database {
|
|
|
212
234
|
insertMany(records: Record[], options?: { namespace?: string | null }): number
|
|
213
235
|
upsertMany(records: Record[], options?: { namespace?: string | null }): number
|
|
214
236
|
bulkIngest(records: Record[], options?: BulkIngestOptions): number
|
|
237
|
+
/** Get the current HNSW configuration. */
|
|
238
|
+
indexConfig(): IndexConfig
|
|
239
|
+
/** Adjust query-time `ef_search` only (no rebuild). `null` reverts to auto. */
|
|
240
|
+
setEfSearch(efSearch: number | null): void
|
|
241
|
+
/** Update HNSW parameters; rebuilds the ANN graph if `m`/`efConstruction` changed. */
|
|
242
|
+
setIndexConfig(config: SetIndexConfigOptions): void
|
|
215
243
|
get(id: string, options?: { namespace?: string | null }): Record | null
|
|
216
244
|
delete(id: string, options?: { namespace?: string | null }): boolean
|
|
217
245
|
deleteMany(ids: string[], options?: { namespace?: string | null }): number
|
package/index.js
CHANGED
|
@@ -415,7 +415,34 @@ class Database {
|
|
|
415
415
|
|
|
416
416
|
bulkIngest(records, options = {}) {
|
|
417
417
|
return wrapError(() =>
|
|
418
|
-
this._native.bulkIngest(
|
|
418
|
+
this._native.bulkIngest(
|
|
419
|
+
encode(records),
|
|
420
|
+
options.namespace ?? null,
|
|
421
|
+
options.batchSize ?? 10_000,
|
|
422
|
+
options.m ?? null,
|
|
423
|
+
options.efConstruction ?? null,
|
|
424
|
+
options.efSearch ?? null,
|
|
425
|
+
options.parallelInsertThreshold ?? null,
|
|
426
|
+
),
|
|
427
|
+
)
|
|
428
|
+
}
|
|
429
|
+
|
|
430
|
+
indexConfig() {
|
|
431
|
+
return wrapError(() => decode(this._native.indexConfig()))
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
setEfSearch(efSearch) {
|
|
435
|
+
return wrapError(() => this._native.setEfSearch(efSearch ?? null))
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
setIndexConfig(config = {}) {
|
|
439
|
+
return wrapError(() =>
|
|
440
|
+
this._native.setIndexConfig(
|
|
441
|
+
config.m ?? null,
|
|
442
|
+
config.efConstruction ?? null,
|
|
443
|
+
config.efSearch ?? null,
|
|
444
|
+
config.parallelInsertThreshold ?? null,
|
|
445
|
+
),
|
|
419
446
|
)
|
|
420
447
|
}
|
|
421
448
|
|
|
@@ -585,7 +612,15 @@ class Database {
|
|
|
585
612
|
|
|
586
613
|
bulkIngestAsync(records, options = {}) {
|
|
587
614
|
return wrapAsync(
|
|
588
|
-
this._native.bulkIngestAsync(
|
|
615
|
+
this._native.bulkIngestAsync(
|
|
616
|
+
encode(records),
|
|
617
|
+
options.namespace ?? null,
|
|
618
|
+
options.batchSize ?? 10_000,
|
|
619
|
+
options.m ?? null,
|
|
620
|
+
options.efConstruction ?? null,
|
|
621
|
+
options.efSearch ?? null,
|
|
622
|
+
options.parallelInsertThreshold ?? null,
|
|
623
|
+
),
|
|
589
624
|
)
|
|
590
625
|
}
|
|
591
626
|
}
|
package/native/Cargo.toml
CHANGED
package/native/src/lib.rs
CHANGED
|
@@ -12,8 +12,8 @@ use vectlite::quantization::{
|
|
|
12
12
|
default_product_num_sub_vectors,
|
|
13
13
|
};
|
|
14
14
|
use vectlite::{
|
|
15
|
-
Database as CoreDatabase, DistanceMetric, FusionStrategy, HybridSearchOptions,
|
|
16
|
-
MetadataFilter, MetadataValue, MultiVectorSearchOptions, MultiVectors, NamedVectors,
|
|
15
|
+
Database as CoreDatabase, DistanceMetric, FusionStrategy, HybridSearchOptions, IndexConfig,
|
|
16
|
+
Metadata, MetadataFilter, MetadataValue, MultiVectorSearchOptions, MultiVectors, NamedVectors,
|
|
17
17
|
PayloadIndexType, Record, SearchOutcome, SearchResult, SparseVector, Store as CoreStore,
|
|
18
18
|
WriteOperation,
|
|
19
19
|
};
|
|
@@ -417,13 +417,55 @@ impl NativeDatabase {
|
|
|
417
417
|
records_json: String,
|
|
418
418
|
namespace: Option<String>,
|
|
419
419
|
batch_size: u32,
|
|
420
|
+
m: Option<u32>,
|
|
421
|
+
ef_construction: Option<u32>,
|
|
422
|
+
ef_search: Option<u32>,
|
|
423
|
+
parallel_insert_threshold: Option<u32>,
|
|
420
424
|
) -> Result<u32> {
|
|
421
425
|
let records = parse_record_batch_json(&records_json, namespace.as_deref())?;
|
|
422
426
|
let mut database = self.write_open()?;
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
.
|
|
427
|
+
let tuning = merge_index_config(m, ef_construction, ef_search, parallel_insert_threshold);
|
|
428
|
+
let count = if let Some(cfg) = tuning {
|
|
429
|
+
let merged = apply_index_overrides(database.index_config(), cfg);
|
|
430
|
+
database.bulk_ingest_with_config(records, batch_size as usize, Some(merged))
|
|
431
|
+
} else {
|
|
432
|
+
database.bulk_ingest(records, batch_size as usize)
|
|
433
|
+
};
|
|
434
|
+
count.map(|n| n as u32).map_err(to_napi_error)
|
|
435
|
+
}
|
|
436
|
+
|
|
437
|
+
#[napi(js_name = "indexConfig")]
|
|
438
|
+
pub fn index_config(&self) -> Result<String> {
|
|
439
|
+
let cfg = self.read()?.index_config();
|
|
440
|
+
let value = json!({
|
|
441
|
+
"m": cfg.m as u32,
|
|
442
|
+
"ef_construction": cfg.ef_construction as u32,
|
|
443
|
+
"ef_search": cfg.ef_search.map(|v| v as u32),
|
|
444
|
+
"parallel_insert_threshold": cfg.parallel_insert_threshold as u32,
|
|
445
|
+
});
|
|
446
|
+
stringify_value(value)
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
#[napi(js_name = "setEfSearch")]
|
|
450
|
+
pub fn set_ef_search(&self, ef_search: Option<u32>) -> Result<()> {
|
|
451
|
+
let ef = ef_search.map(|v| v as usize);
|
|
452
|
+
self.write_open()?.set_ef_search(ef).map_err(to_napi_error)
|
|
453
|
+
}
|
|
454
|
+
|
|
455
|
+
#[napi(js_name = "setIndexConfig")]
|
|
456
|
+
pub fn set_index_config(
|
|
457
|
+
&self,
|
|
458
|
+
m: Option<u32>,
|
|
459
|
+
ef_construction: Option<u32>,
|
|
460
|
+
ef_search: Option<u32>,
|
|
461
|
+
parallel_insert_threshold: Option<u32>,
|
|
462
|
+
) -> Result<()> {
|
|
463
|
+
let mut database = self.write_open()?;
|
|
464
|
+
let overrides =
|
|
465
|
+
merge_index_config(m, ef_construction, ef_search, parallel_insert_threshold)
|
|
466
|
+
.ok_or_else(|| err("setIndexConfig requires at least one field"))?;
|
|
467
|
+
let merged = apply_index_overrides(database.index_config(), overrides);
|
|
468
|
+
database.set_index_config(merged).map_err(to_napi_error)
|
|
427
469
|
}
|
|
428
470
|
|
|
429
471
|
#[napi]
|
|
@@ -878,6 +920,7 @@ pub struct BulkIngestTask {
|
|
|
878
920
|
db: Arc<RwLock<CoreDatabase>>,
|
|
879
921
|
records: Vec<Record>,
|
|
880
922
|
batch_size: usize,
|
|
923
|
+
tuning: Option<IndexConfigPatch>,
|
|
881
924
|
}
|
|
882
925
|
|
|
883
926
|
impl napi::Task for BulkIngestTask {
|
|
@@ -890,10 +933,13 @@ impl napi::Task for BulkIngestTask {
|
|
|
890
933
|
.db
|
|
891
934
|
.write()
|
|
892
935
|
.map_err(|e| err(format!("lock poisoned: {e}")))?;
|
|
893
|
-
|
|
894
|
-
.
|
|
895
|
-
.
|
|
896
|
-
|
|
936
|
+
let res = if let Some(cfg) = self.tuning.clone() {
|
|
937
|
+
let merged = apply_index_overrides(database.index_config(), cfg);
|
|
938
|
+
database.bulk_ingest_with_config(records, self.batch_size, Some(merged))
|
|
939
|
+
} else {
|
|
940
|
+
database.bulk_ingest(records, self.batch_size)
|
|
941
|
+
};
|
|
942
|
+
res.map(|count| count as u32).map_err(to_napi_error)
|
|
897
943
|
}
|
|
898
944
|
|
|
899
945
|
fn resolve(&mut self, _env: napi::Env, output: Self::Output) -> Result<Self::JsValue> {
|
|
@@ -959,12 +1005,18 @@ impl NativeDatabase {
|
|
|
959
1005
|
records_json: String,
|
|
960
1006
|
namespace: Option<String>,
|
|
961
1007
|
batch_size: u32,
|
|
1008
|
+
m: Option<u32>,
|
|
1009
|
+
ef_construction: Option<u32>,
|
|
1010
|
+
ef_search: Option<u32>,
|
|
1011
|
+
parallel_insert_threshold: Option<u32>,
|
|
962
1012
|
) -> Result<AsyncTask<BulkIngestTask>> {
|
|
963
1013
|
let records = parse_record_batch_json(&records_json, namespace.as_deref())?;
|
|
1014
|
+
let tuning = merge_index_config(m, ef_construction, ef_search, parallel_insert_threshold);
|
|
964
1015
|
Ok(AsyncTask::new(BulkIngestTask {
|
|
965
1016
|
db: self.inner.clone(),
|
|
966
1017
|
records,
|
|
967
1018
|
batch_size: batch_size as usize,
|
|
1019
|
+
tuning,
|
|
968
1020
|
}))
|
|
969
1021
|
}
|
|
970
1022
|
}
|
|
@@ -1962,6 +2014,53 @@ fn value_to_usize(value: &Value, label: &str) -> Result<usize> {
|
|
|
1962
2014
|
.ok_or_else(|| err(format!("{label} must be an unsigned integer")))
|
|
1963
2015
|
}
|
|
1964
2016
|
|
|
2017
|
+
#[derive(Clone, Copy)]
|
|
2018
|
+
struct IndexConfigPatch {
|
|
2019
|
+
m: Option<usize>,
|
|
2020
|
+
ef_construction: Option<usize>,
|
|
2021
|
+
ef_search: Option<usize>,
|
|
2022
|
+
parallel_insert_threshold: Option<usize>,
|
|
2023
|
+
}
|
|
2024
|
+
|
|
2025
|
+
/// Pack the four optional HNSW tuning fields into a patch. Returns `None`
|
|
2026
|
+
/// when every field is `None`; explicit zeroes are preserved so core
|
|
2027
|
+
/// validation can reject invalid build/search widths instead of treating
|
|
2028
|
+
/// them as "not provided".
|
|
2029
|
+
fn merge_index_config(
|
|
2030
|
+
m: Option<u32>,
|
|
2031
|
+
ef_construction: Option<u32>,
|
|
2032
|
+
ef_search: Option<u32>,
|
|
2033
|
+
parallel_insert_threshold: Option<u32>,
|
|
2034
|
+
) -> Option<IndexConfigPatch> {
|
|
2035
|
+
if m.is_none()
|
|
2036
|
+
&& ef_construction.is_none()
|
|
2037
|
+
&& ef_search.is_none()
|
|
2038
|
+
&& parallel_insert_threshold.is_none()
|
|
2039
|
+
{
|
|
2040
|
+
return None;
|
|
2041
|
+
}
|
|
2042
|
+
Some(IndexConfigPatch {
|
|
2043
|
+
m: m.map(|v| v as usize),
|
|
2044
|
+
ef_construction: ef_construction.map(|v| v as usize),
|
|
2045
|
+
ef_search: ef_search.map(|v| v as usize),
|
|
2046
|
+
parallel_insert_threshold: parallel_insert_threshold.map(|v| v as usize),
|
|
2047
|
+
})
|
|
2048
|
+
}
|
|
2049
|
+
|
|
2050
|
+
/// Merge a tuning patch into the current `IndexConfig`. Omitted fields inherit
|
|
2051
|
+
/// from `current`; `ef_search = None` in the patch means "no change" because
|
|
2052
|
+
/// callers use `setEfSearch(null)` to reset query-time tuning to auto.
|
|
2053
|
+
fn apply_index_overrides(current: IndexConfig, patch: IndexConfigPatch) -> IndexConfig {
|
|
2054
|
+
IndexConfig {
|
|
2055
|
+
m: patch.m.unwrap_or(current.m),
|
|
2056
|
+
ef_construction: patch.ef_construction.unwrap_or(current.ef_construction),
|
|
2057
|
+
ef_search: patch.ef_search.or(current.ef_search),
|
|
2058
|
+
parallel_insert_threshold: patch
|
|
2059
|
+
.parallel_insert_threshold
|
|
2060
|
+
.unwrap_or(current.parallel_insert_threshold),
|
|
2061
|
+
}
|
|
2062
|
+
}
|
|
2063
|
+
|
|
1965
2064
|
fn err(message: impl Into<String>) -> NapiError {
|
|
1966
2065
|
NapiError::from_reason(message.into())
|
|
1967
2066
|
}
|
|
@@ -35,6 +35,10 @@ const ANN_OVERSAMPLE: usize = 8;
|
|
|
35
35
|
const ANN_MIN_CANDIDATES: usize = 64;
|
|
36
36
|
const ANN_M: usize = 16;
|
|
37
37
|
const ANN_EF_CONSTRUCTION: usize = 200;
|
|
38
|
+
/// Dataset size at or above which HNSW construction uses parallel batch insert
|
|
39
|
+
/// (Rayon-based). Below this, sequential insert is cheaper because of
|
|
40
|
+
/// thread setup overhead.
|
|
41
|
+
const ANN_PARALLEL_INSERT_THRESHOLD: usize = 256;
|
|
38
42
|
const BM25_K1: f32 = 1.2;
|
|
39
43
|
const BM25_B: f32 = 0.75;
|
|
40
44
|
|
|
@@ -754,6 +758,87 @@ pub struct SearchOptions {
|
|
|
754
758
|
pub truncate_dim: Option<usize>,
|
|
755
759
|
}
|
|
756
760
|
|
|
761
|
+
/// HNSW tuning parameters. Exposed so callers can trade off recall, latency,
|
|
762
|
+
/// memory and build time.
|
|
763
|
+
///
|
|
764
|
+
/// Defaults mirror VectLite's historical built-in values (`m = 16`,
|
|
765
|
+
/// `ef_construction = 200`). `ef_search = None` means VectLite picks an
|
|
766
|
+
/// `ef_search` of `max(candidate_count, ef_construction)` at query time.
|
|
767
|
+
///
|
|
768
|
+
/// Reference: Malkov & Yashunin, *Efficient and robust approximate nearest
|
|
769
|
+
/// neighbor search using Hierarchical Navigable Small World graphs*.
|
|
770
|
+
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
|
771
|
+
pub struct IndexConfig {
|
|
772
|
+
/// Max number of bidirectional links per node. Higher = better recall,
|
|
773
|
+
/// more memory, slower build. Typical range: 8..64.
|
|
774
|
+
pub m: usize,
|
|
775
|
+
/// Width of the search during graph construction. Higher = better recall,
|
|
776
|
+
/// slower build. Typical range: 64..800.
|
|
777
|
+
pub ef_construction: usize,
|
|
778
|
+
/// Width of the search at query time. None = auto (derived from top_k).
|
|
779
|
+
/// Higher = better recall, slower search.
|
|
780
|
+
pub ef_search: Option<usize>,
|
|
781
|
+
/// Use parallel (Rayon-backed) HNSW insertion when the dataset has at
|
|
782
|
+
/// least this many vectors. Defaults to `ANN_PARALLEL_INSERT_THRESHOLD`.
|
|
783
|
+
/// Set very high to disable parallel insert.
|
|
784
|
+
pub parallel_insert_threshold: usize,
|
|
785
|
+
}
|
|
786
|
+
|
|
787
|
+
impl Default for IndexConfig {
|
|
788
|
+
fn default() -> Self {
|
|
789
|
+
Self {
|
|
790
|
+
m: ANN_M,
|
|
791
|
+
ef_construction: ANN_EF_CONSTRUCTION,
|
|
792
|
+
ef_search: None,
|
|
793
|
+
parallel_insert_threshold: ANN_PARALLEL_INSERT_THRESHOLD,
|
|
794
|
+
}
|
|
795
|
+
}
|
|
796
|
+
}
|
|
797
|
+
|
|
798
|
+
impl IndexConfig {
|
|
799
|
+
/// A preset tuned for higher recall at the cost of build/search time.
|
|
800
|
+
/// Useful for benchmark comparisons where recall@10 must approach 1.0.
|
|
801
|
+
pub fn high_recall() -> Self {
|
|
802
|
+
Self {
|
|
803
|
+
m: 32,
|
|
804
|
+
ef_construction: 400,
|
|
805
|
+
ef_search: Some(200),
|
|
806
|
+
parallel_insert_threshold: ANN_PARALLEL_INSERT_THRESHOLD,
|
|
807
|
+
}
|
|
808
|
+
}
|
|
809
|
+
|
|
810
|
+
/// A preset tuned for fast build & low latency, lower recall.
|
|
811
|
+
pub fn fast() -> Self {
|
|
812
|
+
Self {
|
|
813
|
+
m: 8,
|
|
814
|
+
ef_construction: 100,
|
|
815
|
+
ef_search: Some(40),
|
|
816
|
+
parallel_insert_threshold: ANN_PARALLEL_INSERT_THRESHOLD,
|
|
817
|
+
}
|
|
818
|
+
}
|
|
819
|
+
|
|
820
|
+
fn validate(&self) -> Result<()> {
|
|
821
|
+
if self.m == 0 {
|
|
822
|
+
return Err(VectLiteError::InvalidFormat(
|
|
823
|
+
"IndexConfig.m must be >= 1".to_owned(),
|
|
824
|
+
));
|
|
825
|
+
}
|
|
826
|
+
if self.ef_construction == 0 {
|
|
827
|
+
return Err(VectLiteError::InvalidFormat(
|
|
828
|
+
"IndexConfig.ef_construction must be >= 1".to_owned(),
|
|
829
|
+
));
|
|
830
|
+
}
|
|
831
|
+
if let Some(ef) = self.ef_search {
|
|
832
|
+
if ef == 0 {
|
|
833
|
+
return Err(VectLiteError::InvalidFormat(
|
|
834
|
+
"IndexConfig.ef_search must be >= 1 when set".to_owned(),
|
|
835
|
+
));
|
|
836
|
+
}
|
|
837
|
+
}
|
|
838
|
+
Ok(())
|
|
839
|
+
}
|
|
840
|
+
}
|
|
841
|
+
|
|
757
842
|
impl Default for SearchOptions {
|
|
758
843
|
fn default() -> Self {
|
|
759
844
|
Self {
|
|
@@ -1230,6 +1315,10 @@ pub struct Database {
|
|
|
1230
1315
|
payload_index_defs: BTreeMap<String, PayloadIndexType>,
|
|
1231
1316
|
/// Live payload indexes, populated from records.
|
|
1232
1317
|
payload_indexes: BTreeMap<String, PayloadIndexData>,
|
|
1318
|
+
/// HNSW tuning parameters. Not persisted to disk: this is a per-session
|
|
1319
|
+
/// knob so callers can change recall/latency tradeoffs without migrating
|
|
1320
|
+
/// data files. A subsequent `set_index_config` rebuilds the graph only if `m` or `ef_construction` changed.
|
|
1321
|
+
index_config: IndexConfig,
|
|
1233
1322
|
}
|
|
1234
1323
|
|
|
1235
1324
|
#[derive(Default)]
|
|
@@ -1327,6 +1416,7 @@ impl Database {
|
|
|
1327
1416
|
multi_vector_quantized_keys: BTreeMap::new(),
|
|
1328
1417
|
payload_index_defs: BTreeMap::new(),
|
|
1329
1418
|
payload_indexes: BTreeMap::new(),
|
|
1419
|
+
index_config: IndexConfig::default(),
|
|
1330
1420
|
};
|
|
1331
1421
|
|
|
1332
1422
|
database.flush()?;
|
|
@@ -2487,11 +2577,41 @@ impl Database {
|
|
|
2487
2577
|
/// `batch_size`, but the ANN index and sparse index are only rebuilt once
|
|
2488
2578
|
/// at the very end, making this much faster than `upsert_many` for large
|
|
2489
2579
|
/// imports.
|
|
2580
|
+
///
|
|
2581
|
+
/// Performance notes:
|
|
2582
|
+
/// - The WAL is written without a per-batch `fsync` (each batch goes
|
|
2583
|
+
/// through `BufWriter` and is appended to the open file). A single
|
|
2584
|
+
/// `sync_all` is issued at the end. This avoids the per-batch fsync
|
|
2585
|
+
/// tax that dominates ingestion latency on macOS and modern SSDs.
|
|
2586
|
+
/// - The final ANN rebuild uses parallel HNSW insertion (Rayon) when
|
|
2587
|
+
/// the dataset is large enough (see
|
|
2588
|
+
/// `IndexConfig.parallel_insert_threshold`).
|
|
2490
2589
|
pub fn bulk_ingest<I>(&mut self, records: I, batch_size: usize) -> Result<usize>
|
|
2590
|
+
where
|
|
2591
|
+
I: IntoIterator<Item = Record>,
|
|
2592
|
+
{
|
|
2593
|
+
self.bulk_ingest_with_config(records, batch_size, None)
|
|
2594
|
+
}
|
|
2595
|
+
|
|
2596
|
+
/// Bulk-ingest with an override for the HNSW index configuration. The
|
|
2597
|
+
/// override is applied for the rebuild step at the end, so the resulting
|
|
2598
|
+
/// graph uses the requested `m` / `ef_construction`. The new config is
|
|
2599
|
+
/// also stored on the database (so subsequent searches use the
|
|
2600
|
+
/// corresponding `ef_search`).
|
|
2601
|
+
pub fn bulk_ingest_with_config<I>(
|
|
2602
|
+
&mut self,
|
|
2603
|
+
records: I,
|
|
2604
|
+
batch_size: usize,
|
|
2605
|
+
config: Option<IndexConfig>,
|
|
2606
|
+
) -> Result<usize>
|
|
2491
2607
|
where
|
|
2492
2608
|
I: IntoIterator<Item = Record>,
|
|
2493
2609
|
{
|
|
2494
2610
|
self.check_writable()?;
|
|
2611
|
+
if let Some(cfg) = config {
|
|
2612
|
+
cfg.validate()?;
|
|
2613
|
+
self.index_config = cfg;
|
|
2614
|
+
}
|
|
2495
2615
|
let batch_size = batch_size.max(1);
|
|
2496
2616
|
let mut total = 0_usize;
|
|
2497
2617
|
let mut batch = Vec::with_capacity(batch_size);
|
|
@@ -2502,7 +2622,8 @@ impl Database {
|
|
|
2502
2622
|
|
|
2503
2623
|
if batch.len() >= batch_size {
|
|
2504
2624
|
total += batch.len();
|
|
2505
|
-
|
|
2625
|
+
// Coalesced WAL writes: append without per-batch fsync.
|
|
2626
|
+
self.append_wal_batch_unsynced(&batch)?;
|
|
2506
2627
|
self.apply_ops_in_memory(batch);
|
|
2507
2628
|
batch = Vec::with_capacity(batch_size);
|
|
2508
2629
|
}
|
|
@@ -2510,11 +2631,16 @@ impl Database {
|
|
|
2510
2631
|
|
|
2511
2632
|
if !batch.is_empty() {
|
|
2512
2633
|
total += batch.len();
|
|
2513
|
-
self.
|
|
2634
|
+
self.append_wal_batch_unsynced(&batch)?;
|
|
2514
2635
|
self.apply_ops_in_memory(batch);
|
|
2515
2636
|
}
|
|
2516
2637
|
|
|
2517
2638
|
if total > 0 {
|
|
2639
|
+
// Single fsync at the very end to make all batches durable in
|
|
2640
|
+
// one shot. This is the major ingestion optimisation: instead
|
|
2641
|
+
// of paying fsync per batch (every `batch_size` records) we pay
|
|
2642
|
+
// it once for the whole bulk_ingest call.
|
|
2643
|
+
self.sync_wal()?;
|
|
2518
2644
|
self.rebuild_sparse_index();
|
|
2519
2645
|
self.rebuild_ann();
|
|
2520
2646
|
self.ann_loaded_from_disk = false;
|
|
@@ -2526,6 +2652,42 @@ impl Database {
|
|
|
2526
2652
|
Ok(total)
|
|
2527
2653
|
}
|
|
2528
2654
|
|
|
2655
|
+
/// Replace the HNSW tuning parameters, rebuilding the ANN index only when `m` or `ef_construction` changed.
|
|
2656
|
+
/// Use this to trade off recall vs latency without re-ingesting data.
|
|
2657
|
+
pub fn set_index_config(&mut self, config: IndexConfig) -> Result<()> {
|
|
2658
|
+
self.check_writable()?;
|
|
2659
|
+
config.validate()?;
|
|
2660
|
+
let changed_build_params = self.index_config.m != config.m
|
|
2661
|
+
|| self.index_config.ef_construction != config.ef_construction;
|
|
2662
|
+
self.index_config = config;
|
|
2663
|
+
if changed_build_params {
|
|
2664
|
+
// m / ef_construction affect graph structure → full rebuild.
|
|
2665
|
+
self.rebuild_ann();
|
|
2666
|
+
self.ann_loaded_from_disk = false;
|
|
2667
|
+
self.persist_ann_to_disk()?;
|
|
2668
|
+
}
|
|
2669
|
+
Ok(())
|
|
2670
|
+
}
|
|
2671
|
+
|
|
2672
|
+
/// Return the current HNSW tuning parameters.
|
|
2673
|
+
pub fn index_config(&self) -> IndexConfig {
|
|
2674
|
+
self.index_config
|
|
2675
|
+
}
|
|
2676
|
+
|
|
2677
|
+
/// Convenience: update only the query-time `ef_search` without rebuilding
|
|
2678
|
+
/// the index. Higher = better recall, slower search.
|
|
2679
|
+
pub fn set_ef_search(&mut self, ef_search: Option<usize>) -> Result<()> {
|
|
2680
|
+
if let Some(ef) = ef_search {
|
|
2681
|
+
if ef == 0 {
|
|
2682
|
+
return Err(VectLiteError::InvalidFormat(
|
|
2683
|
+
"ef_search must be >= 1".to_owned(),
|
|
2684
|
+
));
|
|
2685
|
+
}
|
|
2686
|
+
}
|
|
2687
|
+
self.index_config.ef_search = ef_search;
|
|
2688
|
+
Ok(())
|
|
2689
|
+
}
|
|
2690
|
+
|
|
2529
2691
|
pub fn compact(&mut self) -> Result<()> {
|
|
2530
2692
|
self.check_writable()?;
|
|
2531
2693
|
self.compact_inner()
|
|
@@ -3498,6 +3660,17 @@ impl Database {
|
|
|
3498
3660
|
}
|
|
3499
3661
|
|
|
3500
3662
|
fn append_wal_batch(&self, ops: &[WalOp]) -> Result<()> {
|
|
3663
|
+
self.append_wal_batch_inner(ops, true)
|
|
3664
|
+
}
|
|
3665
|
+
|
|
3666
|
+
/// Append a WAL batch without issuing an fsync. The caller is responsible
|
|
3667
|
+
/// for issuing `sync_wal` later (typically once at the end of a bulk
|
|
3668
|
+
/// ingest). This is the hot path for `bulk_ingest`.
|
|
3669
|
+
fn append_wal_batch_unsynced(&self, ops: &[WalOp]) -> Result<()> {
|
|
3670
|
+
self.append_wal_batch_inner(ops, false)
|
|
3671
|
+
}
|
|
3672
|
+
|
|
3673
|
+
fn append_wal_batch_inner(&self, ops: &[WalOp], sync: bool) -> Result<()> {
|
|
3501
3674
|
if let Some(parent) = self.wal_path.parent() {
|
|
3502
3675
|
if !parent.as_os_str().is_empty() {
|
|
3503
3676
|
fs::create_dir_all(parent)?;
|
|
@@ -3522,6 +3695,21 @@ impl Database {
|
|
|
3522
3695
|
|
|
3523
3696
|
write_u32(&mut file, u32_from_usize(buffer.len())?)?;
|
|
3524
3697
|
file.write_all(&buffer)?;
|
|
3698
|
+
if sync {
|
|
3699
|
+
file.sync_all()?;
|
|
3700
|
+
}
|
|
3701
|
+
Ok(())
|
|
3702
|
+
}
|
|
3703
|
+
|
|
3704
|
+
/// Force a durability fence on the WAL file. Opens the file in append
|
|
3705
|
+
/// mode and calls `sync_all`, which makes all previous unsynced writes
|
|
3706
|
+
/// durable in one shot. This is used by `bulk_ingest` to amortise fsync
|
|
3707
|
+
/// cost across many batches.
|
|
3708
|
+
fn sync_wal(&self) -> Result<()> {
|
|
3709
|
+
if !self.wal_path.exists() {
|
|
3710
|
+
return Ok(());
|
|
3711
|
+
}
|
|
3712
|
+
let file = OpenOptions::new().append(true).open(&self.wal_path)?;
|
|
3525
3713
|
file.sync_all()?;
|
|
3526
3714
|
Ok(())
|
|
3527
3715
|
}
|
|
@@ -3696,6 +3884,7 @@ impl Database {
|
|
|
3696
3884
|
multi_vector_quantized_keys: BTreeMap::new(),
|
|
3697
3885
|
payload_index_defs: BTreeMap::new(),
|
|
3698
3886
|
payload_indexes: BTreeMap::new(),
|
|
3887
|
+
index_config: IndexConfig::default(),
|
|
3699
3888
|
})
|
|
3700
3889
|
}
|
|
3701
3890
|
|
|
@@ -3854,13 +4043,14 @@ impl Database {
|
|
|
3854
4043
|
}
|
|
3855
4044
|
}
|
|
3856
4045
|
|
|
4046
|
+
let cfg = self.index_config;
|
|
3857
4047
|
self.ann.global = global_by_vector
|
|
3858
4048
|
.into_iter()
|
|
3859
4049
|
.filter_map(|(vector_name, records)| {
|
|
3860
4050
|
if records.len() < ANN_MIN_POINTS {
|
|
3861
4051
|
None
|
|
3862
4052
|
} else {
|
|
3863
|
-
Some((vector_name, build_ann_index(records, self.metric)))
|
|
4053
|
+
Some((vector_name, build_ann_index(records, self.metric, &cfg)))
|
|
3864
4054
|
}
|
|
3865
4055
|
})
|
|
3866
4056
|
.collect();
|
|
@@ -3874,7 +4064,7 @@ impl Database {
|
|
|
3874
4064
|
if records.len() < ANN_MIN_POINTS {
|
|
3875
4065
|
None
|
|
3876
4066
|
} else {
|
|
3877
|
-
Some((vector_name, build_ann_index(records, self.metric)))
|
|
4067
|
+
Some((vector_name, build_ann_index(records, self.metric, &cfg)))
|
|
3878
4068
|
}
|
|
3879
4069
|
})
|
|
3880
4070
|
.collect::<BTreeMap<_, _>>();
|
|
@@ -4205,7 +4395,14 @@ impl Database {
|
|
|
4205
4395
|
return None;
|
|
4206
4396
|
}
|
|
4207
4397
|
|
|
4208
|
-
|
|
4398
|
+
// ef_search controls recall vs latency at query time. When the user
|
|
4399
|
+
// explicitly sets `IndexConfig.ef_search`, use it, raised to at least `candidate_count`.
|
|
4400
|
+
// Otherwise default to max(candidate_count, ef_construction) which is
|
|
4401
|
+
// a conservative high-recall heuristic.
|
|
4402
|
+
let ef_search = match self.index_config.ef_search {
|
|
4403
|
+
Some(ef) => ef.max(candidate_count),
|
|
4404
|
+
None => candidate_count.max(self.index_config.ef_construction),
|
|
4405
|
+
};
|
|
4209
4406
|
let neighbours = index.hnsw.search(query, candidate_count, ef_search);
|
|
4210
4407
|
Some(
|
|
4211
4408
|
neighbours
|
|
@@ -4475,23 +4672,41 @@ fn score_dense_prefix(
|
|
|
4475
4672
|
metric.score(&left[..dimension], &right[..dimension])
|
|
4476
4673
|
}
|
|
4477
4674
|
|
|
4478
|
-
fn build_ann_index(
|
|
4675
|
+
fn build_ann_index(
|
|
4676
|
+
records: Vec<(RecordKey, &Vec<f32>)>,
|
|
4677
|
+
metric: DistanceMetric,
|
|
4678
|
+
config: &IndexConfig,
|
|
4679
|
+
) -> AnnIndex {
|
|
4479
4680
|
let max_layer = compute_hnsw_layers(records.len());
|
|
4480
4681
|
let count = records.len();
|
|
4682
|
+
let use_parallel = count >= config.parallel_insert_threshold;
|
|
4481
4683
|
|
|
4482
4684
|
macro_rules! build_hnsw {
|
|
4483
4685
|
($dist_type:ty, $dist_val:expr, $variant:ident) => {{
|
|
4484
4686
|
let mut hnsw = Hnsw::<f32, $dist_type>::new(
|
|
4485
|
-
|
|
4486
|
-
count,
|
|
4687
|
+
config.m,
|
|
4688
|
+
count.max(1),
|
|
4487
4689
|
max_layer,
|
|
4488
|
-
|
|
4690
|
+
config.ef_construction,
|
|
4489
4691
|
$dist_val,
|
|
4490
4692
|
);
|
|
4491
4693
|
let mut keys = Vec::with_capacity(count);
|
|
4492
|
-
|
|
4493
|
-
|
|
4494
|
-
|
|
4694
|
+
if use_parallel {
|
|
4695
|
+
// hnsw_rs's `parallel_insert` takes `&[(&Vec<T>, usize)]`
|
|
4696
|
+
// (the API is built around owned-Vec borrows) and uses Rayon
|
|
4697
|
+
// internally so the dominant cost (distance calculations
|
|
4698
|
+
// during graph neighbour selection) is multi-threaded.
|
|
4699
|
+
let mut batch: Vec<(&Vec<f32>, usize)> = Vec::with_capacity(count);
|
|
4700
|
+
for (origin_id, (key, vector)) in records.into_iter().enumerate() {
|
|
4701
|
+
batch.push((vector, origin_id));
|
|
4702
|
+
keys.push(key);
|
|
4703
|
+
}
|
|
4704
|
+
hnsw.parallel_insert(&batch);
|
|
4705
|
+
} else {
|
|
4706
|
+
for (origin_id, (key, vector)) in records.into_iter().enumerate() {
|
|
4707
|
+
hnsw.insert((vector.as_slice(), origin_id));
|
|
4708
|
+
keys.push(key);
|
|
4709
|
+
}
|
|
4495
4710
|
}
|
|
4496
4711
|
hnsw.set_searching_mode(true);
|
|
4497
4712
|
AnnIndex {
|
package/package.json
CHANGED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|