vectlite 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -68,7 +68,7 @@ db.close()
68
68
  ### Data Management
69
69
 
70
70
  - **Physical collections** -- `vectlite.openStore()` manages a directory of independent databases
71
- - **Bulk ingestion** -- `bulkIngest()` with Rayon-parallel HNSW build, coalesced WAL fsync, and tunable `m` / `efConstruction` / `efSearch`
71
+ - **Bulk ingestion** -- `bulkIngest()` with Rayon-parallel HNSW build, coalesced WAL fsync, and tunable `m` / `efConstruction` / `efSearch` / tombstone rebuild threshold
72
72
  - **Listing & filtered counts** -- `list()` and `count({ namespace, filter })` without a vector query
73
73
  - **Delete by filter** -- `deleteByFilter()` for bulk deletion by metadata filter
74
74
  - **Partial metadata updates** -- `updateMetadata()` merges a patch without re-writing the vector or rebuilding indexes
@@ -429,10 +429,15 @@ before re-throwing.
429
429
  | `db.insert(id, vector, metadata, options)` | Insert a record (throws on duplicate id) |
430
430
  | `db.upsertMany(records, { namespace })` | Upsert a batch of records |
431
431
  | `db.insertMany(records, { namespace })` | Insert a batch |
432
- | `db.bulkIngest(records, { namespace, batchSize, m, efConstruction, efSearch, parallelInsertThreshold })` | Fastest bulk import with coalesced WAL fsync and Rayon-parallel HNSW build |
433
- | `db.setIndexConfig({ m, efConstruction, efSearch, parallelInsertThreshold })` | Update HNSW parameters; rebuilds the ANN graph if `m`/`efConstruction` changed |
432
+ | `db.bulkIngest(records, { namespace, batchSize, m, efConstruction, efSearch, parallelInsertThreshold, tombstoneRebuildPct })` | Fastest bulk import with coalesced WAL fsync and Rayon-parallel HNSW build |
433
+ | `db.setIndexConfig({ m, efConstruction, efSearch, parallelInsertThreshold, tombstoneRebuildPct })` | Update HNSW parameters; rebuilds the ANN graph if `m`/`efConstruction` changed |
434
434
  | `db.setEfSearch(efSearch)` | Adjust query-time HNSW search width without rebuilding |
435
435
  | `db.indexConfig()` | Return the current HNSW configuration |
436
+ | `db.setWalSyncMode(mode, n)` | Configure WAL fsync cadence: `'per_op'`, `'every_n'`, or `'on_flush'` |
437
+ | `db.walSyncMode()` | Return the current WAL sync mode |
438
+ | `db.tombstoneStats()` | Return live and tombstoned HNSW node counts |
439
+ | `db.prepareForScan()` | Materialise the contiguous vector arena |
440
+ | `db.vectorArenaLen()` | Return the vector arena size or `null` |
436
441
  | `db.delete(id, { namespace })` | Delete a single record |
437
442
  | `db.deleteMany(ids, { namespace })` | Delete multiple records by id |
438
443
  | `db.deleteByFilter(filter, { namespace })` | Delete all records matching a filter |
package/index.d.ts CHANGED
@@ -118,6 +118,11 @@ export interface BulkIngestOptions {
118
118
  efSearch?: number | null
119
119
  /** Minimum dataset size to engage Rayon-parallel HNSW insertion (default 256). */
120
120
  parallelInsertThreshold?: number | null
121
+ /**
122
+ * Percentage (0–100, inclusive) of dead nodes at which `compact()` triggers an
123
+ * HNSW rebuild. Default 30. Set to 100 to disable automatic rebuild.
124
+ */
125
+ tombstoneRebuildPct?: number | null
121
126
  }
122
127
 
123
128
  export interface IndexConfig {
@@ -125,6 +130,7 @@ export interface IndexConfig {
125
130
  ef_construction: number
126
131
  ef_search: number | null
127
132
  parallel_insert_threshold: number
133
+ tombstone_rebuild_pct: number
128
134
  }
129
135
 
130
136
  export interface SetIndexConfigOptions {
@@ -132,6 +138,22 @@ export interface SetIndexConfigOptions {
132
138
  efConstruction?: number | null
133
139
  efSearch?: number | null
134
140
  parallelInsertThreshold?: number | null
141
+ tombstoneRebuildPct?: number | null
142
+ }
143
+
144
+ /** WAL fsync mode. See `Database.setWalSyncMode`. */
145
+ export type WalSyncMode = 'per_op' | 'every_n' | 'on_flush'
146
+
147
+ export type WalSyncModeInfo =
148
+ | { mode: 'per_op' }
149
+ | { mode: 'every_n'; n: number }
150
+ | { mode: 'on_flush' }
151
+
152
+ export interface TombstoneStats {
153
+ /** Live (non-tombstoned) records across all HNSW graphs. */
154
+ live: number
155
+ /** Dead (tombstoned) records still in the graphs, awaiting compact(). */
156
+ dead: number
135
157
  }
136
158
 
137
159
  export interface SearchOptions {
@@ -240,6 +262,31 @@ export class Database {
240
262
  setEfSearch(efSearch: number | null): void
241
263
  /** Update HNSW parameters; rebuilds the ANN graph if `m`/`efConstruction` changed. */
242
264
  setIndexConfig(config: SetIndexConfigOptions): void
265
+ /**
266
+ * Configure WAL durability.
267
+ *
268
+ * - `"per_op"` (default): fsync after every insert. Strongest durability.
269
+ * - `"every_n"` : fsync once every `n` inserts (pass `n` as 2nd arg).
270
+ * - `"on_flush"` : only fsync at `flush()` / `compact()` / `close()`.
271
+ *
272
+ * On macOS APFS, `"on_flush"` typically multiplies ingestion throughput
273
+ * by 5–10× at the cost of losing un-flushed writes on a crash.
274
+ */
275
+ setWalSyncMode(mode: WalSyncMode, n?: number | null): void
276
+ /** Return the current WAL sync mode. */
277
+ walSyncMode(): WalSyncModeInfo
278
+ /** Total live and tombstoned record counts across every HNSW graph. */
279
+ tombstoneStats(): TombstoneStats
280
+ /**
281
+ * Materialise the contiguous-vector arena up front for cache- and
282
+ * SIMD-friendly scans. Normally built lazily on first use.
283
+ */
284
+ prepareForScan(): void
285
+ /**
286
+ * Number of vectors in the contiguous arena, or `null` if it hasn't
287
+ * been materialised yet in this session.
288
+ */
289
+ vectorArenaLen(): number | null
243
290
  get(id: string, options?: { namespace?: string | null }): Record | null
244
291
  delete(id: string, options?: { namespace?: string | null }): boolean
245
292
  deleteMany(ids: string[], options?: { namespace?: string | null }): number
package/index.js CHANGED
@@ -423,6 +423,7 @@ class Database {
423
423
  options.efConstruction ?? null,
424
424
  options.efSearch ?? null,
425
425
  options.parallelInsertThreshold ?? null,
426
+ options.tombstoneRebuildPct ?? null,
426
427
  ),
427
428
  )
428
429
  }
@@ -442,10 +443,61 @@ class Database {
442
443
  config.efConstruction ?? null,
443
444
  config.efSearch ?? null,
444
445
  config.parallelInsertThreshold ?? null,
446
+ config.tombstoneRebuildPct ?? null,
445
447
  ),
446
448
  )
447
449
  }
448
450
 
451
+ /**
452
+ * Configure WAL durability. Valid modes are:
453
+ * - "per_op" : fsync after every insert (default, strongest durability)
454
+ * - "every_n" : fsync once every `n` inserts — pass `n` as second arg
455
+ * - "on_flush": fsync only at flush() / compact() / close()
456
+ * On macOS APFS, "on_flush" typically multiplies ingestion throughput by
457
+ * 5–10× at the cost of losing un-flushed writes on a crash.
458
+ */
459
+ setWalSyncMode(mode, n = null) {
460
+ return wrapError(() => this._native.setWalSyncMode(mode, n))
461
+ }
462
+
463
+ /**
464
+ * Return the current WAL sync mode. Shape:
465
+ * { mode: "per_op" } | { mode: "every_n", n: number } | { mode: "on_flush" }
466
+ */
467
+ walSyncMode() {
468
+ return wrapError(() => decode(this._native.walSyncMode()))
469
+ }
470
+
471
+ /**
472
+ * Return `{ live, dead }` summed across every HNSW graph (global +
473
+ * namespaced). Use to monitor when a compact() will rebuild the graph
474
+ * for recall.
475
+ */
476
+ tombstoneStats() {
477
+ return wrapError(() => {
478
+ const [live, dead] = this._native.tombstoneStats()
479
+ return { live, dead }
480
+ })
481
+ }
482
+
483
+ /**
484
+ * Materialise the contiguous-vector arena up front. The arena mirrors
485
+ * every record's default dense vector into a single flat Float32 buffer
486
+ * for cache- and SIMD-friendly brute-force / rescoring scans. Built
487
+ * lazily on first use otherwise.
488
+ */
489
+ prepareForScan() {
490
+ return wrapError(() => this._native.prepareForScan())
491
+ }
492
+
493
+ /**
494
+ * Number of vectors in the contiguous arena, or `null` if it has not
495
+ * been materialised yet in this session.
496
+ */
497
+ vectorArenaLen() {
498
+ return wrapError(() => this._native.vectorArenaLen())
499
+ }
500
+
449
501
  get(id, options = {}) {
450
502
  return wrapError(() => decode(this._native.get(id, options.namespace ?? null)))
451
503
  }
@@ -620,6 +672,7 @@ class Database {
620
672
  options.efConstruction ?? null,
621
673
  options.efSearch ?? null,
622
674
  options.parallelInsertThreshold ?? null,
675
+ options.tombstoneRebuildPct ?? null,
623
676
  ),
624
677
  )
625
678
  }
package/native/Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "vectlite-node"
3
- version = "0.10.0"
3
+ version = "0.11.0"
4
4
  edition = "2024"
5
5
  license = "MIT"
6
6
  description = "Node.js bindings for vectlite."
package/native/src/lib.rs CHANGED
@@ -15,7 +15,7 @@ use vectlite::{
15
15
  Database as CoreDatabase, DistanceMetric, FusionStrategy, HybridSearchOptions, IndexConfig,
16
16
  Metadata, MetadataFilter, MetadataValue, MultiVectorSearchOptions, MultiVectors, NamedVectors,
17
17
  PayloadIndexType, Record, SearchOutcome, SearchResult, SparseVector, Store as CoreStore,
18
- WriteOperation,
18
+ WalSyncMode, WriteOperation,
19
19
  };
20
20
 
21
21
  #[napi(js_name = "NativeDatabase")]
@@ -421,10 +421,17 @@ impl NativeDatabase {
421
421
  ef_construction: Option<u32>,
422
422
  ef_search: Option<u32>,
423
423
  parallel_insert_threshold: Option<u32>,
424
+ tombstone_rebuild_pct: Option<u32>,
424
425
  ) -> Result<u32> {
425
426
  let records = parse_record_batch_json(&records_json, namespace.as_deref())?;
426
427
  let mut database = self.write_open()?;
427
- let tuning = merge_index_config(m, ef_construction, ef_search, parallel_insert_threshold);
428
+ let tuning = merge_index_config(
429
+ m,
430
+ ef_construction,
431
+ ef_search,
432
+ parallel_insert_threshold,
433
+ tombstone_rebuild_pct,
434
+ );
428
435
  let count = if let Some(cfg) = tuning {
429
436
  let merged = apply_index_overrides(database.index_config(), cfg);
430
437
  database.bulk_ingest_with_config(records, batch_size as usize, Some(merged))
@@ -442,6 +449,7 @@ impl NativeDatabase {
442
449
  "ef_construction": cfg.ef_construction as u32,
443
450
  "ef_search": cfg.ef_search.map(|v| v as u32),
444
451
  "parallel_insert_threshold": cfg.parallel_insert_threshold as u32,
452
+ "tombstone_rebuild_pct": cfg.tombstone_rebuild_pct as u32,
445
453
  });
446
454
  stringify_value(value)
447
455
  }
@@ -459,15 +467,92 @@ impl NativeDatabase {
459
467
  ef_construction: Option<u32>,
460
468
  ef_search: Option<u32>,
461
469
  parallel_insert_threshold: Option<u32>,
470
+ tombstone_rebuild_pct: Option<u32>,
462
471
  ) -> Result<()> {
463
472
  let mut database = self.write_open()?;
464
- let overrides =
465
- merge_index_config(m, ef_construction, ef_search, parallel_insert_threshold)
466
- .ok_or_else(|| err("setIndexConfig requires at least one field"))?;
473
+ let overrides = merge_index_config(
474
+ m,
475
+ ef_construction,
476
+ ef_search,
477
+ parallel_insert_threshold,
478
+ tombstone_rebuild_pct,
479
+ )
480
+ .ok_or_else(|| err("setIndexConfig requires at least one field"))?;
467
481
  let merged = apply_index_overrides(database.index_config(), overrides);
468
482
  database.set_index_config(merged).map_err(to_napi_error)
469
483
  }
470
484
 
485
+ /// Configure WAL durability. `mode` is one of: `"per_op"` (the default,
486
+ /// fsync after every insert), `"every_n"` (fsync once every `n` inserts
487
+ /// — provide `n`), `"on_flush"` (only fsync at flush / compact / close).
488
+ ///
489
+ /// Relaxing this knob is the single biggest ingestion throughput lever
490
+ /// on macOS APFS: `on_flush` typically multiplies throughput by 5–10×
491
+ /// at the cost of losing un-flushed writes on a crash.
492
+ #[napi(js_name = "setWalSyncMode")]
493
+ pub fn set_wal_sync_mode(&self, mode: String, n: Option<u32>) -> Result<()> {
494
+ let parsed = match mode.to_ascii_lowercase().as_str() {
495
+ "per_op" | "perop" => WalSyncMode::PerOp,
496
+ "every_n" | "everyn" => {
497
+ let n = n.ok_or_else(|| {
498
+ err("setWalSyncMode(\"every_n\", ...) requires the second `n` argument")
499
+ })?;
500
+ WalSyncMode::EveryN(n as usize)
501
+ }
502
+ "on_flush" | "onflush" => WalSyncMode::OnFlush,
503
+ other => {
504
+ return Err(err(format!(
505
+ "unknown WAL sync mode '{other}' (expected 'per_op', 'every_n', or 'on_flush')"
506
+ )));
507
+ }
508
+ };
509
+ let mut database = self.write_open()?;
510
+ database.set_wal_sync_mode(parsed).map_err(to_napi_error)
511
+ }
512
+
513
+ /// Return the current WAL sync mode as a JSON string: either
514
+ /// `{"mode":"per_op"}`, `{"mode":"every_n","n":64}`, or
515
+ /// `{"mode":"on_flush"}`.
516
+ #[napi(js_name = "walSyncMode")]
517
+ pub fn wal_sync_mode(&self) -> Result<String> {
518
+ let database = self.read()?;
519
+ let value = match database.wal_sync_mode() {
520
+ WalSyncMode::PerOp => json!({"mode": "per_op"}),
521
+ WalSyncMode::EveryN(n) => json!({"mode": "every_n", "n": n as u32}),
522
+ WalSyncMode::OnFlush => json!({"mode": "on_flush"}),
523
+ };
524
+ stringify_value(value)
525
+ }
526
+
527
+ /// Return `[live, dead]` summed across every HNSW graph (global +
528
+ /// namespaced). Useful for monitoring when to `compact()`.
529
+ #[napi(js_name = "tombstoneStats")]
530
+ pub fn tombstone_stats(&self) -> Result<Vec<u32>> {
531
+ let database = self.read()?;
532
+ let (live, dead) = database.tombstone_stats();
533
+ Ok(vec![live as u32, dead as u32])
534
+ }
535
+
536
+ /// Materialise the contiguous-vector arena. Mirrors every record's
537
+ /// default dense vector into a single flat `Float32Array`-shaped
538
+ /// buffer for cache- and SIMD-friendly brute-force / rescoring scans.
539
+ /// Normally built lazily; call this before a heavy scan to pay the
540
+ /// build cost up front. Cheap when already fresh.
541
+ #[napi(js_name = "prepareForScan")]
542
+ pub fn prepare_for_scan(&self) -> Result<()> {
543
+ let mut database = self.write_open()?;
544
+ database.prepare_for_scan();
545
+ Ok(())
546
+ }
547
+
548
+ /// Number of vectors in the contiguous arena, or `null` if it has
549
+ /// not been materialised yet in this session.
550
+ #[napi(js_name = "vectorArenaLen")]
551
+ pub fn vector_arena_len(&self) -> Result<Option<u32>> {
552
+ let database = self.read()?;
553
+ Ok(database.vector_arena_len().map(|n| n as u32))
554
+ }
555
+
471
556
  #[napi]
472
557
  pub fn get(&self, id: String, namespace: Option<String>) -> Result<Option<String>> {
473
558
  let record = {
@@ -1009,9 +1094,16 @@ impl NativeDatabase {
1009
1094
  ef_construction: Option<u32>,
1010
1095
  ef_search: Option<u32>,
1011
1096
  parallel_insert_threshold: Option<u32>,
1097
+ tombstone_rebuild_pct: Option<u32>,
1012
1098
  ) -> Result<AsyncTask<BulkIngestTask>> {
1013
1099
  let records = parse_record_batch_json(&records_json, namespace.as_deref())?;
1014
- let tuning = merge_index_config(m, ef_construction, ef_search, parallel_insert_threshold);
1100
+ let tuning = merge_index_config(
1101
+ m,
1102
+ ef_construction,
1103
+ ef_search,
1104
+ parallel_insert_threshold,
1105
+ tombstone_rebuild_pct,
1106
+ );
1015
1107
  Ok(AsyncTask::new(BulkIngestTask {
1016
1108
  db: self.inner.clone(),
1017
1109
  records,
@@ -2020,9 +2112,10 @@ struct IndexConfigPatch {
2020
2112
  ef_construction: Option<usize>,
2021
2113
  ef_search: Option<usize>,
2022
2114
  parallel_insert_threshold: Option<usize>,
2115
+ tombstone_rebuild_pct: Option<u8>,
2023
2116
  }
2024
2117
 
2025
- /// Pack the four optional HNSW tuning fields into a patch. Returns `None`
2118
+ /// Pack the five optional HNSW tuning fields into a patch. Returns `None`
2026
2119
  /// when every field is `None`; explicit zeroes are preserved so core
2027
2120
  /// validation can reject invalid build/search widths instead of treating
2028
2121
  /// them as "not provided".
@@ -2031,11 +2124,13 @@ fn merge_index_config(
2031
2124
  ef_construction: Option<u32>,
2032
2125
  ef_search: Option<u32>,
2033
2126
  parallel_insert_threshold: Option<u32>,
2127
+ tombstone_rebuild_pct: Option<u32>,
2034
2128
  ) -> Option<IndexConfigPatch> {
2035
2129
  if m.is_none()
2036
2130
  && ef_construction.is_none()
2037
2131
  && ef_search.is_none()
2038
2132
  && parallel_insert_threshold.is_none()
2133
+ && tombstone_rebuild_pct.is_none()
2039
2134
  {
2040
2135
  return None;
2041
2136
  }
@@ -2044,6 +2139,7 @@ fn merge_index_config(
2044
2139
  ef_construction: ef_construction.map(|v| v as usize),
2045
2140
  ef_search: ef_search.map(|v| v as usize),
2046
2141
  parallel_insert_threshold: parallel_insert_threshold.map(|v| v as usize),
2142
+ tombstone_rebuild_pct: tombstone_rebuild_pct.map(|v| if v > 100 { 101 } else { v as u8 }),
2047
2143
  })
2048
2144
  }
2049
2145
 
@@ -2058,6 +2154,9 @@ fn apply_index_overrides(current: IndexConfig, patch: IndexConfigPatch) -> Index
2058
2154
  parallel_insert_threshold: patch
2059
2155
  .parallel_insert_threshold
2060
2156
  .unwrap_or(current.parallel_insert_threshold),
2157
+ tombstone_rebuild_pct: patch
2158
+ .tombstone_rebuild_pct
2159
+ .unwrap_or(current.tombstone_rebuild_pct),
2061
2160
  }
2062
2161
  }
2063
2162
 
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "vectlite-core"
3
- version = "0.10.0"
3
+ version = "0.11.0"
4
4
  edition = "2024"
5
5
  license = "MIT"
6
6
  description = "Core storage engine for vectlite."