vectlite 0.1.8 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -31,8 +31,11 @@ db.upsert('doc2', embedding2, { source: 'notes', title: 'Billing' })
31
31
  // Search with filters
32
32
  const results = db.search(embeddingQuery, { k: 5, filter: { source: 'blog' } })
33
33
 
34
+ // Query-free inspection
35
+ console.log(db.count({ filter: { source: 'blog' } }))
36
+
34
37
  // Clean up
35
- db.compact()
38
+ db.close()
36
39
  ```
37
40
 
38
41
  ## Features
@@ -62,9 +65,13 @@ db.compact()
62
65
 
63
66
  - **Physical collections** -- `vectlite.openStore()` manages a directory of independent databases
64
67
  - **Bulk ingestion** -- `bulkIngest()` with deferred index rebuilds for fast imports
68
+ - **Listing & filtered counts** -- `list()` and `count({ namespace, filter })` without a vector query
69
+ - **Delete by filter** -- `deleteByFilter()` for bulk deletion by metadata filter
65
70
  - **Snapshots** -- `db.snapshot(path)` creates a self-contained copy
66
71
  - **Backup / Restore** -- `db.backup(dir)` and `vectlite.restore(dir, path)` for full roundtrips
67
72
  - **Read-only mode** -- `vectlite.open(path, { readOnly: true })` for safe concurrent readers
73
+ - **Explicit close** -- `db.close()` to release locks deterministically
74
+ - **Lock timeouts** -- `lockTimeout` (in seconds) for bounded lock acquisition waits
68
75
 
69
76
  ## Usage
70
77
 
@@ -145,11 +152,23 @@ const restored = vectlite.restore('/backups/full/', 'restored.vdb')
145
152
  ### Read-Only Mode
146
153
 
147
154
  ```js
148
- const ro = vectlite.open('knowledge.vdb', { readOnly: true })
155
+ const ro = vectlite.open('knowledge.vdb', { readOnly: true, lockTimeout: 5 })
149
156
  const results = ro.search(query, { k: 5 }) // Reads work
150
157
  ro.upsert(...) // Throws VectLiteError
151
158
  ```
152
159
 
160
+ ### Listing, Counting, and Lifecycle
161
+
162
+ ```js
163
+ const db = vectlite.open('knowledge.vdb', { dimension: 384, lockTimeout: 5 })
164
+
165
+ const records = db.list({ namespace: 'docs', filter: { stale: false }, limit: 20 })
166
+ const count = db.count({ namespace: 'docs', filter: { source: 'blog' } })
167
+ const deleted = db.deleteByFilter({ stale: true }, { namespace: 'docs' })
168
+
169
+ db.close()
170
+ ```
171
+
153
172
  ### Search Diagnostics
154
173
 
155
174
  ```js
@@ -164,6 +183,46 @@ console.log(outcome.stats.used_ann) // true
164
183
  console.log(outcome.results[0].explain) // Detailed scoring breakdown
165
184
  ```
166
185
 
186
+ ## Database Methods Reference
187
+
188
+ ### Write Methods
189
+
190
+ | Method | Description |
191
+ |---|---|
192
+ | `db.upsert(id, vector, metadata, options)` | Insert or update a single record |
193
+ | `db.insert(id, vector, metadata, options)` | Insert a record (throws on duplicate id) |
194
+ | `db.upsertMany(records, { namespace })` | Upsert a batch of records |
195
+ | `db.insertMany(records, { namespace })` | Insert a batch |
196
+ | `db.bulkIngest(records, { namespace, batchSize })` | Fastest bulk import with batched WAL writes |
197
+ | `db.delete(id, { namespace })` | Delete a single record |
198
+ | `db.deleteMany(ids, { namespace })` | Delete multiple records by id |
199
+ | `db.deleteByFilter(filter, { namespace })` | Delete all records matching a filter |
200
+
201
+ ### Read Methods
202
+
203
+ | Method | Description |
204
+ |---|---|
205
+ | `db.get(id, { namespace })` | Get a single record by id |
206
+ | `db.search(query, options)` | Search and return a list of results |
207
+ | `db.searchWithStats(query, options)` | Search with detailed performance stats |
208
+ | `db.count({ namespace, filter })` | Count records, optionally scoped by namespace/filter |
209
+ | `db.list({ namespace, filter, limit, offset })` | List records without issuing a vector query |
210
+ | `db.namespaces()` | List all namespaces |
211
+ | `db.dimension` | Vector dimension (property) |
212
+ | `db.path` | Database file path (property) |
213
+ | `db.readOnly` | Whether the database is read-only (property) |
214
+
215
+ ### Maintenance Methods
216
+
217
+ | Method | Description |
218
+ |---|---|
219
+ | `db.compact()` | Fold WAL into snapshot and persist ANN indexes |
220
+ | `db.flush()` | Alias for `compact()` |
221
+ | `db.snapshot(dest)` | Create a self-contained `.vdb` copy |
222
+ | `db.backup(destDir)` | Full backup including ANN sidecar files |
223
+ | `db.transaction()` | Begin an atomic transaction |
224
+ | `db.close()` | Flush pending state, release the file lock, and invalidate the handle |
225
+
167
226
  ## Filter Operators
168
227
 
169
228
  | Operator | Example | Description |
package/index.d.ts CHANGED
@@ -82,6 +82,16 @@ export interface WriteOptions {
82
82
  vectors?: NamedVectors | null
83
83
  }
84
84
 
85
+ export interface CountOptions {
86
+ namespace?: string | null
87
+ filter?: Filter | null
88
+ }
89
+
90
+ export interface ListOptions extends CountOptions {
91
+ limit?: number | null
92
+ offset?: number | null
93
+ }
94
+
85
95
  export interface BulkIngestOptions {
86
96
  namespace?: string | null
87
97
  batchSize?: number
@@ -108,6 +118,7 @@ export interface SearchOptions {
108
118
  export interface OpenOptions {
109
119
  dimension?: number | null
110
120
  readOnly?: boolean
121
+ lockTimeout?: number | null
111
122
  }
112
123
 
113
124
  export class VectLiteError extends Error {}
@@ -130,8 +141,10 @@ export class Database {
130
141
  readonly dimension: number
131
142
  readonly readOnly: boolean
132
143
 
133
- count(): number
144
+ count(options?: CountOptions): number
134
145
  namespaces(): string[]
146
+ close(): void
147
+ list(options?: ListOptions): Record[]
135
148
  transaction(): Transaction
136
149
  insert(id: string, vector: number[], metadata?: Metadata | null, options?: WriteOptions): void
137
150
  upsert(id: string, vector: number[], metadata?: Metadata | null, options?: WriteOptions): void
@@ -141,6 +154,7 @@ export class Database {
141
154
  get(id: string, options?: { namespace?: string | null }): Record | null
142
155
  delete(id: string, options?: { namespace?: string | null }): boolean
143
156
  deleteMany(ids: string[], options?: { namespace?: string | null }): number
157
+ deleteByFilter(filter: Filter, options?: { namespace?: string | null }): number
144
158
  flush(): void
145
159
  compact(): void
146
160
  snapshot(dest: string): void
package/index.js CHANGED
@@ -209,14 +209,31 @@ class Database {
209
209
  return wrapError(() => this._native.readOnly)
210
210
  }
211
211
 
212
- count() {
213
- return wrapError(() => this._native.count())
212
+ count(options = {}) {
213
+ return wrapError(() => this._native.count(options.namespace ?? null, encode(options.filter)))
214
214
  }
215
215
 
216
216
  namespaces() {
217
217
  return wrapError(() => this._native.namespaces())
218
218
  }
219
219
 
220
+ close() {
221
+ return wrapError(() => this._native.close())
222
+ }
223
+
224
+ list(options = {}) {
225
+ return wrapError(() =>
226
+ decode(
227
+ this._native.list(
228
+ options.namespace ?? null,
229
+ encode(options.filter),
230
+ options.limit ?? null,
231
+ options.offset ?? null,
232
+ ),
233
+ ),
234
+ )
235
+ }
236
+
220
237
  transaction() {
221
238
  return wrapError(() => new Transaction(this._native.transaction()))
222
239
  }
@@ -261,6 +278,10 @@ class Database {
261
278
  return wrapError(() => this._native.deleteMany(ids, options.namespace ?? null))
262
279
  }
263
280
 
281
+ deleteByFilter(filter, options = {}) {
282
+ return wrapError(() => this._native.deleteByFilter(encode(filter), options.namespace ?? null))
283
+ }
284
+
264
285
  flush() {
265
286
  return wrapError(() => this._native.flush())
266
287
  }
@@ -323,7 +344,9 @@ class Store {
323
344
  }
324
345
 
325
346
  function open(path, options = {}) {
326
- return wrapError(() => new Database(native.open(path, options.dimension ?? null, options.readOnly ?? false)))
347
+ return wrapError(() =>
348
+ new Database(native.open(path, options.dimension ?? null, options.readOnly ?? false, options.lockTimeout ?? null)),
349
+ )
327
350
  }
328
351
 
329
352
  function openStore(root) {
package/native/Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "vectlite-node"
3
- version = "0.1.8"
3
+ version = "0.1.11"
4
4
  edition = "2024"
5
5
  license = "MIT"
6
6
  description = "Node.js bindings for vectlite."
package/native/src/lib.rs CHANGED
@@ -133,9 +133,21 @@ impl NativeDatabase {
133
133
  }
134
134
 
135
135
  #[napi]
136
- pub fn count(&self) -> Result<u32> {
136
+ pub fn count(&self, namespace: Option<String>, filter_json: Option<String>) -> Result<u32> {
137
+ let filter = filter_json
138
+ .as_ref()
139
+ .map(|json_str| {
140
+ let value: serde_json::Value = serde_json::from_str(json_str)
141
+ .map_err(|e| err(format!("invalid filter JSON: {e}")))?;
142
+ json_to_filter(&value)
143
+ })
144
+ .transpose()?;
145
+ if namespace.is_none() && filter.is_none() {
146
+ let database = self.read()?;
147
+ return Ok(database.len() as u32);
148
+ }
137
149
  let database = self.read()?;
138
- Ok(database.len() as u32)
150
+ Ok(database.count_filtered(namespace.as_deref(), filter.as_ref()) as u32)
139
151
  }
140
152
 
141
153
  #[napi]
@@ -145,11 +157,63 @@ impl NativeDatabase {
145
157
  }
146
158
 
147
159
  #[napi]
148
- pub fn transaction(&self) -> NativeTransaction {
149
- NativeTransaction {
160
+ pub fn close(&self) -> Result<()> {
161
+ let mut database = self.write()?;
162
+ database.close().map_err(to_napi_error)
163
+ }
164
+
165
+ #[napi]
166
+ pub fn list(
167
+ &self,
168
+ namespace: Option<String>,
169
+ filter_json: Option<String>,
170
+ limit: Option<u32>,
171
+ offset: Option<u32>,
172
+ ) -> Result<String> {
173
+ let filter = filter_json
174
+ .as_ref()
175
+ .map(|json_str| {
176
+ let value: serde_json::Value = serde_json::from_str(json_str)
177
+ .map_err(|e| err(format!("invalid filter JSON: {e}")))?;
178
+ json_to_filter(&value)
179
+ })
180
+ .transpose()?;
181
+ let records = {
182
+ let database = self.read()?;
183
+ database
184
+ .list(
185
+ namespace.as_deref(),
186
+ filter.as_ref(),
187
+ limit.unwrap_or(0) as usize,
188
+ offset.unwrap_or(0) as usize,
189
+ )
190
+ .into_iter()
191
+ .cloned()
192
+ .collect::<Vec<_>>()
193
+ };
194
+ let json_records: Vec<Value> = records.iter().map(record_to_json).collect();
195
+ stringify_value(Value::Array(json_records))
196
+ }
197
+
198
+ #[napi(js_name = "deleteByFilter")]
199
+ pub fn delete_by_filter(&self, filter_json: String, namespace: Option<String>) -> Result<u32> {
200
+ let value: serde_json::Value = serde_json::from_str(&filter_json)
201
+ .map_err(|e| err(format!("invalid filter JSON: {e}")))?;
202
+ let filter = json_to_filter(&value)?;
203
+ let mut database = self.write_open()?;
204
+ database
205
+ .delete_by_filter(namespace.as_deref(), &filter)
206
+ .map(|count| count as u32)
207
+ .map_err(to_napi_error)
208
+ }
209
+
210
+ #[napi]
211
+ pub fn transaction(&self) -> Result<NativeTransaction> {
212
+ drop(self.read()?);
213
+ Ok(NativeTransaction {
150
214
  inner: Arc::clone(&self.inner),
151
215
  staged: Mutex::new(TransactionState::default()),
152
- }
216
+ })
153
217
  }
154
218
 
155
219
  #[napi]
@@ -166,7 +230,7 @@ impl NativeDatabase {
166
230
  let sparse = parse_sparse_json(sparse_json)?;
167
231
  let vectors = parse_named_vectors_json(vectors_json)?;
168
232
  let vector = js_vector_to_core(vector, "vector")?;
169
- let mut database = self.write()?;
233
+ let mut database = self.write_open()?;
170
234
  database
171
235
  .insert_with_vectors_in_namespace(
172
236
  namespace.unwrap_or_default(),
@@ -193,7 +257,7 @@ impl NativeDatabase {
193
257
  let sparse = parse_sparse_json(sparse_json)?;
194
258
  let vectors = parse_named_vectors_json(vectors_json)?;
195
259
  let vector = js_vector_to_core(vector, "vector")?;
196
- let mut database = self.write()?;
260
+ let mut database = self.write_open()?;
197
261
  database
198
262
  .upsert_with_vectors_in_namespace(
199
263
  namespace.unwrap_or_default(),
@@ -209,7 +273,7 @@ impl NativeDatabase {
209
273
  #[napi(js_name = "insertMany")]
210
274
  pub fn insert_many(&self, records_json: String, namespace: Option<String>) -> Result<u32> {
211
275
  let records = parse_record_batch_json(&records_json, namespace.as_deref())?;
212
- let mut database = self.write()?;
276
+ let mut database = self.write_open()?;
213
277
  database
214
278
  .insert_many(records)
215
279
  .map(|count| count as u32)
@@ -219,7 +283,7 @@ impl NativeDatabase {
219
283
  #[napi(js_name = "upsertMany")]
220
284
  pub fn upsert_many(&self, records_json: String, namespace: Option<String>) -> Result<u32> {
221
285
  let records = parse_record_batch_json(&records_json, namespace.as_deref())?;
222
- let mut database = self.write()?;
286
+ let mut database = self.write_open()?;
223
287
  database
224
288
  .upsert_many(records)
225
289
  .map(|count| count as u32)
@@ -234,7 +298,7 @@ impl NativeDatabase {
234
298
  batch_size: u32,
235
299
  ) -> Result<u32> {
236
300
  let records = parse_record_batch_json(&records_json, namespace.as_deref())?;
237
- let mut database = self.write()?;
301
+ let mut database = self.write_open()?;
238
302
  database
239
303
  .bulk_ingest(records, batch_size as usize)
240
304
  .map(|count| count as u32)
@@ -257,7 +321,7 @@ impl NativeDatabase {
257
321
 
258
322
  #[napi]
259
323
  pub fn delete(&self, id: String, namespace: Option<String>) -> Result<bool> {
260
- let mut database = self.write()?;
324
+ let mut database = self.write_open()?;
261
325
  database
262
326
  .delete_in_namespace(&namespace.unwrap_or_default(), &id)
263
327
  .map_err(to_napi_error)
@@ -265,7 +329,7 @@ impl NativeDatabase {
265
329
 
266
330
  #[napi(js_name = "deleteMany")]
267
331
  pub fn delete_many(&self, ids: Vec<String>, namespace: Option<String>) -> Result<u32> {
268
- let mut database = self.write()?;
332
+ let mut database = self.write_open()?;
269
333
  database
270
334
  .delete_many_in_namespace(&namespace.unwrap_or_default(), ids)
271
335
  .map(|count| count as u32)
@@ -274,13 +338,13 @@ impl NativeDatabase {
274
338
 
275
339
  #[napi]
276
340
  pub fn flush(&self) -> Result<()> {
277
- let mut database = self.write()?;
341
+ let mut database = self.write_open()?;
278
342
  database.flush().map_err(to_napi_error)
279
343
  }
280
344
 
281
345
  #[napi]
282
346
  pub fn compact(&self) -> Result<()> {
283
- let mut database = self.write()?;
347
+ let mut database = self.write_open()?;
284
348
  database.compact().map_err(to_napi_error)
285
349
  }
286
350
 
@@ -478,9 +542,14 @@ impl NativeTransaction {
478
542
 
479
543
  impl NativeDatabase {
480
544
  fn read(&self) -> Result<RwLockReadGuard<'_, CoreDatabase>> {
481
- self.inner
545
+ let database = self
546
+ .inner
482
547
  .read()
483
- .map_err(|_| err("database read lock poisoned"))
548
+ .map_err(|_| err("database read lock poisoned"))?;
549
+ if database.is_closed() {
550
+ return Err(to_napi_error(closed_database_error()));
551
+ }
552
+ Ok(database)
484
553
  }
485
554
 
486
555
  fn write(&self) -> Result<RwLockWriteGuard<'_, CoreDatabase>> {
@@ -489,6 +558,14 @@ impl NativeDatabase {
489
558
  .map_err(|_| err("database write lock poisoned"))
490
559
  }
491
560
 
561
+ fn write_open(&self) -> Result<RwLockWriteGuard<'_, CoreDatabase>> {
562
+ let database = self.write()?;
563
+ if database.is_closed() {
564
+ return Err(to_napi_error(closed_database_error()));
565
+ }
566
+ Ok(database)
567
+ }
568
+
492
569
  fn execute_search(
493
570
  &self,
494
571
  query: Option<Vec<f32>>,
@@ -522,18 +599,40 @@ impl NativeDatabase {
522
599
  }
523
600
 
524
601
  #[napi]
525
- pub fn open(path: String, dimension: Option<u32>, read_only: bool) -> Result<NativeDatabase> {
602
+ pub fn open(
603
+ path: String,
604
+ dimension: Option<u32>,
605
+ read_only: bool,
606
+ lock_timeout: Option<f64>,
607
+ ) -> Result<NativeDatabase> {
526
608
  let database = if read_only {
527
609
  if !Path::new(&path).exists() {
528
610
  return Err(err("cannot open non-existent database in read-only mode"));
529
611
  }
530
- CoreDatabase::open_read_only(&path).map_err(to_napi_error)?
612
+ match lock_timeout {
613
+ Some(timeout) => CoreDatabase::open_read_only_with_timeout(&path, Some(timeout))
614
+ .map_err(to_napi_error)?,
615
+ None => CoreDatabase::open_read_only(&path).map_err(to_napi_error)?,
616
+ }
531
617
  } else if Path::new(&path).exists() {
532
- match dimension {
533
- Some(dimension) => {
618
+ match (dimension, lock_timeout) {
619
+ (Some(dimension), Some(timeout)) => {
620
+ let db = CoreDatabase::open_with_timeout(&path, timeout).map_err(to_napi_error)?;
621
+ if db.dimension() != dimension as usize {
622
+ return Err(to_napi_error(vectlite::VectLiteError::DimensionMismatch {
623
+ expected: db.dimension(),
624
+ found: dimension as usize,
625
+ }));
626
+ }
627
+ db
628
+ }
629
+ (Some(dimension), None) => {
534
630
  CoreDatabase::open_or_create(&path, dimension as usize).map_err(to_napi_error)?
535
631
  }
536
- None => CoreDatabase::open(&path).map_err(to_napi_error)?,
632
+ (None, Some(timeout)) => {
633
+ CoreDatabase::open_with_timeout(&path, timeout).map_err(to_napi_error)?
634
+ }
635
+ (None, None) => CoreDatabase::open(&path).map_err(to_napi_error)?,
537
636
  }
538
637
  } else {
539
638
  let Some(dimension) = dimension else {
@@ -1212,3 +1311,7 @@ fn err(message: impl Into<String>) -> NapiError {
1212
1311
  fn to_napi_error(error: vectlite::VectLiteError) -> NapiError {
1213
1312
  err(error.to_string())
1214
1313
  }
1314
+
1315
+ fn closed_database_error() -> vectlite::VectLiteError {
1316
+ vectlite::VectLiteError::InvalidFormat("database is closed".to_owned())
1317
+ }
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "vectlite-core"
3
- version = "0.1.8"
3
+ version = "0.1.11"
4
4
  edition = "2024"
5
5
  license = "MIT"
6
6
  description = "Core storage engine for vectlite."
@@ -81,7 +81,9 @@ impl fmt::Display for VectLiteError {
81
81
  Self::DimensionMismatch { expected, found } => {
82
82
  write!(
83
83
  f,
84
- "vector dimension mismatch: expected {expected}, found {found}"
84
+ "vector dimension mismatch: expected {expected}, found {found}. \
85
+ If you changed embedding models, delete the existing .vdb file \
86
+ or use a different path to create a new database with dimension {found}"
85
87
  )
86
88
  }
87
89
  Self::DuplicateId { namespace, id } => {
@@ -808,12 +810,44 @@ impl Database {
808
810
  Ok(database)
809
811
  }
810
812
 
813
+ /// Open an existing database with a lock timeout in seconds.
814
+ /// If the lock cannot be acquired within the timeout, returns
815
+ /// `VectLiteError::LockContention`.
816
+ pub fn open_with_timeout(path: impl AsRef<Path>, timeout_secs: f64) -> Result<Self> {
817
+ let path = path.as_ref().to_path_buf();
818
+ let timeout = timeout_duration(timeout_secs, "lock_timeout")?;
819
+ let lock = acquire_exclusive_lock_with_timeout(&path, Some(timeout))?;
820
+ let mut file = File::open(&path)?;
821
+ let mut database = Self::read_from(&path, &mut file)?;
822
+ database._lock_file = Some(lock);
823
+ database.read_only = false;
824
+ database.replay_wal()?;
825
+ database.rebuild_sparse_index();
826
+ database.ann_loaded_from_disk = database.try_load_ann_from_disk();
827
+ if !database.ann_loaded_from_disk {
828
+ database.rebuild_ann();
829
+ }
830
+ Ok(database)
831
+ }
832
+
811
833
  /// Open an existing database in read-only mode. Acquires a shared lock
812
834
  /// so multiple readers can coexist. All write operations will return
813
835
  /// `VectLiteError::ReadOnly`.
814
836
  pub fn open_read_only(path: impl AsRef<Path>) -> Result<Self> {
837
+ Self::open_read_only_with_timeout(path, None)
838
+ }
839
+
840
+ /// Open an existing database in read-only mode, optionally waiting for a
841
+ /// shared lock to become available.
842
+ pub fn open_read_only_with_timeout(
843
+ path: impl AsRef<Path>,
844
+ timeout_secs: Option<f64>,
845
+ ) -> Result<Self> {
815
846
  let path = path.as_ref().to_path_buf();
816
- let lock = acquire_shared_lock(&path)?;
847
+ let timeout = timeout_secs
848
+ .map(|seconds| timeout_duration(seconds, "lock_timeout"))
849
+ .transpose()?;
850
+ let lock = acquire_shared_lock_with_timeout(&path, timeout)?;
817
851
  let mut file = File::open(&path)?;
818
852
  let mut database = Self::read_from(&path, &mut file)?;
819
853
  database._lock_file = Some(lock);
@@ -831,7 +865,43 @@ impl Database {
831
865
  self.read_only
832
866
  }
833
867
 
868
+ /// Returns true if the database has been closed.
869
+ pub fn is_closed(&self) -> bool {
870
+ self._lock_file.is_none() && self.records.is_empty() && self.dimension == 0
871
+ }
872
+
873
+ /// Close the database: flush WAL (if writable), release the file lock,
874
+ /// and clear all in-memory state. After calling this, operations that check
875
+ /// the open state (writes, searches, snapshots, backups) return an error.
876
+ pub fn close(&mut self) -> Result<()> {
877
+ if self.is_closed() {
878
+ return Ok(());
879
+ }
880
+ // Flush WAL to main file if writable
881
+ if !self.read_only {
882
+ self.compact_inner()?;
883
+ }
884
+ // Release the lock by dropping the file handle
885
+ self._lock_file = None;
886
+ // Clear in-memory state
887
+ self.records.clear();
888
+ self.ann = AnnCatalog::default();
889
+ self.sparse_index = SparseIndex::default();
890
+ self.dimension = 0;
891
+ Ok(())
892
+ }
893
+
894
+ fn check_open(&self) -> Result<()> {
895
+ if self.is_closed() {
896
+ return Err(VectLiteError::InvalidFormat(
897
+ "database is closed".to_owned(),
898
+ ));
899
+ }
900
+ Ok(())
901
+ }
902
+
834
903
  fn check_writable(&self) -> Result<()> {
904
+ self.check_open()?;
835
905
  if self.read_only {
836
906
  return Err(VectLiteError::ReadOnly);
837
907
  }
@@ -869,6 +939,95 @@ impl Database {
869
939
  self.records.is_empty()
870
940
  }
871
941
 
942
+ /// Count records, optionally filtered by namespace and/or metadata filter.
943
+ pub fn count_filtered(
944
+ &self,
945
+ namespace: Option<&str>,
946
+ filter: Option<&MetadataFilter>,
947
+ ) -> usize {
948
+ self.records
949
+ .iter()
950
+ .filter(|((ns, _), record)| {
951
+ if let Some(target_ns) = namespace {
952
+ if ns != target_ns {
953
+ return false;
954
+ }
955
+ }
956
+ if let Some(filter) = filter {
957
+ if !filter.matches(&record.metadata) {
958
+ return false;
959
+ }
960
+ }
961
+ true
962
+ })
963
+ .count()
964
+ }
965
+
966
+ /// List records by namespace and/or metadata filter without requiring a
967
+ /// vector query. Returns records ordered by (namespace, id). A `limit` of 0 means no limit.
968
+ pub fn list(
969
+ &self,
970
+ namespace: Option<&str>,
971
+ filter: Option<&MetadataFilter>,
972
+ limit: usize,
973
+ offset: usize,
974
+ ) -> Vec<&Record> {
975
+ self.records
976
+ .iter()
977
+ .filter(|((ns, _), record)| {
978
+ if let Some(target_ns) = namespace {
979
+ if ns != target_ns {
980
+ return false;
981
+ }
982
+ }
983
+ if let Some(filter) = filter {
984
+ if !filter.matches(&record.metadata) {
985
+ return false;
986
+ }
987
+ }
988
+ true
989
+ })
990
+ .skip(offset)
991
+ .take(if limit == 0 { usize::MAX } else { limit })
992
+ .map(|(_, record)| record)
993
+ .collect()
994
+ }
995
+
996
+ /// Delete all records matching a filter, optionally within a namespace.
997
+ /// Returns the number of records deleted.
998
+ pub fn delete_by_filter(
999
+ &mut self,
1000
+ namespace: Option<&str>,
1001
+ filter: &MetadataFilter,
1002
+ ) -> Result<usize> {
1003
+ self.check_writable()?;
1004
+ let keys_to_delete: Vec<(String, String)> = self
1005
+ .records
1006
+ .iter()
1007
+ .filter(|((ns, _), record)| {
1008
+ if let Some(target_ns) = namespace {
1009
+ if ns != target_ns {
1010
+ return false;
1011
+ }
1012
+ }
1013
+ filter.matches(&record.metadata)
1014
+ })
1015
+ .map(|(key, _)| key.clone())
1016
+ .collect();
1017
+
1018
+ let count = keys_to_delete.len();
1019
+ if count == 0 {
1020
+ return Ok(0);
1021
+ }
1022
+
1023
+ let ops: Vec<WalOp> = keys_to_delete
1024
+ .into_iter()
1025
+ .map(|(namespace, id)| WalOp::Delete { namespace, id })
1026
+ .collect();
1027
+ self.apply_wal_batch(ops)?;
1028
+ Ok(count)
1029
+ }
1030
+
872
1031
  pub fn insert(
873
1032
  &mut self,
874
1033
  id: impl Into<String>,
@@ -1026,7 +1185,11 @@ impl Database {
1026
1185
  return Ok(0);
1027
1186
  }
1028
1187
 
1029
- self.apply_wal_batch(records.into_iter().map(WalOp::Upsert).collect())?;
1188
+ self.apply_wal_batch_deferred(records.into_iter().map(WalOp::Upsert).collect())?;
1189
+ self.rebuild_sparse_index();
1190
+ self.rebuild_ann();
1191
+ self.ann_loaded_from_disk = false;
1192
+ self.persist_ann_to_disk()?;
1030
1193
  Ok(count)
1031
1194
  }
1032
1195
 
@@ -1048,7 +1211,11 @@ impl Database {
1048
1211
  return Ok(0);
1049
1212
  }
1050
1213
 
1051
- self.apply_wal_batch(records.into_iter().map(WalOp::Upsert).collect())?;
1214
+ self.apply_wal_batch_deferred(records.into_iter().map(WalOp::Upsert).collect())?;
1215
+ self.rebuild_sparse_index();
1216
+ self.rebuild_ann();
1217
+ self.ann_loaded_from_disk = false;
1218
+ self.persist_ann_to_disk()?;
1052
1219
  Ok(count)
1053
1220
  }
1054
1221
 
@@ -1239,7 +1406,15 @@ impl Database {
1239
1406
  WriteOperation::Delete { namespace, id } => Ok(WalOp::Delete { namespace, id }),
1240
1407
  })
1241
1408
  .collect::<Result<Vec<_>>>()?;
1242
- self.apply_wal_batch(ops)
1409
+ if ops.is_empty() {
1410
+ return Ok(());
1411
+ }
1412
+ self.apply_wal_batch_deferred(ops)?;
1413
+ self.rebuild_sparse_index();
1414
+ self.rebuild_ann();
1415
+ self.ann_loaded_from_disk = false;
1416
+ self.persist_ann_to_disk()?;
1417
+ Ok(())
1243
1418
  }
1244
1419
 
1245
1420
  fn hybrid_search_internal(
@@ -1249,6 +1424,7 @@ impl Database {
1249
1424
  options: HybridSearchOptions,
1250
1425
  namespace: Option<&str>,
1251
1426
  ) -> Result<SearchOutcome> {
1427
+ self.check_open()?;
1252
1428
  if let Some(query) = dense_query {
1253
1429
  self.validate_vector(query)?;
1254
1430
  }
@@ -1298,7 +1474,9 @@ impl Database {
1298
1474
  .unwrap_or_default();
1299
1475
  let sparse_us = sparse_start.elapsed().as_micros() as u64;
1300
1476
 
1301
- let candidate_keys = if dense_query.is_some() && ann_candidates.is_none() {
1477
+ let candidate_keys = if dense_query.is_none() {
1478
+ Some(sparse_candidates.clone())
1479
+ } else if dense_query.is_some() && ann_candidates.is_none() {
1302
1480
  None
1303
1481
  } else {
1304
1482
  merge_candidate_keys(
@@ -1459,6 +1637,7 @@ impl Database {
1459
1637
  /// self-contained `.vdb` file (WAL is folded in). The current database is
1460
1638
  /// not modified. Works in both read-only and read-write mode.
1461
1639
  pub fn snapshot(&self, dest: impl AsRef<Path>) -> Result<()> {
1640
+ self.check_open()?;
1462
1641
  let dest = dest.as_ref();
1463
1642
  if let Some(parent) = dest.parent() {
1464
1643
  if !parent.as_os_str().is_empty() {
@@ -1479,6 +1658,7 @@ impl Database {
1479
1658
  /// including the `.vdb` file and ANN sidecar files. The backup is
1480
1659
  /// compacted (WAL folded in). Works in both read-only and read-write mode.
1481
1660
  pub fn backup(&self, dest: impl AsRef<Path>) -> Result<()> {
1661
+ self.check_open()?;
1482
1662
  let dest = dest.as_ref();
1483
1663
  fs::create_dir_all(dest)?;
1484
1664
 
@@ -1562,15 +1742,45 @@ impl Database {
1562
1742
  return Ok(());
1563
1743
  }
1564
1744
 
1745
+ let has_sparse = ops.iter().any(|op| match op {
1746
+ WalOp::Upsert(record) => {
1747
+ !record.sparse.is_empty()
1748
+ || self
1749
+ .records
1750
+ .get(&(record.namespace.clone(), record.id.clone()))
1751
+ .map_or(false, |r| !r.sparse.is_empty())
1752
+ }
1753
+ WalOp::Delete { namespace, id } => self
1754
+ .records
1755
+ .get(&(namespace.clone(), id.clone()))
1756
+ .map_or(false, |r| !r.sparse.is_empty()),
1757
+ });
1758
+
1565
1759
  self.append_wal_batch(&ops)?;
1566
1760
  self.apply_ops_in_memory(ops);
1567
- self.rebuild_sparse_index();
1761
+
1762
+ if has_sparse {
1763
+ self.rebuild_sparse_index();
1764
+ }
1568
1765
  self.rebuild_ann();
1569
1766
  self.ann_loaded_from_disk = false;
1570
1767
  self.persist_ann_to_disk()?;
1571
1768
  Ok(())
1572
1769
  }
1573
1770
 
1771
+ /// Write ops to WAL and apply in memory, but defer index rebuilds.
1772
+ /// The caller is responsible for calling `rebuild_sparse_index()`,
1773
+ /// `rebuild_ann()`, and `persist_ann_to_disk()` after all batches are done.
1774
+ fn apply_wal_batch_deferred(&mut self, ops: Vec<WalOp>) -> Result<()> {
1775
+ if ops.is_empty() {
1776
+ return Ok(());
1777
+ }
1778
+
1779
+ self.append_wal_batch(&ops)?;
1780
+ self.apply_ops_in_memory(ops);
1781
+ Ok(())
1782
+ }
1783
+
1574
1784
  fn apply_ops_in_memory(&mut self, ops: Vec<WalOp>) {
1575
1785
  for op in ops {
1576
1786
  match op {
@@ -2409,6 +2619,15 @@ fn candidate_count(top_k: usize, total: usize) -> usize {
2409
2619
  .min(total)
2410
2620
  }
2411
2621
 
2622
+ fn timeout_duration(timeout_secs: f64, label: &str) -> Result<std::time::Duration> {
2623
+ if !timeout_secs.is_finite() || timeout_secs < 0.0 {
2624
+ return Err(VectLiteError::InvalidFormat(format!(
2625
+ "{label} must be a finite, non-negative number of seconds"
2626
+ )));
2627
+ }
2628
+ Ok(std::time::Duration::from_secs_f64(timeout_secs))
2629
+ }
2630
+
2412
2631
  fn wal_path(path: &Path) -> PathBuf {
2413
2632
  let mut wal = path.as_os_str().to_os_string();
2414
2633
  wal.push(".wal");
@@ -2422,6 +2641,13 @@ fn lock_path(path: &Path) -> PathBuf {
2422
2641
  }
2423
2642
 
2424
2643
  fn acquire_exclusive_lock(path: &Path) -> Result<File> {
2644
+ acquire_exclusive_lock_with_timeout(path, None)
2645
+ }
2646
+
2647
+ fn acquire_exclusive_lock_with_timeout(
2648
+ path: &Path,
2649
+ timeout: Option<std::time::Duration>,
2650
+ ) -> Result<File> {
2425
2651
  if let Some(parent) = path.parent() {
2426
2652
  if !parent.as_os_str().is_empty() && !parent.exists() {
2427
2653
  fs::create_dir_all(parent)?;
@@ -2433,16 +2659,43 @@ fn acquire_exclusive_lock(path: &Path) -> Result<File> {
2433
2659
  .read(true)
2434
2660
  .write(true)
2435
2661
  .open(lock_path(path))?;
2436
- file.try_lock_exclusive().map_err(|err| {
2437
- VectLiteError::LockContention(format!(
2438
- "could not acquire exclusive lock on '{}': {err}",
2439
- path.display()
2440
- ))
2441
- })?;
2662
+
2663
+ match timeout {
2664
+ None => {
2665
+ file.try_lock_exclusive().map_err(|err| {
2666
+ VectLiteError::LockContention(format!(
2667
+ "could not acquire exclusive lock on '{}': {err}",
2668
+ path.display()
2669
+ ))
2670
+ })?;
2671
+ }
2672
+ Some(duration) => {
2673
+ let start = Instant::now();
2674
+ let interval = std::time::Duration::from_millis(50);
2675
+ loop {
2676
+ match file.try_lock_exclusive() {
2677
+ Ok(()) => break,
2678
+ Err(err) => {
2679
+ if start.elapsed() >= duration {
2680
+ return Err(VectLiteError::LockContention(format!(
2681
+ "could not acquire exclusive lock on '{}' after {:.1}s: {err}",
2682
+ path.display(),
2683
+ duration.as_secs_f64()
2684
+ )));
2685
+ }
2686
+ std::thread::sleep(interval);
2687
+ }
2688
+ }
2689
+ }
2690
+ }
2691
+ }
2442
2692
  Ok(file)
2443
2693
  }
2444
2694
 
2445
- fn acquire_shared_lock(path: &Path) -> Result<File> {
2695
+ fn acquire_shared_lock_with_timeout(
2696
+ path: &Path,
2697
+ timeout: Option<std::time::Duration>,
2698
+ ) -> Result<File> {
2446
2699
  let lock_file = lock_path(path);
2447
2700
  if !lock_file.exists() {
2448
2701
  // Lock file may not exist yet for read-only opens on existing dbs
@@ -2458,12 +2711,36 @@ fn acquire_shared_lock(path: &Path) -> Result<File> {
2458
2711
  .read(true)
2459
2712
  .write(true)
2460
2713
  .open(&lock_file)?;
2461
- file.try_lock_shared().map_err(|err| {
2462
- VectLiteError::LockContention(format!(
2463
- "could not acquire shared lock on '{}': {err}",
2464
- path.display()
2465
- ))
2466
- })?;
2714
+
2715
+ match timeout {
2716
+ None => {
2717
+ file.try_lock_shared().map_err(|err| {
2718
+ VectLiteError::LockContention(format!(
2719
+ "could not acquire shared lock on '{}': {err}",
2720
+ path.display()
2721
+ ))
2722
+ })?;
2723
+ }
2724
+ Some(duration) => {
2725
+ let start = Instant::now();
2726
+ let interval = std::time::Duration::from_millis(50);
2727
+ loop {
2728
+ match file.try_lock_shared() {
2729
+ Ok(()) => break,
2730
+ Err(err) => {
2731
+ if start.elapsed() >= duration {
2732
+ return Err(VectLiteError::LockContention(format!(
2733
+ "could not acquire shared lock on '{}' after {:.1}s: {err}",
2734
+ path.display(),
2735
+ duration.as_secs_f64()
2736
+ )));
2737
+ }
2738
+ std::thread::sleep(interval);
2739
+ }
2740
+ }
2741
+ }
2742
+ }
2743
+ }
2467
2744
  Ok(file)
2468
2745
  }
2469
2746
 
@@ -3088,7 +3365,7 @@ fn usize_from_u64(value: u64) -> Result<usize> {
3088
3365
  mod tests {
3089
3366
  use super::{
3090
3367
  Database, HybridSearchOptions, Metadata, MetadataFilter, MetadataValue, NamedVectors,
3091
- Record, SearchOptions, SparseVector,
3368
+ Record, SearchOptions, SparseVector, VectLiteError,
3092
3369
  };
3093
3370
  use std::path::{Path, PathBuf};
3094
3371
  use std::time::{SystemTime, UNIX_EPOCH};
@@ -3343,6 +3620,67 @@ mod tests {
3343
3620
  cleanup(&path);
3344
3621
  }
3345
3622
 
3623
+ #[test]
3624
+ fn upsert_without_sparse_rebuilds_sparse_index() {
3625
+ let path = temp_file("sparse-upsert-clear");
3626
+ let mut database = Database::create(&path, 2).expect("create database");
3627
+
3628
+ let mut sparse_auth = SparseVector::new();
3629
+ sparse_auth.insert("auth".to_owned(), 1.0);
3630
+
3631
+ database
3632
+ .upsert_with_sparse_in_namespace(
3633
+ "docs",
3634
+ "doc1",
3635
+ vec![1.0, 0.0],
3636
+ sparse_auth,
3637
+ Metadata::new(),
3638
+ )
3639
+ .expect("insert sparse doc");
3640
+
3641
+ let mut query_sparse = SparseVector::new();
3642
+ query_sparse.insert("auth".to_owned(), 1.0);
3643
+
3644
+ let initial_outcome = database
3645
+ .hybrid_search_in_namespace_with_stats(
3646
+ "docs",
3647
+ None,
3648
+ Some(&query_sparse),
3649
+ HybridSearchOptions {
3650
+ top_k: 10,
3651
+ filter: None,
3652
+ dense_weight: 0.0,
3653
+ sparse_weight: 1.0,
3654
+ ..HybridSearchOptions::default()
3655
+ },
3656
+ )
3657
+ .expect("initial sparse search");
3658
+ assert_eq!(initial_outcome.stats.sparse_candidate_count, 1);
3659
+
3660
+ database
3661
+ .upsert_in_namespace("docs", "doc1", vec![1.0, 0.0], Metadata::new())
3662
+ .expect("replace doc without sparse terms");
3663
+
3664
+ let updated_outcome = database
3665
+ .hybrid_search_in_namespace_with_stats(
3666
+ "docs",
3667
+ None,
3668
+ Some(&query_sparse),
3669
+ HybridSearchOptions {
3670
+ top_k: 10,
3671
+ filter: None,
3672
+ dense_weight: 0.0,
3673
+ sparse_weight: 1.0,
3674
+ ..HybridSearchOptions::default()
3675
+ },
3676
+ )
3677
+ .expect("sparse search after clearing sparse terms");
3678
+ assert_eq!(updated_outcome.stats.sparse_candidate_count, 0);
3679
+ assert!(updated_outcome.results.is_empty());
3680
+
3681
+ cleanup(&path);
3682
+ }
3683
+
3346
3684
  #[test]
3347
3685
  fn named_vectors_roundtrip_and_search() {
3348
3686
  let path = temp_file("named-vectors");
@@ -3528,6 +3866,69 @@ mod tests {
3528
3866
  cleanup(&path);
3529
3867
  }
3530
3868
 
3869
+ #[test]
3870
+ fn closed_database_rejects_result_based_operations() {
3871
+ let path = temp_file("closed-db");
3872
+ let snapshot = temp_file("closed-db-snapshot");
3873
+ let mut database = Database::create(&path, 2).expect("create database");
3874
+ database
3875
+ .insert("doc1", vec![1.0, 0.0], Metadata::new())
3876
+ .expect("insert doc1");
3877
+ database.close().expect("close database");
3878
+
3879
+ let search_err = database
3880
+ .search(
3881
+ &[1.0, 0.0],
3882
+ SearchOptions {
3883
+ top_k: 1,
3884
+ filter: None,
3885
+ },
3886
+ )
3887
+ .expect_err("search on closed database should fail");
3888
+ assert!(matches!(
3889
+ search_err,
3890
+ VectLiteError::InvalidFormat(message) if message.contains("database is closed")
3891
+ ));
3892
+
3893
+ let snapshot_err = database
3894
+ .snapshot(&snapshot)
3895
+ .expect_err("snapshot on closed database should fail");
3896
+ assert!(matches!(
3897
+ snapshot_err,
3898
+ VectLiteError::InvalidFormat(message) if message.contains("database is closed")
3899
+ ));
3900
+
3901
+ cleanup(&path);
3902
+ cleanup(&snapshot);
3903
+ }
3904
+
3905
+ #[test]
3906
+ fn lock_timeout_must_be_non_negative_and_finite() {
3907
+ let path = temp_file("timeout-validation");
3908
+ let database = Database::create(&path, 2).expect("create database");
3909
+
3910
+ let negative_err = match Database::open_with_timeout(&path, -1.0) {
3911
+ Ok(_) => panic!("negative lock timeout should fail"),
3912
+ Err(err) => err,
3913
+ };
3914
+ assert!(matches!(
3915
+ negative_err,
3916
+ VectLiteError::InvalidFormat(message) if message.contains("lock_timeout")
3917
+ ));
3918
+
3919
+ let nan_err = match Database::open_with_timeout(&path, f64::NAN) {
3920
+ Ok(_) => panic!("NaN lock timeout should fail"),
3921
+ Err(err) => err,
3922
+ };
3923
+ assert!(matches!(
3924
+ nan_err,
3925
+ VectLiteError::InvalidFormat(message) if message.contains("lock_timeout")
3926
+ ));
3927
+
3928
+ drop(database);
3929
+ cleanup(&path);
3930
+ }
3931
+
3531
3932
  fn temp_file(name: &str) -> PathBuf {
3532
3933
  let nanos = SystemTime::now()
3533
3934
  .duration_since(UNIX_EPOCH)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "vectlite",
3
- "version": "0.1.8",
3
+ "version": "0.1.11",
4
4
  "description": "Embedded vector store for local-first AI applications.",
5
5
  "main": "index.js",
6
6
  "types": "index.d.ts",
Binary file