vectlite 0.1.12 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +193 -1
- package/index.d.ts +55 -0
- package/index.js +171 -12
- package/native/Cargo.toml +1 -1
- package/native/src/lib.rs +612 -27
- package/native/vectlite-core/Cargo.toml +2 -1
- package/native/vectlite-core/src/lib.rs +3871 -183
- package/native/vectlite-core/src/quantization.rs +500 -0
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/vectlite.node +0 -0
- package/prebuilds/darwin-x64/vectlite.node +0 -0
- package/prebuilds/linux-x64-gnu/vectlite.node +0 -0
- package/prebuilds/win32-x64-msvc/vectlite.node +0 -0
package/README.md
CHANGED
|
@@ -43,10 +43,12 @@ db.close()
|
|
|
43
43
|
### Core
|
|
44
44
|
|
|
45
45
|
- **Single-file storage** -- one `.vdb` file per database, portable and easy to back up
|
|
46
|
-
- **
|
|
46
|
+
- **Distance metrics** -- cosine (default), euclidean (L2), dot product, manhattan (L1) with SIMD acceleration
|
|
47
|
+
- **Dense vectors** -- automatic HNSW indexing with metric-aware distance functions
|
|
47
48
|
- **Sparse vectors** -- BM25-scored inverted index for keyword retrieval
|
|
48
49
|
- **Hybrid search** -- dense + sparse fusion with linear or RRF strategies
|
|
49
50
|
- **Vector quantization** -- scalar (int8, 4x), binary (32x), and product quantization (PQ) with 2-stage rescoring
|
|
51
|
+
- **Multi-vector / ColBERT** -- late interaction search with per-token MaxSim scoring and 2-bit quantization (~16x compression)
|
|
50
52
|
- **Rich metadata** -- string, number, boolean, null, array, and nested object values
|
|
51
53
|
- **Crash-safe WAL** -- writes land in a write-ahead log first, then checkpoint with `compact()`
|
|
52
54
|
- **Transactions** -- atomic batched writes with `db.transaction()`
|
|
@@ -61,6 +63,7 @@ db.close()
|
|
|
61
63
|
- **MMR diversification** -- `mmrLambda` controls relevance vs. diversity trade-off
|
|
62
64
|
- **Namespaces** -- logical isolation with per-namespace or cross-namespace search
|
|
63
65
|
- **Observability** -- `searchWithStats()` returns timings, BM25 term scores, ANN stats, and per-result explain payloads
|
|
66
|
+
- **Payload indexes** -- keyword and numeric indexes on metadata fields accelerate filtered queries on large collections
|
|
64
67
|
|
|
65
68
|
### Data Management
|
|
66
69
|
|
|
@@ -68,14 +71,35 @@ db.close()
|
|
|
68
71
|
- **Bulk ingestion** -- `bulkIngest()` with deferred index rebuilds for fast imports
|
|
69
72
|
- **Listing & filtered counts** -- `list()` and `count({ namespace, filter })` without a vector query
|
|
70
73
|
- **Delete by filter** -- `deleteByFilter()` for bulk deletion by metadata filter
|
|
74
|
+
- **Partial metadata updates** -- `updateMetadata()` merges a patch without re-writing the vector or rebuilding indexes
|
|
71
75
|
- **Snapshots** -- `db.snapshot(path)` creates a self-contained copy
|
|
72
76
|
- **Backup / Restore** -- `db.backup(dir)` and `vectlite.restore(dir, path)` for full roundtrips
|
|
73
77
|
- **Read-only mode** -- `vectlite.open(path, { readOnly: true })` for safe concurrent readers
|
|
74
78
|
- **Explicit close** -- `db.close()` to release locks deterministically
|
|
75
79
|
- **Lock timeouts** -- `lockTimeout` for bounded lock acquisition waits
|
|
80
|
+
- **TTL / Expiry** -- `setTtl()` / `clearTtl()` or `ttl` option on insert/upsert; expired records auto-filtered from reads and GC'd on compact
|
|
81
|
+
- **Cursor-based pagination** -- `listCursor()` for efficient iteration over large collections
|
|
82
|
+
- **Async API** -- `searchAsync()`, `compactAsync()`, `flushAsync()`, `bulkIngestAsync()` run on the libuv threadpool
|
|
76
83
|
|
|
77
84
|
## Usage
|
|
78
85
|
|
|
86
|
+
### Distance Metrics
|
|
87
|
+
|
|
88
|
+
```js
|
|
89
|
+
// Default is cosine similarity
|
|
90
|
+
const db = vectlite.open('knowledge.vdb', { dimension: 384 })
|
|
91
|
+
|
|
92
|
+
// Choose a different metric at creation time
|
|
93
|
+
const db2 = vectlite.open('knowledge-l2.vdb', { dimension: 384, metric: 'euclidean' })
|
|
94
|
+
const db3 = vectlite.open('knowledge-dot.vdb', { dimension: 384, metric: 'dotproduct' })
|
|
95
|
+
const db4 = vectlite.open('knowledge-l1.vdb', { dimension: 384, metric: 'manhattan' })
|
|
96
|
+
|
|
97
|
+
// Aliases: 'l2', 'dot', 'ip', 'l1'
|
|
98
|
+
console.log(db2.metric) // "euclidean"
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
The metric is persisted in the database file. Scores are always oriented so that **higher is better**.
|
|
102
|
+
|
|
79
103
|
### Hybrid Search
|
|
80
104
|
|
|
81
105
|
```js
|
|
@@ -167,6 +191,9 @@ const records = db.list({ namespace: 'docs', filter: { stale: false }, limit: 20
|
|
|
167
191
|
const count = db.count({ namespace: 'docs', filter: { source: 'blog' } })
|
|
168
192
|
const deleted = db.deleteByFilter({ stale: true }, { namespace: 'docs' })
|
|
169
193
|
|
|
194
|
+
// Partial metadata update (merge patch -- only touches specified keys)
|
|
195
|
+
db.updateMetadata('doc1', { status: 'reviewed', score: 0.95 })
|
|
196
|
+
|
|
170
197
|
db.close()
|
|
171
198
|
```
|
|
172
199
|
|
|
@@ -184,6 +211,24 @@ console.log(outcome.stats.used_ann) // true
|
|
|
184
211
|
console.log(outcome.results[0].explain) // Detailed scoring breakdown
|
|
185
212
|
```
|
|
186
213
|
|
|
214
|
+
### Payload Indexes
|
|
215
|
+
|
|
216
|
+
Create keyword or numeric indexes on metadata fields to accelerate filtered queries on large collections. Indexes are automatically used by `search()`, `count()`, and `list()`.
|
|
217
|
+
|
|
218
|
+
```js
|
|
219
|
+
// Create indexes on frequently-filtered fields
|
|
220
|
+
db.createIndex('source', 'keyword') // string equality, $in
|
|
221
|
+
db.createIndex('score', 'numeric') // range queries: $gt, $gte, $lt, $lte
|
|
222
|
+
|
|
223
|
+
// Filtered queries now use indexes automatically
|
|
224
|
+
const count = db.count({ filter: { source: 'blog' } })
|
|
225
|
+
const results = db.search(query, { k: 10, filter: { score: { $gte: 0.8 } } })
|
|
226
|
+
|
|
227
|
+
// Inspect and manage indexes
|
|
228
|
+
console.log(db.listIndexes()) // [{ field: 'source', type: 'keyword' }, ...]
|
|
229
|
+
db.dropIndex('score')
|
|
230
|
+
```
|
|
231
|
+
|
|
187
232
|
### Vector Quantization
|
|
188
233
|
|
|
189
234
|
Reduce memory usage and accelerate search with quantized vectors. All methods use a 2-stage pipeline: fast quantized candidate selection followed by exact float32 rescoring.
|
|
@@ -211,6 +256,130 @@ db.disableQuantization()
|
|
|
211
256
|
|
|
212
257
|
Quantization parameters persist across reopens in a `.vdb.quant` sidecar file. The quantized index auto-rebuilds on inserts and upserts.
|
|
213
258
|
|
|
259
|
+
### Multi-Vector / ColBERT Search
|
|
260
|
+
|
|
261
|
+
Store token-level embeddings (ColBERT, ColPali) and search with MaxSim late interaction scoring.
|
|
262
|
+
|
|
263
|
+
```js
|
|
264
|
+
// Upsert with per-token ColBERT embeddings
|
|
265
|
+
db.upsertMultiVectors('doc1', denseVector,
|
|
266
|
+
JSON.stringify({ colbert: [tokenVec1, tokenVec2] }),
|
|
267
|
+
JSON.stringify({ metadata: { source: 'paper' } })
|
|
268
|
+
)
|
|
269
|
+
|
|
270
|
+
// MaxSim search
|
|
271
|
+
const results = JSON.parse(
|
|
272
|
+
db.searchMultiVector('colbert', JSON.stringify(queryTokenVectors))
|
|
273
|
+
)
|
|
274
|
+
|
|
275
|
+
// Enable 2-bit quantization (~16x compression)
|
|
276
|
+
db.enableMultiVectorQuantization('colbert')
|
|
277
|
+
|
|
278
|
+
// Check and disable
|
|
279
|
+
console.log(db.isMultiVectorQuantized('colbert')) // true
|
|
280
|
+
db.disableMultiVectorQuantization('colbert')
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
### TTL / Expiry
|
|
284
|
+
|
|
285
|
+
Records can automatically expire after a time-to-live. Expired records are transparently filtered from all reads and permanently removed on `compact()`.
|
|
286
|
+
|
|
287
|
+
```js
|
|
288
|
+
// Set TTL on insert/upsert (seconds)
|
|
289
|
+
db.upsert('session1', embedding, { user: 'alice' }, { ttl: 3600 }) // expires in 1 hour
|
|
290
|
+
|
|
291
|
+
// Set/clear TTL on existing records
|
|
292
|
+
db.setTtl('doc1', 86400) // expire in 24 hours
|
|
293
|
+
db.clearTtl('doc1') // remove expiry
|
|
294
|
+
|
|
295
|
+
// Expired records are invisible to get/list/count/search
|
|
296
|
+
const record = db.get('session1') // null after TTL elapses
|
|
297
|
+
|
|
298
|
+
// compact() garbage-collects expired records from disk
|
|
299
|
+
db.compact()
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
### Cursor-Based Pagination
|
|
303
|
+
|
|
304
|
+
Efficiently iterate over large collections without offset overhead.
|
|
305
|
+
|
|
306
|
+
```js
|
|
307
|
+
// Paginate 100 records at a time
|
|
308
|
+
let cursor = null
|
|
309
|
+
do {
|
|
310
|
+
const page = db.listCursor({ limit: 100, cursor })
|
|
311
|
+
for (const record of page.records) {
|
|
312
|
+
handleRecord(record)
|
|
313
|
+
}
|
|
314
|
+
cursor = page.cursor
|
|
315
|
+
} while (cursor !== null)
|
|
316
|
+
|
|
317
|
+
// Works with namespace and filter
|
|
318
|
+
const page = db.listCursor({ namespace: 'docs', filter: { source: 'blog' }, limit: 50 })
|
|
319
|
+
```
|
|
320
|
+
|
|
321
|
+
### Async API
|
|
322
|
+
|
|
323
|
+
Non-blocking versions of heavy operations that run on the libuv threadpool.
|
|
324
|
+
|
|
325
|
+
```js
|
|
326
|
+
// Async search (returns a Promise)
|
|
327
|
+
const results = await db.searchAsync(queryEmbedding, { k: 10, filter: { source: 'blog' } })
|
|
328
|
+
|
|
329
|
+
// Async search with stats
|
|
330
|
+
const outcome = await db.searchWithStatsAsync(queryEmbedding, { k: 10 })
|
|
331
|
+
|
|
332
|
+
// Async maintenance
|
|
333
|
+
await db.flushAsync()
|
|
334
|
+
await db.compactAsync()
|
|
335
|
+
|
|
336
|
+
// Async bulk ingestion
|
|
337
|
+
const count = await db.bulkIngestAsync(records, { batchSize: 5000 })
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
### OpenTelemetry Integration
|
|
341
|
+
|
|
342
|
+
vectlite ships with optional OpenTelemetry tracing. When enabled, every search
|
|
343
|
+
call is wrapped in a span carrying semantic DB attributes and search-specific
|
|
344
|
+
metrics. `@opentelemetry/api` is loaded lazily -- it is **not** a runtime
|
|
345
|
+
dependency.
|
|
346
|
+
|
|
347
|
+
```js
|
|
348
|
+
const vectlite = require('vectlite')
|
|
349
|
+
|
|
350
|
+
// Auto-detect: resolves a tracer from @opentelemetry/api if installed
|
|
351
|
+
const tracer = vectlite.configureOpenTelemetry()
|
|
352
|
+
|
|
353
|
+
// Or supply your own tracer
|
|
354
|
+
vectlite.configureOpenTelemetry({ tracer: myTracer })
|
|
355
|
+
|
|
356
|
+
// Custom tracer name (default: 'vectlite')
|
|
357
|
+
vectlite.configureOpenTelemetry({ tracerName: 'my-app' })
|
|
358
|
+
|
|
359
|
+
// Disable
|
|
360
|
+
vectlite.configureOpenTelemetry(false)
|
|
361
|
+
```
|
|
362
|
+
|
|
363
|
+
When a tracer is active, each `search` / `searchWithStats` / `searchAsync` /
|
|
364
|
+
`searchWithStatsAsync` call creates a `vectlite.search` span with these
|
|
365
|
+
attributes:
|
|
366
|
+
|
|
367
|
+
| Attribute | Description |
|
|
368
|
+
|---|---|
|
|
369
|
+
| `db.system` | Always `"vectlite"` |
|
|
370
|
+
| `db.operation.name` | Always `"search"` |
|
|
371
|
+
| `vectlite.search.k` | Requested result count |
|
|
372
|
+
| `vectlite.search.namespace` | Target namespace |
|
|
373
|
+
| `vectlite.search.has_dense` | Whether a dense query vector was provided |
|
|
374
|
+
| `vectlite.search.has_sparse` | Whether sparse terms were provided |
|
|
375
|
+
| `vectlite.search.fusion` | Fusion strategy (`"linear"` or `"rrf"`) |
|
|
376
|
+
| `vectlite.search.used_ann` | Whether HNSW was used (set after completion) |
|
|
377
|
+
| `vectlite.search.result_count` | Number of results returned (set after completion) |
|
|
378
|
+
| `vectlite.search.total_us` | Total search time in microseconds (set after completion) |
|
|
379
|
+
|
|
380
|
+
If a search throws, the span records the exception and sets an error status
|
|
381
|
+
before re-throwing.
|
|
382
|
+
|
|
214
383
|
## Database Methods Reference
|
|
215
384
|
|
|
216
385
|
### Write Methods
|
|
@@ -225,6 +394,9 @@ Quantization parameters persist across reopens in a `.vdb.quant` sidecar file. T
|
|
|
225
394
|
| `db.delete(id, { namespace })` | Delete a single record |
|
|
226
395
|
| `db.deleteMany(ids, { namespace })` | Delete multiple records by id |
|
|
227
396
|
| `db.deleteByFilter(filter, { namespace })` | Delete all records matching a filter |
|
|
397
|
+
| `db.updateMetadata(id, metadata, { namespace })` | Merge a metadata patch into an existing record (no vector rewrite) |
|
|
398
|
+
| `db.setTtl(id, seconds, { namespace })` | Set time-to-live on a record (seconds from now) |
|
|
399
|
+
| `db.clearTtl(id, { namespace })` | Remove TTL from a record |
|
|
228
400
|
|
|
229
401
|
### Read Methods
|
|
230
402
|
|
|
@@ -235,11 +407,21 @@ Quantization parameters persist across reopens in a `.vdb.quant` sidecar file. T
|
|
|
235
407
|
| `db.searchWithStats(query, options)` | Search with detailed performance stats |
|
|
236
408
|
| `db.count({ namespace, filter })` | Count records, optionally scoped by namespace/filter |
|
|
237
409
|
| `db.list({ namespace, filter, limit, offset })` | List records without issuing a vector query |
|
|
410
|
+
| `db.listCursor({ namespace, filter, limit, cursor })` | Cursor-based pagination for large collections |
|
|
238
411
|
| `db.namespaces()` | List all namespaces |
|
|
239
412
|
| `db.dimension` | Vector dimension (property) |
|
|
240
413
|
| `db.path` | Database file path (property) |
|
|
414
|
+
| `db.metric` | Distance metric name: `"cosine"`, `"euclidean"`, `"dotproduct"`, or `"manhattan"` (property) |
|
|
241
415
|
| `db.readOnly` | Whether the database is read-only (property) |
|
|
242
416
|
|
|
417
|
+
### Index Methods
|
|
418
|
+
|
|
419
|
+
| Method | Description |
|
|
420
|
+
|---|---|
|
|
421
|
+
| `db.createIndex(field, indexType)` | Create a payload index (`'keyword'` or `'numeric'`) on a metadata field |
|
|
422
|
+
| `db.dropIndex(field)` | Remove an index |
|
|
423
|
+
| `db.listIndexes()` | List all active indexes as `[{ field, type }, ...]` |
|
|
424
|
+
|
|
243
425
|
### Quantization Methods
|
|
244
426
|
|
|
245
427
|
| Method | Description |
|
|
@@ -260,6 +442,16 @@ Quantization parameters persist across reopens in a `.vdb.quant` sidecar file. T
|
|
|
260
442
|
| `db.transaction()` | Begin an atomic transaction |
|
|
261
443
|
| `db.close()` | Flush pending state, release the file lock, and invalidate the handle |
|
|
262
444
|
|
|
445
|
+
### Async Methods
|
|
446
|
+
|
|
447
|
+
| Method | Description |
|
|
448
|
+
|---|---|
|
|
449
|
+
| `db.searchAsync(query, options)` | Non-blocking search (returns Promise) |
|
|
450
|
+
| `db.searchWithStatsAsync(query, options)` | Non-blocking search with stats (returns Promise) |
|
|
451
|
+
| `db.flushAsync()` | Non-blocking flush/compact (returns Promise) |
|
|
452
|
+
| `db.compactAsync()` | Non-blocking compact (returns Promise) |
|
|
453
|
+
| `db.bulkIngestAsync(records, options)` | Non-blocking bulk import (returns Promise) |
|
|
454
|
+
|
|
263
455
|
## Filter Operators
|
|
264
456
|
|
|
265
457
|
| Operator | Example | Description |
|
package/index.d.ts
CHANGED
|
@@ -21,6 +21,7 @@ export interface Record {
|
|
|
21
21
|
vectors: NamedVectors
|
|
22
22
|
sparse: SparseVector
|
|
23
23
|
metadata: Metadata
|
|
24
|
+
expires_at: number | null
|
|
24
25
|
}
|
|
25
26
|
|
|
26
27
|
export interface SearchTimings {
|
|
@@ -41,6 +42,8 @@ export interface SearchStats {
|
|
|
41
42
|
ann_loaded_from_disk: boolean
|
|
42
43
|
wal_entries_replayed: number
|
|
43
44
|
fusion: string
|
|
45
|
+
effective_dimension: number
|
|
46
|
+
matryoshka_truncated: boolean
|
|
44
47
|
rerank_applied: boolean
|
|
45
48
|
rerank_count: number
|
|
46
49
|
timings: SearchTimings
|
|
@@ -80,6 +83,7 @@ export interface WriteOptions {
|
|
|
80
83
|
namespace?: string | null
|
|
81
84
|
sparse?: SparseVector | null
|
|
82
85
|
vectors?: NamedVectors | null
|
|
86
|
+
ttl?: number | null
|
|
83
87
|
}
|
|
84
88
|
|
|
85
89
|
export interface CountOptions {
|
|
@@ -92,6 +96,16 @@ export interface ListOptions extends CountOptions {
|
|
|
92
96
|
offset?: number | null
|
|
93
97
|
}
|
|
94
98
|
|
|
99
|
+
export interface ListCursorOptions extends CountOptions {
|
|
100
|
+
limit?: number | null
|
|
101
|
+
cursor?: string | null
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
export interface ListCursorResult {
|
|
105
|
+
records: Record[]
|
|
106
|
+
cursor: string | null
|
|
107
|
+
}
|
|
108
|
+
|
|
95
109
|
export interface BulkIngestOptions {
|
|
96
110
|
namespace?: string | null
|
|
97
111
|
batchSize?: number
|
|
@@ -110,15 +124,19 @@ export interface SearchOptions {
|
|
|
110
124
|
vectorName?: string | null
|
|
111
125
|
fusion?: 'linear' | 'rrf'
|
|
112
126
|
rrfK?: number
|
|
127
|
+
truncateDim?: number | null
|
|
113
128
|
explain?: boolean
|
|
114
129
|
queryVectors?: { [name: string]: number[] } | null
|
|
115
130
|
vectorWeights?: { [name: string]: number } | null
|
|
116
131
|
}
|
|
117
132
|
|
|
133
|
+
export type DistanceMetric = 'cosine' | 'euclidean' | 'dotproduct' | 'manhattan' | 'l2' | 'dot' | 'ip' | 'l1'
|
|
134
|
+
|
|
118
135
|
export interface OpenOptions {
|
|
119
136
|
dimension?: number | null
|
|
120
137
|
readOnly?: boolean
|
|
121
138
|
lockTimeout?: number | null
|
|
139
|
+
metric?: DistanceMetric | null
|
|
122
140
|
}
|
|
123
141
|
|
|
124
142
|
export class VectLiteError extends Error {}
|
|
@@ -139,12 +157,14 @@ export class Database {
|
|
|
139
157
|
readonly path: string
|
|
140
158
|
readonly walPath: string
|
|
141
159
|
readonly dimension: number
|
|
160
|
+
readonly metric: string
|
|
142
161
|
readonly readOnly: boolean
|
|
143
162
|
|
|
144
163
|
count(options?: CountOptions): number
|
|
145
164
|
namespaces(): string[]
|
|
146
165
|
close(): void
|
|
147
166
|
list(options?: ListOptions): Record[]
|
|
167
|
+
listCursor(options?: ListCursorOptions): ListCursorResult
|
|
148
168
|
transaction(): Transaction
|
|
149
169
|
insert(id: string, vector: number[], metadata?: Metadata | null, options?: WriteOptions): void
|
|
150
170
|
upsert(id: string, vector: number[], metadata?: Metadata | null, options?: WriteOptions): void
|
|
@@ -155,12 +175,23 @@ export class Database {
|
|
|
155
175
|
delete(id: string, options?: { namespace?: string | null }): boolean
|
|
156
176
|
deleteMany(ids: string[], options?: { namespace?: string | null }): number
|
|
157
177
|
deleteByFilter(filter: Filter, options?: { namespace?: string | null }): number
|
|
178
|
+
updateMetadata(id: string, metadata: Metadata, options?: { namespace?: string | null }): boolean
|
|
179
|
+
setTtl(id: string, ttl: number, options?: { namespace?: string | null }): boolean
|
|
180
|
+
clearTtl(id: string, options?: { namespace?: string | null }): boolean
|
|
181
|
+
createIndex(field: string, indexType: 'keyword' | 'numeric'): boolean
|
|
182
|
+
dropIndex(field: string): boolean
|
|
183
|
+
listIndexes(): Array<{ field: string; type: 'keyword' | 'numeric' }>
|
|
158
184
|
flush(): void
|
|
159
185
|
compact(): void
|
|
160
186
|
snapshot(dest: string): void
|
|
161
187
|
backup(dest: string): void
|
|
162
188
|
search(query?: number[] | null, options?: SearchOptions): SearchResult[]
|
|
163
189
|
searchWithStats(query?: number[] | null, options?: SearchOptions): SearchResponse
|
|
190
|
+
searchAsync(query?: number[] | null, options?: SearchOptions): Promise<SearchResult[]>
|
|
191
|
+
searchWithStatsAsync(query?: number[] | null, options?: SearchOptions): Promise<SearchResponse>
|
|
192
|
+
flushAsync(): Promise<void>
|
|
193
|
+
compactAsync(): Promise<void>
|
|
194
|
+
bulkIngestAsync(records: Record[], options?: BulkIngestOptions): Promise<number>
|
|
164
195
|
}
|
|
165
196
|
|
|
166
197
|
export class Store {
|
|
@@ -176,6 +207,30 @@ export class Store {
|
|
|
176
207
|
export function open(path: string, options?: OpenOptions): Database
|
|
177
208
|
export function openStore(root: string): Store
|
|
178
209
|
export function restore(source: string, dest: string): Database
|
|
210
|
+
export interface OpenTelemetryOptions {
|
|
211
|
+
/** Pass `false` or `{ enabled: false }` to disable tracing. */
|
|
212
|
+
enabled?: boolean
|
|
213
|
+
/** Supply your own OTel `Tracer` instance. */
|
|
214
|
+
tracer?: unknown
|
|
215
|
+
/** Tracer name used when auto-resolving via `@opentelemetry/api`. Defaults to `'vectlite'`. */
|
|
216
|
+
tracerName?: string
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
/**
|
|
220
|
+
* Configure optional OpenTelemetry tracing for search operations.
|
|
221
|
+
*
|
|
222
|
+
* When a tracer is active, every `search`, `searchWithStats`, `searchAsync`,
|
|
223
|
+
* and `searchWithStatsAsync` call is wrapped in a span with semantic
|
|
224
|
+
* `db.system` / `db.operation.name` attributes and search-specific metrics.
|
|
225
|
+
*
|
|
226
|
+
* `@opentelemetry/api` is loaded lazily via `require()` -- it is **not** a
|
|
227
|
+
* runtime dependency. If the package is not installed the function returns
|
|
228
|
+
* `null` and search calls remain un-instrumented.
|
|
229
|
+
*
|
|
230
|
+
* @returns The resolved tracer, or `null` if tracing could not be configured.
|
|
231
|
+
*/
|
|
232
|
+
export function configureOpenTelemetry(options?: OpenTelemetryOptions | false): unknown | null
|
|
233
|
+
|
|
179
234
|
export function sparseTerms(text: string): SparseVector
|
|
180
235
|
export function upsertText(
|
|
181
236
|
db: Database,
|
package/index.js
CHANGED
|
@@ -63,6 +63,7 @@ function loadNative() {
|
|
|
63
63
|
const native = loadNative()
|
|
64
64
|
|
|
65
65
|
const TOKEN_RE = /[a-z0-9]+/g
|
|
66
|
+
let otelTracer = null
|
|
66
67
|
|
|
67
68
|
class VectLiteError extends Error {
|
|
68
69
|
constructor(message, cause) {
|
|
@@ -102,6 +103,84 @@ function decode(value) {
|
|
|
102
103
|
return value == null ? null : JSON.parse(value)
|
|
103
104
|
}
|
|
104
105
|
|
|
106
|
+
/**
 * Configure (or disable) OpenTelemetry tracing for search calls.
 *
 * Resolution order:
 *   1. `false` or `{ enabled: false }` clears the active tracer.
 *   2. `{ tracer }` installs the supplied tracer, provided it is usable.
 *   3. Otherwise a tracer is resolved lazily from `@opentelemetry/api`
 *      using `tracerName` (default `'vectlite'`).
 *
 * The result is stored in the module-level `otelTracer` that
 * `withSearchSpan` consults on every search.
 *
 * @param {false | { enabled?: boolean, tracer?: object, tracerName?: string }} [options]
 * @returns {object | null} The active tracer, or `null` when tracing is
 *   disabled or could not be configured.
 */
function configureOpenTelemetry(options = {}) {
  if (options === false || options?.enabled === false) {
    otelTracer = null
    return null
  }

  const supplied = options?.tracer
  if (supplied != null) {
    // withSearchSpan calls `startActiveSpan` on the stored tracer for every
    // search. Installing an object without that method would make every
    // search throw, so treat it as "could not be configured" instead.
    otelTracer = typeof supplied.startActiveSpan === 'function' ? supplied : null
    return otelTracer
  }

  try {
    // Loaded lazily so that `@opentelemetry/api` stays an optional package.
    const { trace } = require('@opentelemetry/api')
    otelTracer = trace.getTracer(options?.tracerName ?? 'vectlite')
  } catch {
    // Best-effort by design: if the package is missing or cannot produce a
    // tracer, searches simply stay un-instrumented.
    otelTracer = null
  }
  return otelTracer
}
|
|
124
|
+
|
|
125
|
+
/**
 * Build the attribute bag for a `vectlite.search` span.
 *
 * Called once before the search runs (request attributes only) and again
 * after completion with `stats`, in which case the outcome attributes are
 * added on top of the request attributes.
 *
 * @param {ArrayLike<number> | null | undefined} query Positional dense query vector, if any.
 * @param {object | null | undefined} options Search options as passed by the caller.
 * @param {object | null} [stats] Stats object of a completed search, if available.
 * @returns {Object<string, string | number | boolean>} Span attributes.
 */
function searchAttributes(query, options, stats = null) {
  // Dense input can arrive either as the positional `query` or as named
  // vectors in `options.queryVectors`; both count as "dense provided".
  const namedVectors = options?.queryVectors
  const hasNamedDense =
    typeof namedVectors === 'object' && namedVectors !== null && Object.keys(namedVectors).length > 0

  const attrs = {
    'db.system': 'vectlite',
    'db.operation.name': 'search',
    'vectlite.search.k': options?.k ?? 10,
    'vectlite.search.namespace': options?.namespace ?? '',
    'vectlite.search.all_namespaces': Boolean(options?.allNamespaces),
    'vectlite.search.has_dense': query != null || hasNamedDense,
    'vectlite.search.has_sparse': options?.sparse != null,
    'vectlite.search.fusion': options?.fusion ?? 'linear',
  }

  // Optional request attributes are emitted only when the caller set them.
  if (options?.vectorName != null) attrs['vectlite.search.vector_name'] = options.vectorName
  if (options?.truncateDim != null) attrs['vectlite.search.truncate_dim'] = options.truncateDim

  if (stats != null) {
    attrs['vectlite.search.used_ann'] = Boolean(stats.used_ann)
    attrs['vectlite.search.exact_fallback'] = Boolean(stats.exact_fallback)
    attrs['vectlite.search.considered_count'] = stats.considered_count ?? 0
    attrs['vectlite.search.result_count'] = stats.result_count ?? 0
    attrs['vectlite.search.effective_dimension'] = stats.effective_dimension ?? 0
    attrs['vectlite.search.matryoshka_truncated'] = Boolean(stats.matryoshka_truncated)
    attrs['vectlite.search.total_us'] = stats.timings?.total_us ?? 0
  }
  return attrs
}
|
|
149
|
+
|
|
150
|
+
/**
 * Run a search callback inside a `vectlite.search` span when tracing is on.
 *
 * With no active tracer this is a plain `fn()` call. Otherwise the span is
 * started with the request attributes, `fn` is invoked, and the span is
 * annotated and ended exactly once, after the synchronous return or after
 * the returned promise settles.
 *
 * Telemetry never changes the search outcome: a failure while annotating
 * the span is ignored, and the value or error produced by `fn` is passed
 * through untouched.
 *
 * @param {ArrayLike<number> | null | undefined} query Dense query passed to the search.
 * @param {object | null | undefined} options Search options passed to the search.
 * @param {() => any} fn Performs the search; may return a value or a promise.
 * @returns {any} Whatever `fn` returns (a promise if `fn` returned one).
 * @throws Re-throws (or rejects with) the error raised by `fn`.
 */
function withSearchSpan(query, options, fn) {
  if (otelTracer == null) {
    return fn()
  }

  // Numeric value of SpanStatusCode.ERROR in @opentelemetry/api; hard-coded
  // because that package is optional and may not be loadable here.
  const STATUS_ERROR = 2

  return otelTracer.startActiveSpan('vectlite.search', { attributes: searchAttributes(query, options) }, (span) => {
    // Annotate and end the span; annotation errors are swallowed on purpose
    // so that instrumentation cannot fail a search or leak an open span.
    const finishSpan = (annotate) => {
      try {
        annotate()
      } catch {
        // Best-effort telemetry: ignore and fall through to span.end().
      } finally {
        span.end()
      }
    }

    const onSuccess = (value) => {
      finishSpan(() => {
        const stats = value?.stats ?? null
        const attrs = searchAttributes(query, options, stats)
        // Plain search()/searchAsync() return a bare result array with no
        // stats object; the result count is still known from its length.
        if (stats == null && Array.isArray(value)) {
          attrs['vectlite.search.result_count'] = value.length
        }
        span.setAttributes?.(attrs)
      })
      return value
    }

    const onFailure = (error) => {
      finishSpan(() => {
        span.recordException?.(error)
        span.setStatus?.({ code: STATUS_ERROR, message: error?.message ?? String(error) })
      })
      throw error
    }

    let value
    try {
      value = fn()
    } catch (error) {
      return onFailure(error)
    }
    return isPromiseLike(value) ? value.then(onSuccess, onFailure) : onSuccess(value)
  })
}
|
|
183
|
+
|
|
105
184
|
function asArray(values) {
|
|
106
185
|
return Array.from(values)
|
|
107
186
|
}
|
|
@@ -123,6 +202,7 @@ function normalizeWriteOptions(options = {}) {
|
|
|
123
202
|
namespace: options.namespace ?? null,
|
|
124
203
|
sparse: options.sparse ?? null,
|
|
125
204
|
vectors: options.vectors ?? null,
|
|
205
|
+
ttl: options.ttl ?? null,
|
|
126
206
|
}
|
|
127
207
|
}
|
|
128
208
|
|
|
@@ -150,16 +230,16 @@ class Transaction {
|
|
|
150
230
|
}
|
|
151
231
|
|
|
152
232
|
insert(id, vector, metadata = null, options = {}) {
|
|
153
|
-
const { namespace, sparse, vectors } = normalizeWriteOptions(options)
|
|
233
|
+
const { namespace, sparse, vectors, ttl } = normalizeWriteOptions(options)
|
|
154
234
|
return wrapError(() =>
|
|
155
|
-
this._native.insert(id, asArray(vector), encode(metadata), namespace, encode(sparse), encode(vectors)),
|
|
235
|
+
this._native.insert(id, asArray(vector), encode(metadata), namespace, encode(sparse), encode(vectors), ttl),
|
|
156
236
|
)
|
|
157
237
|
}
|
|
158
238
|
|
|
159
239
|
upsert(id, vector, metadata = null, options = {}) {
|
|
160
|
-
const { namespace, sparse, vectors } = normalizeWriteOptions(options)
|
|
240
|
+
const { namespace, sparse, vectors, ttl } = normalizeWriteOptions(options)
|
|
161
241
|
return wrapError(() =>
|
|
162
|
-
this._native.upsert(id, asArray(vector), encode(metadata), namespace, encode(sparse), encode(vectors)),
|
|
242
|
+
this._native.upsert(id, asArray(vector), encode(metadata), namespace, encode(sparse), encode(vectors), ttl),
|
|
163
243
|
)
|
|
164
244
|
}
|
|
165
245
|
|
|
@@ -205,6 +285,10 @@ class Database {
|
|
|
205
285
|
return wrapError(() => this._native.dimension)
|
|
206
286
|
}
|
|
207
287
|
|
|
288
|
+
get metric() {
|
|
289
|
+
return wrapError(() => this._native.metric)
|
|
290
|
+
}
|
|
291
|
+
|
|
208
292
|
get readOnly() {
|
|
209
293
|
return wrapError(() => this._native.readOnly)
|
|
210
294
|
}
|
|
@@ -234,21 +318,35 @@ class Database {
|
|
|
234
318
|
)
|
|
235
319
|
}
|
|
236
320
|
|
|
321
|
+
listCursor(options = {}) {
|
|
322
|
+
return wrapError(() => {
|
|
323
|
+
const raw = decode(
|
|
324
|
+
this._native.listCursor(
|
|
325
|
+
options.namespace ?? null,
|
|
326
|
+
encode(options.filter),
|
|
327
|
+
options.limit ?? null,
|
|
328
|
+
options.cursor ?? null,
|
|
329
|
+
),
|
|
330
|
+
)
|
|
331
|
+
return { records: raw.records, cursor: raw.cursor ?? null }
|
|
332
|
+
})
|
|
333
|
+
}
|
|
334
|
+
|
|
237
335
|
transaction() {
|
|
238
336
|
return wrapError(() => new Transaction(this._native.transaction()))
|
|
239
337
|
}
|
|
240
338
|
|
|
241
339
|
insert(id, vector, metadata = null, options = {}) {
|
|
242
|
-
const { namespace, sparse, vectors } = normalizeWriteOptions(options)
|
|
340
|
+
const { namespace, sparse, vectors, ttl } = normalizeWriteOptions(options)
|
|
243
341
|
return wrapError(() =>
|
|
244
|
-
this._native.insert(id, asArray(vector), encode(metadata), namespace, encode(sparse), encode(vectors)),
|
|
342
|
+
this._native.insert(id, asArray(vector), encode(metadata), namespace, encode(sparse), encode(vectors), ttl),
|
|
245
343
|
)
|
|
246
344
|
}
|
|
247
345
|
|
|
248
346
|
upsert(id, vector, metadata = null, options = {}) {
|
|
249
|
-
const { namespace, sparse, vectors } = normalizeWriteOptions(options)
|
|
347
|
+
const { namespace, sparse, vectors, ttl } = normalizeWriteOptions(options)
|
|
250
348
|
return wrapError(() =>
|
|
251
|
-
this._native.upsert(id, asArray(vector), encode(metadata), namespace, encode(sparse), encode(vectors)),
|
|
349
|
+
this._native.upsert(id, asArray(vector), encode(metadata), namespace, encode(sparse), encode(vectors), ttl),
|
|
252
350
|
)
|
|
253
351
|
}
|
|
254
352
|
|
|
@@ -282,6 +380,32 @@ class Database {
|
|
|
282
380
|
return wrapError(() => this._native.deleteByFilter(encode(filter), options.namespace ?? null))
|
|
283
381
|
}
|
|
284
382
|
|
|
383
|
+
updateMetadata(id, metadata, options = {}) {
|
|
384
|
+
return wrapError(() =>
|
|
385
|
+
this._native.updateMetadata(id, encode(metadata), options.namespace ?? null),
|
|
386
|
+
)
|
|
387
|
+
}
|
|
388
|
+
|
|
389
|
+
setTtl(id, ttl, options = {}) {
|
|
390
|
+
return wrapError(() => this._native.setTtl(id, ttl, options.namespace ?? null))
|
|
391
|
+
}
|
|
392
|
+
|
|
393
|
+
clearTtl(id, options = {}) {
|
|
394
|
+
return wrapError(() => this._native.clearTtl(id, options.namespace ?? null))
|
|
395
|
+
}
|
|
396
|
+
|
|
397
|
+
createIndex(field, indexType) {
|
|
398
|
+
return wrapError(() => this._native.createIndex(field, indexType))
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
dropIndex(field) {
|
|
402
|
+
return wrapError(() => this._native.dropIndex(field))
|
|
403
|
+
}
|
|
404
|
+
|
|
405
|
+
listIndexes() {
|
|
406
|
+
return wrapError(() => decode(this._native.listIndexes()))
|
|
407
|
+
}
|
|
408
|
+
|
|
285
409
|
flush() {
|
|
286
410
|
return wrapError(() => this._native.flush())
|
|
287
411
|
}
|
|
@@ -299,12 +423,46 @@ class Database {
|
|
|
299
423
|
}
|
|
300
424
|
|
|
301
425
|
search(query = null, options = {}) {
|
|
302
|
-
return
|
|
426
|
+
return withSearchSpan(query, options, () =>
|
|
427
|
+
wrapError(() => decode(this._native.search(query == null ? null : asArray(query), encode(options)))),
|
|
428
|
+
)
|
|
303
429
|
}
|
|
304
430
|
|
|
305
431
|
searchWithStats(query = null, options = {}) {
|
|
306
|
-
return
|
|
307
|
-
|
|
432
|
+
return withSearchSpan(query, options, () =>
|
|
433
|
+
wrapError(() =>
|
|
434
|
+
decode(this._native.searchWithStats(query == null ? null : asArray(query), encode(options))),
|
|
435
|
+
),
|
|
436
|
+
)
|
|
437
|
+
}
|
|
438
|
+
|
|
439
|
+
searchAsync(query = null, options = {}) {
|
|
440
|
+
return withSearchSpan(query, options, () =>
|
|
441
|
+
wrapAsync(
|
|
442
|
+
this._native.searchAsync(query == null ? null : asArray(query), encode(options)),
|
|
443
|
+
).then(decode),
|
|
444
|
+
)
|
|
445
|
+
}
|
|
446
|
+
|
|
447
|
+
searchWithStatsAsync(query = null, options = {}) {
|
|
448
|
+
return withSearchSpan(query, options, () =>
|
|
449
|
+
wrapAsync(
|
|
450
|
+
this._native.searchWithStatsAsync(query == null ? null : asArray(query), encode(options)),
|
|
451
|
+
).then(decode),
|
|
452
|
+
)
|
|
453
|
+
}
|
|
454
|
+
|
|
455
|
+
flushAsync() {
|
|
456
|
+
return wrapAsync(this._native.flushAsync())
|
|
457
|
+
}
|
|
458
|
+
|
|
459
|
+
compactAsync() {
|
|
460
|
+
return wrapAsync(this._native.compactAsync())
|
|
461
|
+
}
|
|
462
|
+
|
|
463
|
+
bulkIngestAsync(records, options = {}) {
|
|
464
|
+
return wrapAsync(
|
|
465
|
+
this._native.bulkIngestAsync(encode(records), options.namespace ?? null, options.batchSize ?? 10_000),
|
|
308
466
|
)
|
|
309
467
|
}
|
|
310
468
|
}
|
|
@@ -345,7 +503,7 @@ class Store {
|
|
|
345
503
|
|
|
346
504
|
function open(path, options = {}) {
|
|
347
505
|
return wrapError(() =>
|
|
348
|
-
new Database(native.open(path, options.dimension ?? null, options.readOnly ?? false, options.lockTimeout ?? null)),
|
|
506
|
+
new Database(native.open(path, options.dimension ?? null, options.readOnly ?? false, options.lockTimeout ?? null, options.metric ?? null)),
|
|
349
507
|
)
|
|
350
508
|
}
|
|
351
509
|
|
|
@@ -393,6 +551,7 @@ module.exports = {
|
|
|
393
551
|
Store,
|
|
394
552
|
Transaction,
|
|
395
553
|
VectLiteError,
|
|
554
|
+
configureOpenTelemetry,
|
|
396
555
|
open,
|
|
397
556
|
openStore,
|
|
398
557
|
restore,
|