vectlite 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,75 +1,200 @@
1
- # Node Binding
1
+ # vectlite
2
2
 
3
- The Node binding now exists in-repo and builds from source.
3
+ [![npm version](https://img.shields.io/npm/v/vectlite.svg)](https://www.npmjs.com/package/vectlite)
4
+ [![Node versions](https://img.shields.io/node/v/vectlite.svg)](https://www.npmjs.com/package/vectlite)
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
4
6
 
5
- Current state:
7
+ Embedded vector store for local-first AI applications.
6
8
 
7
- - Rust addon implemented with `napi-rs`
8
- - JavaScript wrapper and TypeScript declarations included
9
- - local smoke test available in `bindings/node/tests`
10
- - npm package can be published as a source-build package
11
- - installing the npm package requires a working Rust toolchain on the target machine
9
+ **vectlite** is a single-file, zero-dependency vector database written in Rust with Node.js bindings. It gives you dense + sparse hybrid search, HNSW indexing, metadata filtering, transactions, and crash-safe persistence in a single `.vdb` file -- no server, no Docker, no network calls.
12
10
 
13
- ## Local Build
14
-
15
- From the repository root:
11
+ ## Installation
16
12
 
17
13
  ```bash
18
- cd bindings/node
19
- npm run build
14
+ npm install vectlite
20
15
  ```
21
16
 
22
- This compiles the Rust addon and writes `bindings/node/vectlite.node`.
17
+ Requires Node.js 18+. Pre-built binaries are available for macOS (x86_64, arm64), Linux (x86_64, glibc), and Windows (x86_64). Other platforms, including musl-based Linux, fall back to compiling from source (requires Rust/Cargo).
23
18
 
24
- ## Local Test
19
+ ## Quick Start
25
20
 
26
- ```bash
27
- cd bindings/node
28
- npm test
21
+ ```js
22
+ const vectlite = require('vectlite')
23
+
24
+ // Create or open a database
25
+ const db = vectlite.open('knowledge.vdb', { dimension: 384 })
26
+
27
+ // Insert records with vectors and metadata
28
+ db.upsert('doc1', embedding, { source: 'blog', title: 'Auth Guide' })
29
+ db.upsert('doc2', embedding2, { source: 'notes', title: 'Billing' })
30
+
31
+ // Search with filters
32
+ const results = db.search(embeddingQuery, { k: 5, filter: { source: 'blog' } })
33
+
34
+ // Checkpoint the WAL into the .vdb snapshot
35
+ db.compact()
29
36
  ```
30
37
 
31
- ## npm Package Model
38
+ ## Features
39
+
40
+ ### Core
32
41
 
33
- The npm package is set up as a source-build package:
42
+ - **Single-file storage** -- one `.vdb` file per database, portable and easy to back up
43
+ - **Dense vectors** -- cosine similarity with automatic HNSW indexing for large collections
44
+ - **Sparse vectors** -- BM25-scored inverted index for keyword retrieval
45
+ - **Hybrid search** -- dense + sparse fusion with linear or RRF strategies
46
+ - **Rich metadata** -- string, number, boolean, null, array, and nested object values
47
+ - **Crash-safe WAL** -- writes land in a write-ahead log first, then checkpoint with `compact()`
48
+ - **Transactions** -- atomic batched writes with `db.transaction()`
49
+ - **File locking** -- advisory locks prevent corruption from concurrent access
34
50
 
35
- - `prepack` stages a self-contained native crate plus the core Rust crate
36
- - `install` compiles the addon with Cargo on the target machine
37
- - the published tarball does not ship prebuilt binaries yet
51
+ ### Search & Retrieval
38
52
 
39
- That keeps one source of truth for the Rust core, but it means `npm install vectlite` requires:
53
+ - **Metadata filters** -- MongoDB-style operators: `$eq`, `$ne`, `$gt`, `$gte`, `$lt`, `$lte`, `$in`, `$nin`, `$contains`, `$exists`, `$and`, `$or`, `$not`
54
+ - **Nested filters** -- dot-path traversal (`author.name`), `$elemMatch`, `$size` on arrays and objects
55
+ - **Named vectors** -- multiple vector spaces per record (`vectors: { title: [...], body: [...] }`)
56
+ - **Multi-vector queries** -- weighted search across vector spaces in a single call
57
+ - **MMR diversification** -- `mmrLambda` controls relevance vs. diversity trade-off
58
+ - **Namespaces** -- logical isolation with per-namespace or cross-namespace search
59
+ - **Observability** -- `searchWithStats()` returns timings, BM25 term scores, ANN stats, and per-result explain payloads
40
60
 
41
- - Node 18+
42
- - Rust/Cargo installed
43
- - registry/network access to fetch Rust crates during the build
61
+ ### Data Management
62
+
63
+ - **Physical collections** -- `vectlite.openStore()` manages a directory of independent databases
64
+ - **Bulk ingestion** -- `bulkIngest()` with deferred index rebuilds for fast imports
65
+ - **Snapshots** -- `db.snapshot(path)` creates a self-contained copy
66
+ - **Backup / Restore** -- `db.backup(dir)` and `vectlite.restore(dir, path)` for full roundtrips
67
+ - **Read-only mode** -- `vectlite.open(path, { readOnly: true })` for safe concurrent readers
44
68
 
45
69
  ## Usage
46
70
 
71
+ ### Hybrid Search
72
+
47
73
  ```js
48
- const { open, sparseTerms } = require('./index.js')
74
+ const vectlite = require('vectlite')
49
75
 
50
- const db = open('knowledge.vdb', { dimension: 384 })
51
- db.upsert('doc1', embedding, { source: 'notes', title: 'Auth Guide' })
76
+ const db = vectlite.open('knowledge.vdb', { dimension: 384 })
52
77
 
78
+ // Upsert with dense + sparse vectors
79
+ db.upsert(
80
+ 'doc1',
81
+ denseEmbedding,
82
+ { source: 'docs', title: 'Auth Setup', text: 'How to configure SSO...' },
83
+ { sparse: vectlite.sparseTerms('How to configure SSO authentication') },
84
+ )
85
+
86
+ // Hybrid search
53
87
  const results = db.search(queryEmbedding, {
54
- k: 5,
55
- sparse: sparseTerms('auth guide'),
56
- filter: { source: 'notes' },
88
+ k: 10,
89
+ sparse: vectlite.sparseTerms('SSO authentication'),
90
+ fusion: 'rrf',
91
+ filter: { source: 'docs' },
92
+ explain: true,
57
93
  })
94
+
95
+ for (const result of results) {
96
+ console.log(result.id, result.score)
97
+ }
98
+ ```
99
+
100
+ ### Collections
101
+
102
+ ```js
103
+ const store = vectlite.openStore('./my_collections')
104
+ const products = store.createCollection('products', 384)
105
+ products.upsert('p1', embedding, { name: 'Widget', price: 9.99 })
106
+
107
+ const logs = store.openOrCreateCollection('logs', 128)
108
+ console.log(store.collections()) // ["logs", "products"]
109
+ ```
110
+
111
+ ### Transactions
112
+
113
+ ```js
114
+ const tx = db.transaction()
115
+ try {
116
+ tx.upsert('doc1', emb1, { source: 'a' })
117
+ tx.upsert('doc2', emb2, { source: 'b' })
118
+ tx.delete('old_doc')
119
+ tx.commit() // All operations commit atomically
120
+ } catch (err) {
121
+ tx.rollback() // Roll back on error
122
+ throw err
123
+ }
124
+ ```
125
+
126
+ ### Text Helpers
127
+
128
+ ```js
129
+ // Handles embedding + sparse term generation for you
130
+ vectlite.upsertText(db, 'doc1', 'Auth setup guide', embedFn, { source: 'docs' })
131
+ const results = vectlite.searchText(db, 'how to authenticate', embedFn, { k: 5 })
58
132
  ```
59
133
 
60
- ## Scope
134
+ ### Snapshots & Backup
61
135
 
62
- The initial Node surface covers the core database and store operations:
136
+ ```js
137
+ db.snapshot('/backups/knowledge_2024.vdb') // Self-contained copy
138
+ db.backup('/backups/full/') // Full backup with ANN sidecars
139
+
140
+ const restored = vectlite.restore('/backups/full/', 'restored.vdb')
141
+ ```
63
142
 
64
- - `open`, `openStore`, `restore`
65
- - `insert`, `upsert`, `get`, `delete`
66
- - batch writes and bulk ingest
67
- - snapshots, backup, compact, flush
68
- - namespaces and collections
69
- - dense, sparse, and hybrid search
70
- - search stats and text helpers
143
+ ### Read-Only Mode
71
144
 
72
- Not yet included:
145
+ ```js
146
+ const ro = vectlite.open('knowledge.vdb', { readOnly: true })
147
+ const results = ro.search(query, { k: 5 }) // Reads work
148
+ ro.upsert(...) // Throws VectLiteError
149
+ ```
150
+
151
+ ### Search Diagnostics
152
+
153
+ ```js
154
+ const outcome = db.searchWithStats(query, {
155
+ k: 5,
156
+ sparse: terms,
157
+ explain: true,
158
+ })
159
+
160
+ console.log(outcome.stats.timings) // { dense_us: 120, sparse_us: 45, ... }
161
+ console.log(outcome.stats.used_ann) // true
162
+ console.log(outcome.results[0].explain) // Detailed scoring breakdown
163
+ ```
73
164
 
74
- - JS callback rerank hooks
75
- - prebuilt binaries
165
+ ## Filter Operators
166
+
167
+ | Operator | Example | Description |
168
+ |---|---|---|
169
+ | `$eq` | `{ field: { $eq: 'value' } }` | Equal (also `{ field: 'value' }`) |
170
+ | `$ne` | `{ field: { $ne: 'value' } }` | Not equal |
171
+ | `$gt` / `$gte` | `{ field: { $gt: 5 } }` | Greater than (or equal) |
172
+ | `$lt` / `$lte` | `{ field: { $lt: 20 } }` | Less than (or equal) |
173
+ | `$in` / `$nin` | `{ field: { $in: ['a', 'b'] } }` | In / not in set |
174
+ | `$contains` | `{ field: { $contains: 'auth' } }` | Substring match |
175
+ | `$exists` | `{ field: { $exists: true } }` | Field presence |
176
+ | `$and` / `$or` | `{ $and: [{...}, {...}] }` | Logical combinators |
177
+ | `$not` | `{ $not: {...} }` | Logical negation |
178
+ | `$elemMatch` | `{ tags: { $elemMatch: { $eq: 'rust' } } }` | Match array elements |
179
+ | `$size` | `{ tags: { $size: 3 } }` | Array length |
180
+ | dot-path | `{ 'author.name': 'Alice' }` | Nested field access |
181
+
182
+ ## How It Works
183
+
184
+ - Records are stored in a compact binary `.vdb` snapshot file
185
+ - Writes go through a crash-safe WAL (`.wal`) before being applied in memory
186
+ - `compact()` folds the WAL into the snapshot and persists HNSW sidecar files
187
+ - Dense search uses HNSW indexes (auto-built for collections above ~128 records)
188
+ - Sparse search uses an inverted index with BM25 scoring
189
+ - Hybrid fusion combines dense + sparse via linear combination or reciprocal rank fusion
190
+ - Advisory file locks (`flock`) prevent concurrent write corruption
191
+
192
+ ## Links
193
+
194
+ - [GitHub Repository](https://github.com/mcsedition-hub/vectlite)
195
+ - [Issue Tracker](https://github.com/mcsedition-hub/vectlite/issues)
196
+ - [PyPI Package](https://pypi.org/project/vectlite/)
197
+
198
+ ## License
199
+
200
+ MIT
package/index.js CHANGED
@@ -1,6 +1,66 @@
1
1
  'use strict'
2
2
 
3
- const native = require('./vectlite.node')
3
+ const fs = require('node:fs')
4
+ const path = require('node:path')
5
+
6
+ function linuxLibc() {
7
+ if (process.platform !== 'linux') {
8
+ return null
9
+ }
10
+
11
+ const report = process.report?.getReport?.()
12
+ return report?.header?.glibcVersionRuntime ? 'gnu' : 'musl'
13
+ }
14
+
15
+ function runtimePrebuildTag() {
16
+ switch (process.platform) {
17
+ case 'darwin':
18
+ if (process.arch === 'x64') return 'darwin-x64'
19
+ if (process.arch === 'arm64') return 'darwin-arm64'
20
+ return null
21
+ case 'linux': {
22
+ const libc = linuxLibc()
23
+ if (process.arch === 'x64' && libc === 'gnu') return 'linux-x64-gnu'
24
+ if (process.arch === 'arm64' && libc === 'gnu') return 'linux-arm64-gnu'
25
+ return null
26
+ }
27
+ case 'win32':
28
+ if (process.arch === 'x64') return 'win32-x64-msvc'
29
+ if (process.arch === 'arm64') return 'win32-arm64-msvc'
30
+ return null
31
+ default:
32
+ return null
33
+ }
34
+ }
35
+
36
+ function loadNative() {
37
+ const candidates = []
38
+ const prebuildTag = runtimePrebuildTag()
39
+ if (prebuildTag != null) {
40
+ candidates.push(path.join(__dirname, 'prebuilds', prebuildTag, 'vectlite.node'))
41
+ }
42
+ candidates.push(path.join(__dirname, 'vectlite.node'))
43
+
44
+ const errors = []
45
+ for (const candidate of candidates) {
46
+ if (!fs.existsSync(candidate)) {
47
+ continue
48
+ }
49
+ try {
50
+ return require(candidate)
51
+ } catch (error) {
52
+ errors.push(`${candidate}: ${error?.message ?? String(error)}`)
53
+ }
54
+ }
55
+
56
+ const detail = errors.length === 0 ? 'No compatible prebuilt binary was found.' : errors.join('\n')
57
+ throw new Error(
58
+ `Unable to load the vectlite native addon.\n${detail}\n` +
59
+ 'If this platform is not covered by prebuilt binaries, install Rust/Cargo and reinstall the package.',
60
+ )
61
+ }
62
+
63
+ const native = loadNative()
4
64
 
5
65
  const TOKEN_RE = /[a-z0-9]+/g
6
66
 
package/native/Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "vectlite-node"
3
- version = "0.1.3"
3
+ version = "0.1.4"
4
4
  edition = "2024"
5
5
  license = "MIT"
6
6
  description = "Node.js bindings for vectlite."
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "vectlite-core"
3
- version = "0.1.3"
3
+ version = "0.1.4"
4
4
  edition = "2024"
5
5
  license = "MIT"
6
6
  description = "Core storage engine for vectlite."
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "vectlite",
3
- "version": "0.1.3",
3
+ "version": "0.1.4",
4
4
  "description": "Embedded vector store for local-first AI applications.",
5
5
  "main": "index.js",
6
6
  "types": "index.d.ts",
@@ -28,12 +28,13 @@
28
28
  "index.d.ts",
29
29
  "README.md",
30
30
  "scripts",
31
- "native"
31
+ "native",
32
+ "prebuilds"
32
33
  ],
33
34
  "scripts": {
34
35
  "build": "node ./scripts/build-addon.mjs",
35
- "test": "npm run build && node --test tests/smoke.test.cjs",
36
- "install": "node ./scripts/build-addon.mjs",
36
+ "test": "npm run build && node --test tests/smoke.test.cjs && node --test tests/prebuild-loader.test.cjs",
37
+ "install": "node ./scripts/install-addon.mjs",
37
38
  "prepack": "node ./scripts/prepare-package.mjs",
38
39
  "postpack": "node ./scripts/clean-package.mjs"
39
40
  },
@@ -0,0 +1,27 @@
1
+ import { cpSync, existsSync, mkdirSync, readdirSync } from 'node:fs'
2
+ import { join, resolve } from 'node:path'
3
+ import { dirname } from 'node:path'
4
+ import { fileURLToPath } from 'node:url'
5
+
6
+ const __dirname = dirname(fileURLToPath(import.meta.url))
7
+ const packageRoot = resolve(__dirname, '..')
8
+ const sourceRoot = resolve(process.argv[2] ?? join(packageRoot, '..', '..', 'dist', 'node-prebuilds'))
9
+ const destRoot = join(packageRoot, 'prebuilds')
10
+
11
+ for (const entry of readdirSync(sourceRoot, { withFileTypes: true })) {
12
+ if (!entry.isDirectory() || !entry.name.startsWith('prebuild-')) {
13
+ continue
14
+ }
15
+
16
+ const prebuildTag = entry.name.slice('prebuild-'.length)
17
+ const source = join(sourceRoot, entry.name, 'vectlite.node')
18
+ if (!existsSync(source)) {
19
+ console.error(`Missing prebuilt artifact for ${prebuildTag}: ${source}`)
20
+ process.exit(1)
21
+ }
22
+
23
+ const destDir = join(destRoot, prebuildTag)
24
+ mkdirSync(destDir, { recursive: true })
25
+ cpSync(source, join(destDir, 'vectlite.node'))
26
+ console.log(`Collected ${prebuildTag}`)
27
+ }
@@ -0,0 +1,53 @@
1
+ import { existsSync } from 'node:fs'
2
+ import { join, resolve } from 'node:path'
3
+ import { dirname } from 'node:path'
4
+ import { fileURLToPath } from 'node:url'
5
+ import { spawnSync } from 'node:child_process'
6
+
7
+ const __dirname = dirname(fileURLToPath(import.meta.url))
8
+ const packageRoot = resolve(__dirname, '..')
9
+
10
+ function linuxLibc() {
11
+ if (process.platform !== 'linux') {
12
+ return null
13
+ }
14
+
15
+ const report = process.report?.getReport?.()
16
+ return report?.header?.glibcVersionRuntime ? 'gnu' : 'musl'
17
+ }
18
+
19
+ function runtimePrebuildTag() {
20
+ switch (process.platform) {
21
+ case 'darwin':
22
+ if (process.arch === 'x64') return 'darwin-x64'
23
+ if (process.arch === 'arm64') return 'darwin-arm64'
24
+ return null
25
+ case 'linux': {
26
+ const libc = linuxLibc()
27
+ if (process.arch === 'x64' && libc === 'gnu') return 'linux-x64-gnu'
28
+ if (process.arch === 'arm64' && libc === 'gnu') return 'linux-arm64-gnu'
29
+ return null
30
+ }
31
+ case 'win32':
32
+ if (process.arch === 'x64') return 'win32-x64-msvc'
33
+ if (process.arch === 'arm64') return 'win32-arm64-msvc'
34
+ return null
35
+ default:
36
+ return null
37
+ }
38
+ }
39
+
40
+ const prebuildTag = runtimePrebuildTag()
41
+ const prebuiltPath =
42
+ prebuildTag == null ? null : join(packageRoot, 'prebuilds', prebuildTag, 'vectlite.node')
43
+
44
+ if (prebuiltPath != null && existsSync(prebuiltPath)) {
45
+ console.log(`Using prebuilt addon: ${prebuiltPath}`)
46
+ process.exit(0)
47
+ }
48
+
49
+ const result = spawnSync(process.execPath, [join(__dirname, 'build-addon.mjs')], {
50
+ stdio: 'inherit',
51
+ })
52
+
53
+ process.exit(result.status ?? 1)
@@ -0,0 +1,56 @@
1
+ import { cpSync, existsSync, mkdirSync } from 'node:fs'
2
+ import { join, resolve } from 'node:path'
3
+ import { dirname } from 'node:path'
4
+ import { fileURLToPath } from 'node:url'
5
+
6
+ const __dirname = dirname(fileURLToPath(import.meta.url))
7
+ const packageRoot = resolve(__dirname, '..')
8
+
9
+ function linuxLibc() {
10
+ if (process.platform !== 'linux') {
11
+ return null
12
+ }
13
+
14
+ const report = process.report?.getReport?.()
15
+ return report?.header?.glibcVersionRuntime ? 'gnu' : 'musl'
16
+ }
17
+
18
+ function runtimePrebuildTag() {
19
+ switch (process.platform) {
20
+ case 'darwin':
21
+ if (process.arch === 'x64') return 'darwin-x64'
22
+ if (process.arch === 'arm64') return 'darwin-arm64'
23
+ return null
24
+ case 'linux': {
25
+ const libc = linuxLibc()
26
+ if (process.arch === 'x64' && libc === 'gnu') return 'linux-x64-gnu'
27
+ if (process.arch === 'arm64' && libc === 'gnu') return 'linux-arm64-gnu'
28
+ return null
29
+ }
30
+ case 'win32':
31
+ if (process.arch === 'x64') return 'win32-x64-msvc'
32
+ if (process.arch === 'arm64') return 'win32-arm64-msvc'
33
+ return null
34
+ default:
35
+ return null
36
+ }
37
+ }
38
+
39
+ const prebuildTag = process.env.VECTLITE_PREBUILD_TAG ?? runtimePrebuildTag()
40
+ if (prebuildTag == null) {
41
+ console.error('Unable to determine a prebuild tag for this platform.')
42
+ process.exit(1)
43
+ }
44
+
45
+ const source = join(packageRoot, 'vectlite.node')
46
+ if (!existsSync(source)) {
47
+ console.error(`Missing built addon at ${source}. Run the build first.`)
48
+ process.exit(1)
49
+ }
50
+
51
+ const destDir = join(packageRoot, 'prebuilds', prebuildTag)
52
+ const dest = join(destDir, 'vectlite.node')
53
+
54
+ mkdirSync(destDir, { recursive: true })
55
+ cpSync(source, dest)
56
+ console.log(`Staged prebuilt ${prebuildTag}: ${dest}`)