@sochdb/sochdb 0.4.1 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +545 -8
- package/_bin/aarch64-apple-darwin/libsochdb_storage.dylib +0 -0
- package/_bin/aarch64-apple-darwin/sochdb-bulk +0 -0
- package/_bin/aarch64-apple-darwin/sochdb-grpc-server +0 -0
- package/_bin/aarch64-apple-darwin/sochdb-server +0 -0
- package/_bin/x86_64-pc-windows-msvc/sochdb-bulk.exe +0 -0
- package/_bin/x86_64-pc-windows-msvc/sochdb-grpc-server.exe +0 -0
- package/_bin/x86_64-pc-windows-msvc/sochdb_storage.dll +0 -0
- package/_bin/x86_64-unknown-linux-gnu/libsochdb_storage.so +0 -0
- package/_bin/x86_64-unknown-linux-gnu/sochdb-bulk +0 -0
- package/_bin/x86_64-unknown-linux-gnu/sochdb-grpc-server +0 -0
- package/_bin/x86_64-unknown-linux-gnu/sochdb-server +0 -0
- package/dist/cjs/embedded/database.js +71 -4
- package/dist/cjs/embedded/ffi/bindings.js +11 -1
- package/dist/cjs/embedded/ffi/hnsw-bindings.js +295 -0
- package/dist/cjs/embedded/ffi/library-finder.js +10 -3
- package/dist/cjs/embedded/index.js +5 -2
- package/dist/cjs/index.js +9 -6
- package/dist/esm/embedded/database.js +71 -4
- package/dist/esm/embedded/ffi/bindings.js +13 -1
- package/dist/esm/embedded/ffi/hnsw-bindings.js +316 -0
- package/dist/esm/embedded/ffi/library-finder.js +10 -3
- package/dist/esm/embedded/index.js +5 -2
- package/dist/esm/index.js +9 -6
- package/dist/types/embedded/database.d.ts +54 -1
- package/dist/types/embedded/database.d.ts.map +1 -1
- package/dist/types/embedded/ffi/bindings.d.ts +2 -0
- package/dist/types/embedded/ffi/bindings.d.ts.map +1 -1
- package/dist/types/embedded/ffi/hnsw-bindings.d.ts +90 -0
- package/dist/types/embedded/ffi/hnsw-bindings.d.ts.map +1 -0
- package/dist/types/embedded/ffi/library-finder.d.ts.map +1 -1
- package/dist/types/embedded/index.d.ts +1 -0
- package/dist/types/embedded/index.d.ts.map +1 -1
- package/dist/types/index.d.ts +4 -2
- package/dist/types/index.d.ts.map +1 -1
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -1,8 +1,212 @@
|
|
|
1
1
|
# SochDB Node.js SDK
|
|
2
2
|
|
|
3
|
-
**
|
|
3
|
+
**LLM-Optimized Embedded Database with Native Vector Search**
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
npm install @sochdb/sochdb
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Or from source:
|
|
14
|
+
```bash
|
|
15
|
+
cd sochdb-typescript-sdk
|
|
16
|
+
npm install
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## Architecture: Flexible Deployment
|
|
22
|
+
|
|
23
|
+
**Tri-mode architecture: Embedded + Concurrent + Server (gRPC/IPC)**
|
|
4
24
|
Choose the deployment mode that fits your needs.
|
|
5
25
|
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
# SochDB Node.js SDK Documentation
|
|
29
|
+
|
|
30
|
+
**LLM-Optimized Embedded Database with Native Vector Search**
|
|
31
|
+
|
|
32
|
+
---
|
|
33
|
+
|
|
34
|
+
## Table of Contents
|
|
35
|
+
|
|
36
|
+
1. [Quick Start](#1-quick-start)
|
|
37
|
+
2. [Installation](#2-installation)
|
|
38
|
+
3. [Features](#3-features)
|
|
39
|
+
- [Namespace API](#namespace-api---multi-tenant-isolation)
|
|
40
|
+
- [Priority Queue API](#priority-queue-api---task-processing)
|
|
41
|
+
4. [Architecture Overview](#4-architecture-overview)
|
|
42
|
+
5. [Core Key-Value Operations](#5-core-key-value-operations)
|
|
43
|
+
6. [Transactions (ACID with SSI)](#6-transactions-acid-with-ssi)
|
|
44
|
+
7. [Query Builder](#7-query-builder)
|
|
45
|
+
8. [Prefix Scanning](#8-prefix-scanning)
|
|
46
|
+
9. [SQL Operations](#9-sql-operations)
|
|
47
|
+
10. [Table Management & Index Policies](#10-table-management--index-policies)
|
|
48
|
+
11. [Namespaces & Collections](#11-namespaces--collections)
|
|
49
|
+
12. [Priority Queues](#12-priority-queues)
|
|
50
|
+
13. [Vector Search](#13-vector-search)
|
|
51
|
+
14. [Hybrid Search (Vector + BM25)](#14-hybrid-search-vector--bm25)
|
|
52
|
+
15. [Graph Operations](#15-graph-operations)
|
|
53
|
+
16. [Temporal Graph (Time-Travel)](#16-temporal-graph-time-travel)
|
|
54
|
+
17. [Semantic Cache](#17-semantic-cache)
|
|
55
|
+
18. [Memory System](#18-memory-system)
|
|
56
|
+
19. [Session Management](#19-session-management)
|
|
57
|
+
20. [Context Query Builder (LLM Optimization)](#20-context-query-builder-llm-optimization)
|
|
58
|
+
21. [Atomic Multi-Index Writes](#21-atomic-multi-index-writes)
|
|
59
|
+
22. [Recovery & WAL Management](#22-recovery--wal-management)
|
|
60
|
+
23. [Checkpoints & Snapshots](#23-checkpoints--snapshots)
|
|
61
|
+
24. [Compression & Storage](#24-compression--storage)
|
|
62
|
+
25. [Statistics & Monitoring](#25-statistics--monitoring)
|
|
63
|
+
26. [Distributed Tracing](#26-distributed-tracing)
|
|
64
|
+
27. [Workflow & Run Tracking](#27-workflow--run-tracking)
|
|
65
|
+
28. [Server Mode (gRPC Client)](#28-server-mode-grpc-client)
|
|
66
|
+
29. [IPC Client (Unix Sockets)](#29-ipc-client-unix-sockets)
|
|
67
|
+
30. [Standalone VectorIndex](#30-standalone-vectorindex)
|
|
68
|
+
31. [Vector Utilities](#31-vector-utilities)
|
|
69
|
+
32. [Data Formats (TOON/JSON/Columnar)](#32-data-formats-toonjsoncolumnar)
|
|
70
|
+
33. [Policy Service](#33-policy-service)
|
|
71
|
+
34. [MCP (Model Context Protocol)](#34-mcp-model-context-protocol)
|
|
72
|
+
35. [Configuration Reference](#35-configuration-reference)
|
|
73
|
+
36. [Error Handling](#36-error-handling)
|
|
74
|
+
37. [Async Support](#37-async-support)
|
|
75
|
+
38. [Building & Development](#38-building--development)
|
|
76
|
+
39. [Complete Examples](#39-complete-examples)
|
|
77
|
+
40. [Migration Guide](#40-migration-guide)
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
## 1. Quick Start
|
|
82
|
+
|
|
83
|
+
### Concurrent Embedded Mode
|
|
84
|
+
|
|
85
|
+
For web applications with multiple Node.js processes (PM2 cluster, multiple workers):
|
|
86
|
+
|
|
87
|
+
```typescript
|
|
88
|
+
import { EmbeddedDatabase } from '@sochdb/sochdb';
|
|
89
|
+
import express from 'express';
|
|
90
|
+
|
|
91
|
+
// Open in concurrent mode - multiple processes can access simultaneously
|
|
92
|
+
const db = EmbeddedDatabase.openConcurrent('./web_db');
|
|
93
|
+
|
|
94
|
+
const app = express();
|
|
95
|
+
|
|
96
|
+
app.get('/user/:id', async (req, res) => {
|
|
97
|
+
// Multiple concurrent requests can read simultaneously (~100ns)
|
|
98
|
+
const data = await db.get(Buffer.from(`user:${req.params.id}`));
|
|
99
|
+
if (!data) {
|
|
100
|
+
res.status(404).json({ error: 'not found' });
|
|
101
|
+
return;
|
|
102
|
+
}
|
|
103
|
+
res.send(data);
|
|
104
|
+
});
|
|
105
|
+
|
|
106
|
+
app.post('/user/:id', async (req, res) => {
|
|
107
|
+
// Writes are automatically coordinated (~60µs amortized)
|
|
108
|
+
await db.put(Buffer.from(`user:${req.params.id}`), req.body);
|
|
109
|
+
res.json({ status: 'ok' });
|
|
110
|
+
});
|
|
111
|
+
|
|
112
|
+
// Check concurrent mode status
|
|
113
|
+
console.log(`Concurrent mode: ${db.isConcurrent}`); // true
|
|
114
|
+
|
|
115
|
+
// Start with PM2 cluster mode (multiple workers can access DB)
|
|
116
|
+
// pm2 start app.js -i max
|
|
117
|
+
app.listen(3000);
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### Performance
|
|
121
|
+
|
|
122
|
+
| Operation | Standard Mode | Concurrent Mode |
|
|
123
|
+
|-----------|---------------|-----------------|
|
|
124
|
+
| Read (single process) | ~100ns | ~100ns |
|
|
125
|
+
| Read (multi-process) | **Blocked** ❌ | ~100ns ✅ |
|
|
126
|
+
| Write | ~5ms (fsync) | ~60µs (amortized) |
|
|
127
|
+
| Max concurrent readers | 1 | 1024 |
|
|
128
|
+
|
|
129
|
+
### PM2 Cluster Example
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
# Install PM2
|
|
133
|
+
npm install -g pm2
|
|
134
|
+
|
|
135
|
+
# Start with automatic worker scaling
|
|
136
|
+
pm2 start server.js -i max
|
|
137
|
+
|
|
138
|
+
# All workers can access the same database concurrently!
|
|
139
|
+
pm2 logs
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
### PM2 Ecosystem File
|
|
143
|
+
|
|
144
|
+
```javascript
|
|
145
|
+
// ecosystem.config.js
|
|
146
|
+
module.exports = {
|
|
147
|
+
apps: [{
|
|
148
|
+
name: 'api-server',
|
|
149
|
+
script: './server.js',
|
|
150
|
+
instances: 'max', // Scale across all CPU cores
|
|
151
|
+
exec_mode: 'cluster',
|
|
152
|
+
env: {
|
|
153
|
+
NODE_ENV: 'production',
|
|
154
|
+
DB_PATH: './shared_db' // All workers use same DB
|
|
155
|
+
}
|
|
156
|
+
}]
|
|
157
|
+
};
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
```bash
|
|
161
|
+
# Deploy with ecosystem file
|
|
162
|
+
pm2 start ecosystem.config.js
|
|
163
|
+
|
|
164
|
+
# Monitor all workers
|
|
165
|
+
pm2 monit
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
### Docker Compose with PM2
|
|
169
|
+
|
|
170
|
+
```yaml
|
|
171
|
+
version: '3.8'
|
|
172
|
+
services:
|
|
173
|
+
app:
|
|
174
|
+
build: .
|
|
175
|
+
environment:
|
|
176
|
+
- NODE_ENV=production
|
|
177
|
+
- INSTANCES=4 # 4 PM2 workers
|
|
178
|
+
volumes:
|
|
179
|
+
- ./data:/app/data # Shared database volume
|
|
180
|
+
ports:
|
|
181
|
+
- "3000:3000"
|
|
182
|
+
command: pm2-runtime start ecosystem.config.js
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
### Kubernetes Deployment
|
|
186
|
+
|
|
187
|
+
```yaml
|
|
188
|
+
apiVersion: apps/v1
|
|
189
|
+
kind: Deployment
|
|
190
|
+
metadata:
|
|
191
|
+
name: sochdb-app
|
|
192
|
+
spec:
|
|
193
|
+
replicas: 4 # 4 pods share the database
|
|
194
|
+
template:
|
|
195
|
+
spec:
|
|
196
|
+
containers:
|
|
197
|
+
- name: app
|
|
198
|
+
image: myapp:latest
|
|
199
|
+
volumeMounts:
|
|
200
|
+
- name: db-storage
|
|
201
|
+
mountPath: /app/data
|
|
202
|
+
volumes:
|
|
203
|
+
- name: db-storage
|
|
204
|
+
persistentVolumeClaim:
|
|
205
|
+
claimName: sochdb-pvc # Shared PVC with ReadWriteMany
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
---
|
|
209
|
+
|
|
6
210
|
## Features
|
|
7
211
|
|
|
8
212
|
### Memory System - LLM-Native Memory for AI Agents
|
|
@@ -239,21 +443,354 @@ console.log(`Pending: ${stats.pending}, Completed: ${stats.completed}`);
|
|
|
239
443
|
|
|
240
444
|
---
|
|
241
445
|
|
|
242
|
-
|
|
446
|
+
---
|
|
243
447
|
|
|
448
|
+
## System Requirements
|
|
449
|
+
|
|
450
|
+
### For Concurrent Mode
|
|
451
|
+
|
|
452
|
+
- **SochDB Core**: Latest version
|
|
453
|
+
- **Node.js**: 14.0+ (18.0+ recommended)
|
|
454
|
+
- **Native Library**: `libsochdb_storage.{dylib,so}`
|
|
455
|
+
- **FFI**: Koffi (automatically installed)
|
|
456
|
+
|
|
457
|
+
**Operating Systems:**
|
|
458
|
+
- ✅ Linux (Ubuntu 20.04+, RHEL 8+)
|
|
459
|
+
- ✅ macOS (10.15+, both Intel and Apple Silicon)
|
|
460
|
+
- ⚠️ Windows (requires native builds)
|
|
461
|
+
|
|
462
|
+
**File Descriptors:**
|
|
463
|
+
- Default limit: 1024 (sufficient for most workloads)
|
|
464
|
+
- For high concurrency with PM2: `ulimit -n 4096`
|
|
465
|
+
|
|
466
|
+
**Memory:**
|
|
467
|
+
- Standard mode: ~50MB base + data
|
|
468
|
+
- Concurrent mode: +4KB per concurrent reader slot (1024 slots = ~4MB overhead)
|
|
469
|
+
- PM2 cluster: Each worker has independent memory
|
|
470
|
+
|
|
471
|
+
---
|
|
472
|
+
|
|
473
|
+
## Troubleshooting
|
|
474
|
+
|
|
475
|
+
### "Database is locked" Error (Standard Mode)
|
|
476
|
+
|
|
477
|
+
```
|
|
478
|
+
Error: SQLITE_BUSY: database is locked
|
|
479
|
+
```
|
|
480
|
+
|
|
481
|
+
**Solution**: Use concurrent mode for multi-process access:
|
|
482
|
+
|
|
483
|
+
```typescript
|
|
484
|
+
// ❌ Standard mode - PM2 cluster will fail
|
|
485
|
+
const db = new EmbeddedDatabase('./data.db');
|
|
486
|
+
|
|
487
|
+
// ✅ Concurrent mode - PM2 cluster works!
|
|
488
|
+
const db = EmbeddedDatabase.openConcurrent('./data.db');
|
|
489
|
+
```
|
|
490
|
+
|
|
491
|
+
### Library Not Found Error
|
|
492
|
+
|
|
493
|
+
```
|
|
494
|
+
Error: Dynamic library 'libsochdb_storage.dylib' not found
|
|
495
|
+
```
|
|
496
|
+
|
|
497
|
+
**macOS**:
|
|
244
498
|
```bash
|
|
245
|
-
|
|
499
|
+
# Build and install library
|
|
500
|
+
cd /path/to/sochdb
|
|
501
|
+
cargo build --release
|
|
502
|
+
sudo cp target/release/libsochdb_storage.dylib /usr/local/lib/
|
|
246
503
|
```
|
|
247
504
|
|
|
248
|
-
|
|
505
|
+
**Linux**:
|
|
249
506
|
```bash
|
|
250
|
-
cd sochdb
|
|
251
|
-
|
|
507
|
+
cd /path/to/sochdb
|
|
508
|
+
cargo build --release
|
|
509
|
+
sudo cp target/release/libsochdb_storage.so /usr/local/lib/
|
|
510
|
+
sudo ldconfig
|
|
252
511
|
```
|
|
253
512
|
|
|
513
|
+
**Development Mode** (no install):
|
|
514
|
+
```bash
|
|
515
|
+
export DYLD_LIBRARY_PATH=/path/to/sochdb/target/release # macOS
|
|
516
|
+
export LD_LIBRARY_PATH=/path/to/sochdb/target/release # Linux
|
|
517
|
+
```
|
|
518
|
+
|
|
519
|
+
### PM2 Cluster Issues
|
|
520
|
+
|
|
521
|
+
**Symptom**: Workers crash with "database locked"
|
|
522
|
+
|
|
523
|
+
**Solution**: Ensure concurrent mode is used:
|
|
524
|
+
```javascript
|
|
525
|
+
// ecosystem.config.js
|
|
526
|
+
module.exports = {
|
|
527
|
+
apps: [{
|
|
528
|
+
name: 'api',
|
|
529
|
+
script: './server.js',
|
|
530
|
+
instances: 4,
|
|
531
|
+
exec_mode: 'cluster',
|
|
532
|
+
env: {
|
|
533
|
+
USE_CONCURRENT_MODE: 'true' // Flag to use openConcurrent()
|
|
534
|
+
}
|
|
535
|
+
}]
|
|
536
|
+
};
|
|
537
|
+
```
|
|
538
|
+
|
|
539
|
+
```typescript
|
|
540
|
+
// server.ts
|
|
541
|
+
const db = process.env.USE_CONCURRENT_MODE
|
|
542
|
+
? EmbeddedDatabase.openConcurrent('./db')
|
|
543
|
+
: new EmbeddedDatabase('./db');
|
|
544
|
+
|
|
545
|
+
console.log('Concurrent mode:', db.isConcurrent); // Should be true
|
|
546
|
+
```
|
|
547
|
+
|
|
548
|
+
### Docker Volume Permissions
|
|
549
|
+
|
|
550
|
+
**Symptom**: `EACCES: permission denied` when opening database
|
|
551
|
+
|
|
552
|
+
**Solution**: Fix volume ownership:
|
|
553
|
+
```dockerfile
|
|
554
|
+
FROM node:18
|
|
555
|
+
WORKDIR /app
|
|
556
|
+
|
|
557
|
+
# Create data directory with correct permissions
|
|
558
|
+
RUN mkdir -p /app/data && chown -R node:node /app
|
|
559
|
+
|
|
560
|
+
# Switch to non-root user
|
|
561
|
+
USER node
|
|
562
|
+
|
|
563
|
+
COPY --chown=node:node . .
|
|
564
|
+
RUN npm install
|
|
565
|
+
|
|
566
|
+
CMD ["npm", "start"]
|
|
567
|
+
```
|
|
568
|
+
|
|
569
|
+
### Performance Issues
|
|
570
|
+
|
|
571
|
+
**Symptom**: Concurrent reads slower than expected
|
|
572
|
+
|
|
573
|
+
**Check 1** - Verify concurrent mode:
|
|
574
|
+
```typescript
|
|
575
|
+
if (!db.isConcurrent) {
|
|
576
|
+
console.error('Database is not in concurrent mode!');
|
|
577
|
+
process.exit(1);
|
|
578
|
+
}
|
|
579
|
+
```
|
|
580
|
+
|
|
581
|
+
**Check 2** - Monitor PM2 workers:
|
|
582
|
+
```bash
|
|
583
|
+
pm2 monit # Real-time monitoring
|
|
584
|
+
pm2 logs --lines 200 # Check for errors
|
|
585
|
+
```
|
|
586
|
+
|
|
587
|
+
**Check 3** - Batch writes:
|
|
588
|
+
```typescript
|
|
589
|
+
// ❌ Slow - individual writes
|
|
590
|
+
for (const item of items) {
|
|
591
|
+
await collection.insert(item);
|
|
592
|
+
}
|
|
593
|
+
|
|
594
|
+
// ✅ Fast - batch write
|
|
595
|
+
await collection.insertBatch(items);
|
|
596
|
+
```
|
|
597
|
+
|
|
598
|
+
---
|
|
599
|
+
|
|
600
|
+
## 🆕 Vector Search - Native HNSW
|
|
601
|
+
|
|
602
|
+
SochDB now includes **native HNSW (Hierarchical Navigable Small World)** vector search for sub-millisecond similarity search across millions of vectors.
|
|
603
|
+
|
|
604
|
+
### Quick Start - Vector Search
|
|
605
|
+
|
|
606
|
+
```typescript
|
|
607
|
+
import { HnswIndex } from '@sochdb/sochdb';
|
|
608
|
+
|
|
609
|
+
// Create HNSW index
|
|
610
|
+
const index = new HnswIndex({
|
|
611
|
+
dimension: 384, // Vector dimension
|
|
612
|
+
maxConnections: 16, // M parameter (default: 16)
|
|
613
|
+
efConstruction: 200, // Build quality (default: 200)
|
|
614
|
+
efSearch: 100 // Search quality (default: 100)
|
|
615
|
+
});
|
|
616
|
+
|
|
617
|
+
// Insert vectors (batch is 10-100× faster)
|
|
618
|
+
index.insertBatch(
|
|
619
|
+
['doc1', 'doc2', 'doc3'],
|
|
620
|
+
[[1.0, 2.0, ...], [3.0, 4.0, ...], [5.0, 6.0, ...]]
|
|
621
|
+
);
|
|
622
|
+
|
|
623
|
+
// Search for similar vectors
|
|
624
|
+
const results = index.search(queryVector, 10);
|
|
625
|
+
console.log(results);
|
|
626
|
+
// [{ id: 'doc1', distance: 0.15 }, { id: 'doc3', distance: 0.23 }, ...]
|
|
627
|
+
|
|
628
|
+
// Clean up
|
|
629
|
+
index.close();
|
|
630
|
+
```
|
|
631
|
+
|
|
632
|
+
### Performance Comparison
|
|
633
|
+
|
|
634
|
+
| Implementation | 10K vectors | 100K vectors | 1M vectors |
|
|
635
|
+
|----------------|-------------|--------------|------------|
|
|
636
|
+
| **Linear Scan (old)** | ~50ms | ~500ms | ~5000ms |
|
|
637
|
+
| **Native HNSW (new)** | <0.5ms | <1ms | <1ms |
|
|
638
|
+
| **Speedup** | **100×** | **500×** | **5000×** |
|
|
639
|
+
|
|
640
|
+
### Three Ways to Use Vector Search
|
|
641
|
+
|
|
642
|
+
#### 1. Direct HNSW API (Recommended for Production)
|
|
643
|
+
|
|
644
|
+
Best performance, full control:
|
|
645
|
+
|
|
646
|
+
```typescript
|
|
647
|
+
import { HnswIndex } from '@sochdb/sochdb';
|
|
648
|
+
|
|
649
|
+
const index = new HnswIndex({ dimension: 1536 });
|
|
650
|
+
index.insertBatch(ids, embeddings);
|
|
651
|
+
const results = index.search(queryEmbedding, 10);
|
|
652
|
+
```
|
|
653
|
+
|
|
654
|
+
**✅ Use when:**
|
|
655
|
+
- You need maximum performance
|
|
656
|
+
- Working with large datasets (>10K vectors)
|
|
657
|
+
- Building RAG/AI applications
|
|
658
|
+
- You have an existing embedding pipeline
|
|
659
|
+
|
|
660
|
+
#### 2. Collection API (Simple, High-Level)
|
|
661
|
+
|
|
662
|
+
Convenient API with metadata support:
|
|
663
|
+
|
|
664
|
+
```typescript
|
|
665
|
+
import { Database } from '@sochdb/sochdb';
|
|
666
|
+
|
|
667
|
+
const db = await Database.open('./mydb');
|
|
668
|
+
const ns = await db.createNamespace({ name: 'docs' });
|
|
669
|
+
|
|
670
|
+
const collection = await ns.createCollection({
|
|
671
|
+
name: 'embeddings',
|
|
672
|
+
dimension: 384,
|
|
673
|
+
indexed: true // Note: Currently uses linear search in embedded mode
|
|
674
|
+
});
|
|
675
|
+
|
|
676
|
+
await collection.insert([1.0, 2.0, ...], { title: 'Document 1' }, 'doc1');
|
|
677
|
+
const results = await collection.search({ queryVector: [...], k: 10 });
|
|
678
|
+
```
|
|
679
|
+
|
|
680
|
+
**⚠️ Current Limitation:** Collection API uses O(n) linear search in embedded mode. For production use with >10K vectors, use:
|
|
681
|
+
- Direct HNSW API (above), OR
|
|
682
|
+
- gRPC Server Mode (see below)
|
|
683
|
+
|
|
684
|
+
#### 3. gRPC Server Mode (Production-Ready)
|
|
685
|
+
|
|
686
|
+
For distributed systems and multi-language support:
|
|
687
|
+
|
|
688
|
+
```typescript
|
|
689
|
+
import { SochDBClient } from '@sochdb/sochdb';
|
|
690
|
+
|
|
691
|
+
// Start server: sochdb-grpc --port 50051
|
|
692
|
+
const client = new SochDBClient({ address: 'localhost:50051' });
|
|
693
|
+
|
|
694
|
+
// Create HNSW index
|
|
695
|
+
await client.createIndex('docs', {
|
|
696
|
+
dimension: 1536,
|
|
697
|
+
config: { m: 16, ef_construction: 200 },
|
|
698
|
+
metric: 'cosine'
|
|
699
|
+
});
|
|
700
|
+
|
|
701
|
+
// Insert and search
|
|
702
|
+
await client.insertBatch('docs', ids, vectors);
|
|
703
|
+
const results = await client.search('docs', queryVector, 10);
|
|
704
|
+
```
|
|
705
|
+
|
|
706
|
+
**✅ Full HNSW support with:**
|
|
707
|
+
- Native Rust implementation
|
|
708
|
+
- Persistence
|
|
709
|
+
- Distributed queries
|
|
710
|
+
- Multi-language clients
|
|
711
|
+
|
|
712
|
+
### Migration from Linear Search
|
|
713
|
+
|
|
714
|
+
If you're using the Collection API with large datasets and experiencing slow search:
|
|
715
|
+
|
|
716
|
+
**Before (slow):**
|
|
717
|
+
```typescript
|
|
718
|
+
// O(n) scan through all documents
|
|
719
|
+
const results = await collection.search({ queryVector, k: 10 });
|
|
720
|
+
```
|
|
721
|
+
|
|
722
|
+
**After (fast) - Option 1: Use HnswIndex directly:**
|
|
723
|
+
```typescript
|
|
724
|
+
import { HnswIndex } from '@sochdb/sochdb';
|
|
725
|
+
|
|
726
|
+
const index = new HnswIndex({ dimension: 384 });
|
|
727
|
+
index.insertBatch(ids, vectors);
|
|
728
|
+
const results = index.search(queryVector, 10); // <1ms
|
|
729
|
+
```
|
|
730
|
+
|
|
731
|
+
**After (fast) - Option 2: Use gRPC mode:**
|
|
732
|
+
```bash
|
|
733
|
+
# Terminal 1: Start server
|
|
734
|
+
sochdb-grpc --port 50051
|
|
735
|
+
|
|
736
|
+
# Terminal 2: Use client
|
|
737
|
+
```
|
|
738
|
+
```typescript
|
|
739
|
+
const client = new SochDBClient({ address: 'localhost:50051' });
|
|
740
|
+
await client.createIndex('docs', { dimension: 384 });
|
|
741
|
+
const results = await client.search('docs', queryVector, 10);
|
|
742
|
+
```
|
|
743
|
+
|
|
744
|
+
### Complete Examples
|
|
745
|
+
|
|
746
|
+
- **[06_native_vector_search.ts](https://github.com/sochdb/sochdb-nodejs-examples/blob/main/06_native_vector_search.ts)** - Direct HNSW usage with benchmarks
|
|
747
|
+
- **[AI PDF Chatbot](https://github.com/sochdb/sochdb-nodejs-examples/tree/main/ai-pdf-chatbot-langchain)** - LangChain RAG example
|
|
748
|
+
|
|
749
|
+
### API Reference
|
|
750
|
+
|
|
751
|
+
```typescript
|
|
752
|
+
// HnswIndex Configuration
|
|
753
|
+
interface HnswConfig {
|
|
754
|
+
dimension: number; // Required: vector dimension
|
|
755
|
+
maxConnections?: number; // M parameter (default: 16)
|
|
756
|
+
efConstruction?: number; // Build quality (default: 200)
|
|
757
|
+
efSearch?: number; // Search quality (default: 100)
|
|
758
|
+
}
|
|
759
|
+
|
|
760
|
+
// Search Result
|
|
761
|
+
interface SearchResult {
|
|
762
|
+
id: string; // Vector ID
|
|
763
|
+
distance: number; // Distance (lower = more similar)
|
|
764
|
+
}
|
|
765
|
+
|
|
766
|
+
// Main Methods
|
|
767
|
+
class HnswIndex {
|
|
768
|
+
constructor(config: HnswConfig)
|
|
769
|
+
insert(id: string, vector: number[]): void
|
|
770
|
+
insertBatch(ids: string[], vectors: number[][]): void
|
|
771
|
+
search(queryVector: number[], k: number, fast?: boolean): SearchResult[]
|
|
772
|
+
searchUltra(queryVector: number[], k: number): SearchResult[]
|
|
773
|
+
close(): void
|
|
774
|
+
|
|
775
|
+
// Properties
|
|
776
|
+
get length(): number // Number of vectors
|
|
777
|
+
get dimension(): number // Vector dimension
|
|
778
|
+
get efSearch(): number
|
|
779
|
+
set efSearch(value: number) // Adjust search quality
|
|
780
|
+
}
|
|
781
|
+
```
|
|
782
|
+
|
|
783
|
+
### Roadmap
|
|
784
|
+
|
|
785
|
+
- **Current**: Direct HNSW FFI bindings
|
|
786
|
+
- **Next**: Collection API auto-uses HNSW in embedded mode
|
|
787
|
+
- **Future**: Persistent HNSW indexes with disk storage
|
|
788
|
+
|
|
789
|
+
---
|
|
790
|
+
|
|
254
791
|
# SochDB Node.js SDK Documentation
|
|
255
792
|
|
|
256
|
-
**
|
|
793
|
+
**LLM-Optimized Embedded Database with Native Vector Search**
|
|
257
794
|
|
|
258
795
|
---
|
|
259
796
|
|
|
@@ -3484,7 +4021,7 @@ A:
|
|
|
3484
4021
|
A: Yes! Both modes have the same API. Change `Database.open()` to `SochDBClient()` and vice versa.
|
|
3485
4022
|
|
|
3486
4023
|
**Q: Do temporal graphs work in embedded mode?**
|
|
3487
|
-
A: Yes!
|
|
4024
|
+
A: Yes! Temporal graphs work in both embedded and server modes with identical APIs.
|
|
3488
4025
|
|
|
3489
4026
|
**Q: Is embedded mode slower than server mode?**
|
|
3490
4027
|
A: Embedded mode is faster for single-process use (no network overhead). Server mode is better for distributed deployments.
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|