@mastra/duckdb 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +253 -0
- package/dist/index.cjs +508 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +506 -0
- package/dist/index.js.map +1 -0
- package/dist/vector/filter-builder.d.ts +11 -0
- package/dist/vector/filter-builder.d.ts.map +1 -0
- package/dist/vector/index.d.ts +61 -0
- package/dist/vector/index.d.ts.map +1 -0
- package/dist/vector/types.d.ts +118 -0
- package/dist/vector/types.d.ts.map +1 -0
- package/package.json +66 -0
package/README.md
ADDED
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
# @mastra/duckdb
|
|
2
|
+
|
|
3
|
+
DuckDB vector store implementation for Mastra, providing high-performance embedded vector similarity search with HNSW indexing. No external server is required; it runs entirely in-process.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm install @mastra/duckdb
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Usage
|
|
12
|
+
|
|
13
|
+
### Vector Store
|
|
14
|
+
|
|
15
|
+
```typescript
|
|
16
|
+
import { DuckDBVector } from '@mastra/duckdb';
|
|
17
|
+
|
|
18
|
+
// Create a vector store with an in-memory database
|
|
19
|
+
const vectorStore = new DuckDBVector({
|
|
20
|
+
id: 'my-vector-store',
|
|
21
|
+
path: ':memory:', // or './vectors.duckdb' for persistence
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
// Create a new index with vector support
|
|
25
|
+
await vectorStore.createIndex({
|
|
26
|
+
indexName: 'my_vectors',
|
|
27
|
+
dimension: 1536,
|
|
28
|
+
metric: 'cosine',
|
|
29
|
+
});
|
|
30
|
+
|
|
31
|
+
// Add vectors with metadata
|
|
32
|
+
const ids = await vectorStore.upsert({
|
|
33
|
+
indexName: 'my_vectors',
|
|
34
|
+
vectors: [
|
|
35
|
+
[0.1, 0.2, 0.3],
|
|
36
|
+
[0.3, 0.4, 0.5],
|
|
37
|
+
], // truncated for brevity - real vectors must have 1536 dimensions to match the index
|
|
38
|
+
metadata: [
|
|
39
|
+
{ text: 'doc1', category: 'A' },
|
|
40
|
+
{ text: 'doc2', category: 'B' },
|
|
41
|
+
],
|
|
42
|
+
});
|
|
43
|
+
|
|
44
|
+
// Query similar vectors
|
|
45
|
+
const results = await vectorStore.query({
|
|
46
|
+
indexName: 'my_vectors',
|
|
47
|
+
queryVector: [0.1, 0.2, 0.3], // truncated - use actual 1536-dim vector
|
|
48
|
+
topK: 10,
|
|
49
|
+
filter: { category: 'A' },
|
|
50
|
+
includeVector: false,
|
|
51
|
+
});
|
|
52
|
+
|
|
53
|
+
// Clean up
|
|
54
|
+
await vectorStore.close();
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### With RAG Pipeline
|
|
58
|
+
|
|
59
|
+
```typescript
|
|
60
|
+
import { Mastra } from '@mastra/core';
|
|
61
|
+
import { DuckDBVector } from '@mastra/duckdb';
|
|
62
|
+
|
|
63
|
+
const vectorStore = new DuckDBVector({
|
|
64
|
+
id: 'rag-store',
|
|
65
|
+
path: './rag-vectors.duckdb',
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
// Use with Mastra's RAG system
|
|
69
|
+
const mastra = new Mastra({
|
|
70
|
+
vectors: {
|
|
71
|
+
ragStore: vectorStore,
|
|
72
|
+
},
|
|
73
|
+
});
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## Configuration
|
|
77
|
+
|
|
78
|
+
### Constructor Options
|
|
79
|
+
|
|
80
|
+
| Option | Type | Default | Description |
|
|
81
|
+
| ------------ | ----------------------------------------- | ------------ | --------------------------------------------------------- |
|
|
82
|
+
| `id` | `string` | required | Unique identifier for the vector store instance |
|
|
83
|
+
| `path` | `string` | `':memory:'` | Database file path. Use `:memory:` for an in-memory database |
|
|
84
|
+
| `dimensions` | `number` | `1536` | Default dimension for vector embeddings |
|
|
85
|
+
| `metric` | `'cosine' \| 'euclidean' \| 'dotproduct'` | `'cosine'` | Default distance metric for similarity search |
|
|
86
|
+
|
|
87
|
+
### Example Configurations
|
|
88
|
+
|
|
89
|
+
```typescript
|
|
90
|
+
// In-memory (fast, non-persistent)
|
|
91
|
+
const memoryStore = new DuckDBVector({
|
|
92
|
+
id: 'memory-store',
|
|
93
|
+
path: ':memory:',
|
|
94
|
+
});
|
|
95
|
+
|
|
96
|
+
// File-based (persistent)
|
|
97
|
+
const fileStore = new DuckDBVector({
|
|
98
|
+
id: 'file-store',
|
|
99
|
+
path: './data/vectors.duckdb',
|
|
100
|
+
dimensions: 768,
|
|
101
|
+
metric: 'cosine',
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
// With euclidean distance
|
|
105
|
+
const euclideanStore = new DuckDBVector({
|
|
106
|
+
id: 'euclidean-store',
|
|
107
|
+
path: ':memory:',
|
|
108
|
+
metric: 'euclidean',
|
|
109
|
+
});
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## Features
|
|
113
|
+
|
|
114
|
+
### Vector Store Features
|
|
115
|
+
|
|
116
|
+
- Embedded database with no external server required
|
|
117
|
+
- HNSW indexing for fast approximate nearest neighbor search
|
|
118
|
+
- Vector similarity search with cosine, euclidean, and dot product metrics
|
|
119
|
+
- Advanced metadata filtering with MongoDB-like query syntax
|
|
120
|
+
- Automatic UUID generation for vectors
|
|
121
|
+
- File-based persistence or in-memory operation
|
|
122
|
+
- Table management (create, list, describe, delete)
|
|
123
|
+
|
|
124
|
+
### Key Benefits
|
|
125
|
+
|
|
126
|
+
- **Zero infrastructure** - No database server to manage
|
|
127
|
+
- **High performance** - HNSW indexing with configurable parameters
|
|
128
|
+
- **SQL interface** - Familiar query language for metadata filtering
|
|
129
|
+
- **Parquet support** - Native import/export capabilities
|
|
130
|
+
- **Low memory footprint** - Efficient resource usage
|
|
131
|
+
|
|
132
|
+
## Supported Filter Operators
|
|
133
|
+
|
|
134
|
+
The following filter operators are supported for metadata queries:
|
|
135
|
+
|
|
136
|
+
- Comparison: `$eq`, `$ne`, `$gt`, `$gte`, `$lt`, `$lte`
|
|
137
|
+
- Logical: `$and`, `$or`, `$not`, `$nor`
|
|
138
|
+
- Array: `$in`, `$nin`
|
|
139
|
+
- Element: `$exists`
|
|
140
|
+
- Text: `$contains`
|
|
141
|
+
|
|
142
|
+
### Filter Examples
|
|
143
|
+
|
|
144
|
+
```typescript
|
|
145
|
+
// Simple equality
|
|
146
|
+
const results = await vectorStore.query({
|
|
147
|
+
indexName: 'docs',
|
|
148
|
+
queryVector: [0.1, 0.2, 0.3], // truncated - use actual embedding vector
|
|
149
|
+
filter: { category: 'technology' },
|
|
150
|
+
});
|
|
151
|
+
|
|
152
|
+
// Comparison operators
|
|
153
|
+
const results = await vectorStore.query({
|
|
154
|
+
indexName: 'docs',
|
|
155
|
+
queryVector: [0.1, 0.2, 0.3], // truncated - use actual embedding vector
|
|
156
|
+
filter: { price: { $gt: 100, $lte: 500 } },
|
|
157
|
+
});
|
|
158
|
+
|
|
159
|
+
// Logical operators
|
|
160
|
+
const results = await vectorStore.query({
|
|
161
|
+
indexName: 'docs',
|
|
162
|
+
queryVector: [0.1, 0.2, 0.3], // truncated - use actual embedding vector
|
|
163
|
+
filter: {
|
|
164
|
+
$and: [{ category: 'electronics' }, { $or: [{ brand: 'Apple' }, { brand: 'Samsung' }] }],
|
|
165
|
+
},
|
|
166
|
+
});
|
|
167
|
+
|
|
168
|
+
// Array operators
|
|
169
|
+
const results = await vectorStore.query({
|
|
170
|
+
indexName: 'docs',
|
|
171
|
+
queryVector: [0.1, 0.2, 0.3], // truncated - use actual embedding vector
|
|
172
|
+
filter: { tags: { $in: ['featured', 'sale'] } },
|
|
173
|
+
});
|
|
174
|
+
|
|
175
|
+
// Nested field access
|
|
176
|
+
const results = await vectorStore.query({
|
|
177
|
+
indexName: 'docs',
|
|
178
|
+
queryVector: [0.1, 0.2, 0.3], // truncated - use actual embedding vector
|
|
179
|
+
filter: { 'user.profile.tier': 'premium' },
|
|
180
|
+
});
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
## Vector Store Methods
|
|
184
|
+
|
|
185
|
+
### Index Management
|
|
186
|
+
|
|
187
|
+
- `createIndex({ indexName, dimension, metric? })`: Create a new table with vector support and an optional HNSW index
|
|
188
|
+
- `listIndexes()`: List all vector-enabled tables
|
|
189
|
+
- `describeIndex({ indexName })`: Get table statistics (dimension, count, metric)
|
|
190
|
+
- `deleteIndex({ indexName })`: Delete a table and its data
|
|
191
|
+
|
|
192
|
+
### Vector Operations
|
|
193
|
+
|
|
194
|
+
- `upsert({ indexName, vectors, metadata?, ids? })`: Add or update vectors
|
|
195
|
+
- `query({ indexName, queryVector, topK?, filter?, includeVector? })`: Search for similar vectors
|
|
196
|
+
- `updateVector({ indexName, id?, filter?, update })`: Update a vector by ID or metadata filter
|
|
197
|
+
- `deleteVector({ indexName, id })`: Delete a single vector by ID
|
|
198
|
+
- `deleteVectors({ indexName, ids?, filter? })`: Delete multiple vectors by IDs or metadata filter
|
|
199
|
+
|
|
200
|
+
### Connection Management
|
|
201
|
+
|
|
202
|
+
- `close()`: Close the database connection
|
|
203
|
+
|
|
204
|
+
## Distance Metrics
|
|
205
|
+
|
|
206
|
+
| Metric | Description | Score Range | Best For |
|
|
207
|
+
| ------------ | ----------------- | ------------------- | ----------------------------------- |
|
|
208
|
+
| `cosine` | Cosine similarity | -1 to 1 (1 = identical) | Text embeddings, normalized vectors |
|
|
209
|
+
| `euclidean` | L2 distance | 0-∞ (0 = identical) | Image embeddings, spatial data |
|
|
210
|
+
| `dotproduct` | Inner product | -∞ to ∞ | When magnitude matters |
|
|
211
|
+
|
|
212
|
+
## Use Cases
|
|
213
|
+
|
|
214
|
+
### Embedded Semantic Search
|
|
215
|
+
|
|
216
|
+
Build offline-capable AI applications with semantic search that runs entirely in-process without external dependencies.
|
|
217
|
+
|
|
218
|
+
```typescript
|
|
219
|
+
const vectorStore = new DuckDBVector({
|
|
220
|
+
id: 'semantic-search',
|
|
221
|
+
path: './search.duckdb',
|
|
222
|
+
});
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
### Local RAG Pipelines
|
|
226
|
+
|
|
227
|
+
Process sensitive documents locally without sending data to cloud vector databases.
|
|
228
|
+
|
|
229
|
+
```typescript
|
|
230
|
+
const vectorStore = new DuckDBVector({
|
|
231
|
+
id: 'local-rag',
|
|
232
|
+
path: './private-docs.duckdb',
|
|
233
|
+
dimensions: 1536, // OpenAI embeddings
|
|
234
|
+
});
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
### Development and Testing
|
|
238
|
+
|
|
239
|
+
Rapidly prototype vector search features with zero infrastructure setup.
|
|
240
|
+
|
|
241
|
+
```typescript
|
|
242
|
+
const vectorStore = new DuckDBVector({
|
|
243
|
+
id: 'dev-store',
|
|
244
|
+
path: ':memory:', // Fast in-memory for testing
|
|
245
|
+
});
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
## Related Links
|
|
249
|
+
|
|
250
|
+
- [DuckDB Documentation](https://duckdb.org/docs/)
|
|
251
|
+
- [DuckDB VSS Extension](https://duckdb.org/docs/extensions/vss)
|
|
252
|
+
- [Mastra Documentation](https://mastra.ai/docs)
|
|
253
|
+
- [GitHub Issue #8140](https://github.com/mastra-ai/mastra/issues/8140)
|