@mastra/lance 1.0.0 → 1.0.1-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +9 -0
- package/dist/docs/SKILL.md +15 -22
- package/dist/docs/{SOURCE_MAP.json → assets/SOURCE_MAP.json} +1 -1
- package/dist/docs/{rag/01-vector-databases.md → references/docs-rag-vector-databases.md} +208 -203
- package/dist/docs/{storage/01-reference.md → references/reference-storage-lance.md} +31 -13
- package/dist/docs/references/reference-vectors-lance.md +220 -0
- package/dist/index.cjs +53 -29
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +53 -29
- package/dist/index.js.map +1 -1
- package/dist/vector/index.d.ts +8 -3
- package/dist/vector/index.d.ts.map +1 -1
- package/package.json +7 -8
- package/dist/docs/README.md +0 -33
- package/dist/docs/vectors/01-reference.md +0 -149
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
# Lance Vector Store
|
|
2
|
+
|
|
3
|
+
The LanceVectorStore class provides vector search using [LanceDB](https://lancedb.github.io/lancedb/), an embedded vector database built on the Lance columnar format. It offers efficient storage and fast similarity search for both local development and production deployments.
|
|
4
|
+
|
|
5
|
+
## Factory Method
|
|
6
|
+
|
|
7
|
+
The LanceVectorStore uses a factory pattern for creation. You should use the static `create()` method rather than the constructor directly.
|
|
8
|
+
|
|
9
|
+
**uri:** (`string`): Path to LanceDB database or URI for cloud deployments
|
|
10
|
+
|
|
11
|
+
**options?:** (`ConnectionOptions`): Additional connection options for LanceDB
|
|
12
|
+
|
|
13
|
+
## Constructor Examples
|
|
14
|
+
|
|
15
|
+
You can create a `LanceVectorStore` instance using the static create method:
|
|
16
|
+
|
|
17
|
+
```ts
|
|
18
|
+
import { LanceVectorStore } from "@mastra/lance";
|
|
19
|
+
|
|
20
|
+
// Connect to a local database
|
|
21
|
+
const vectorStore = await LanceVectorStore.create("/path/to/db");
|
|
22
|
+
|
|
23
|
+
// Connect to a LanceDB cloud database
|
|
24
|
+
const cloudStore = await LanceVectorStore.create("db://host:port");
|
|
25
|
+
|
|
26
|
+
// Connect to a cloud database with options
|
|
27
|
+
const s3Store = await LanceVectorStore.create("s3://bucket/db", {
|
|
28
|
+
storageOptions: { timeout: "60s" },
|
|
29
|
+
});
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Methods
|
|
33
|
+
|
|
34
|
+
### createIndex()
|
|
35
|
+
|
|
36
|
+
**tableName:** (`string`): Name of the table to create index in
|
|
37
|
+
|
|
38
|
+
**indexName:** (`string`): Name of the index (column name) to create
|
|
39
|
+
|
|
40
|
+
**dimension:** (`number`): Vector dimension (must match your embedding model)
|
|
41
|
+
|
|
42
|
+
**metric?:** (`'cosine' | 'euclidean' | 'dotproduct'`): Distance metric for similarity search (Default: `cosine`)
|
|
43
|
+
|
|
44
|
+
**indexConfig?:** (`LanceIndexConfig`): Index configuration (Default: `{ type: 'hnsw' }`)
|
|
45
|
+
|
|
46
|
+
#### LanceIndexConfig
|
|
47
|
+
|
|
48
|
+
**type:** (`'ivfflat' | 'hnsw'`): Index type. `ivfflat`: clusters vectors into lists for approximate search. `hnsw`: graph-based index offering fast search times and high recall. (Default: `hnsw`)
|
|
49
|
+
|
|
50
|
+
**numPartitions?:** (`number`): Number of partitions for IVF indexes (Default: `128`)
|
|
51
|
+
|
|
52
|
+
**numSubVectors?:** (`number`): Number of sub-vectors for product quantization (Default: `16`)
|
|
53
|
+
|
|
54
|
+
**hnsw?:** (`HNSWConfig`): HNSW configuration object. `m?` (`number`): maximum number of connections per node (default: 16). `efConstruction?` (`number`): build-time complexity (default: 100).
|
|
55
|
+
|
|
56
|
+
### createTable()
|
|
57
|
+
|
|
58
|
+
**tableName:** (`string`): Name of the table to create
|
|
59
|
+
|
|
60
|
+
**data:** (`Record<string, unknown>[] | TableLike`): Initial data for the table
|
|
61
|
+
|
|
62
|
+
**options?:** (`Partial<CreateTableOptions>`): Additional table creation options
|
|
63
|
+
|
|
64
|
+
### upsert()
|
|
65
|
+
|
|
66
|
+
**tableName:** (`string`): Name of the table to upsert vectors into
|
|
67
|
+
|
|
68
|
+
**vectors:** (`number[][]`): Array of embedding vectors
|
|
69
|
+
|
|
70
|
+
**metadata?:** (`Record<string, any>[]`): Metadata for each vector
|
|
71
|
+
|
|
72
|
+
**ids?:** (`string[]`): Optional vector IDs (auto-generated if not provided)
|
|
73
|
+
|
|
74
|
+
### query()
|
|
75
|
+
|
|
76
|
+
**tableName:** (`string`): Name of the table to query
|
|
77
|
+
|
|
78
|
+
**queryVector:** (`number[]`): Query vector
|
|
79
|
+
|
|
80
|
+
**topK?:** (`number`): Number of results to return (Default: `10`)
|
|
81
|
+
|
|
82
|
+
**filter?:** (`Record<string, any>`): Metadata filters
|
|
83
|
+
|
|
84
|
+
**includeVector?:** (`boolean`): Whether to include the vector in the result (Default: `false`)
|
|
85
|
+
|
|
86
|
+
**columns?:** (`string[]`): Specific columns to include in the result (Default: `[]`)
|
|
87
|
+
|
|
88
|
+
**includeAllColumns?:** (`boolean`): Whether to include all columns in the result (Default: `false`)
|
|
89
|
+
|
|
90
|
+
### listTables()
|
|
91
|
+
|
|
92
|
+
Returns an array of table names as strings.
|
|
93
|
+
|
|
94
|
+
```typescript
|
|
95
|
+
const tables = await vectorStore.listTables();
|
|
96
|
+
// ['my_vectors', 'embeddings', 'documents']
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### getTableSchema()
|
|
100
|
+
|
|
101
|
+
**tableName:** (`string`): Name of the table to describe
|
|
102
|
+
|
|
103
|
+
Returns the schema of the specified table.
|
|
104
|
+
|
|
105
|
+
### deleteTable()
|
|
106
|
+
|
|
107
|
+
**tableName:** (`string`): Name of the table to delete
|
|
108
|
+
|
|
109
|
+
### deleteAllTables()
|
|
110
|
+
|
|
111
|
+
Deletes all tables in the database.
|
|
112
|
+
|
|
113
|
+
### listIndexes()
|
|
114
|
+
|
|
115
|
+
Returns an array of index names as strings.
|
|
116
|
+
|
|
117
|
+
### describeIndex()
|
|
118
|
+
|
|
119
|
+
**indexName:** (`string`): Name of the index to describe
|
|
120
|
+
|
|
121
|
+
Returns information about the index:
|
|
122
|
+
|
|
123
|
+
```typescript
|
|
124
|
+
interface IndexStats {
|
|
125
|
+
dimension: number;
|
|
126
|
+
count: number;
|
|
127
|
+
metric: "cosine" | "euclidean" | "dotproduct";
|
|
128
|
+
type: "ivfflat" | "hnsw";
|
|
129
|
+
config: {
|
|
130
|
+
m?: number;
|
|
131
|
+
efConstruction?: number;
|
|
132
|
+
numPartitions?: number;
|
|
133
|
+
numSubVectors?: number;
|
|
134
|
+
};
|
|
135
|
+
}
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### deleteIndex()
|
|
139
|
+
|
|
140
|
+
**indexName:** (`string`): Name of the index to delete
|
|
141
|
+
|
|
142
|
+
### updateVector()
|
|
143
|
+
|
|
144
|
+
Update a single vector by ID or by metadata filter. Either `id` or `filter` must be provided, but not both.
|
|
145
|
+
|
|
146
|
+
**indexName:** (`string`): Name of the index containing the vector
|
|
147
|
+
|
|
148
|
+
**id?:** (`string`): ID of the vector to update (mutually exclusive with filter)
|
|
149
|
+
|
|
150
|
+
**filter?:** (`Record<string, any>`): Metadata filter to identify vector(s) to update (mutually exclusive with id)
|
|
151
|
+
|
|
152
|
+
**update:** (`{ vector?: number[]; metadata?: Record<string, any>; }`): Object containing the vector and/or metadata to update
|
|
153
|
+
|
|
154
|
+
### deleteVector()
|
|
155
|
+
|
|
156
|
+
**indexName:** (`string`): Name of the index containing the vector
|
|
157
|
+
|
|
158
|
+
**id:** (`string`): ID of the vector to delete
|
|
159
|
+
|
|
160
|
+
### deleteVectors()
|
|
161
|
+
|
|
162
|
+
Delete multiple vectors by IDs or by metadata filter. Either `ids` or `filter` must be provided, but not both.
|
|
163
|
+
|
|
164
|
+
**indexName:** (`string`): Name of the index containing the vectors to delete
|
|
165
|
+
|
|
166
|
+
**ids?:** (`string[]`): Array of vector IDs to delete (mutually exclusive with filter)
|
|
167
|
+
|
|
168
|
+
**filter?:** (`Record<string, any>`): Metadata filter to identify vectors to delete (mutually exclusive with ids)
|
|
169
|
+
|
|
170
|
+
### close()
|
|
171
|
+
|
|
172
|
+
Closes the database connection.
|
|
173
|
+
|
|
174
|
+
## Response Types
|
|
175
|
+
|
|
176
|
+
Query results are returned in this format:
|
|
177
|
+
|
|
178
|
+
```typescript
|
|
179
|
+
interface QueryResult {
|
|
180
|
+
id: string;
|
|
181
|
+
score: number;
|
|
182
|
+
metadata: Record<string, any>;
|
|
183
|
+
vector?: number[]; // Only included if includeVector is true
|
|
184
|
+
document?: string; // Document text if available
|
|
185
|
+
}
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
## Error Handling
|
|
189
|
+
|
|
190
|
+
The store throws typed errors that can be caught:
|
|
191
|
+
|
|
192
|
+
```typescript
|
|
193
|
+
try {
|
|
194
|
+
await store.query({
|
|
195
|
+
tableName: "my_vectors",
|
|
196
|
+
queryVector: queryVector,
|
|
197
|
+
});
|
|
198
|
+
} catch (error) {
|
|
199
|
+
if (error instanceof Error) {
|
|
200
|
+
console.log(error.message);
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
## Best Practices
|
|
206
|
+
|
|
207
|
+
- Use the appropriate index type for your use case:
|
|
208
|
+
|
|
209
|
+
  - HNSW for better recall and performance when memory isn't constrained
|
|
210
|
+
  - IVF for better memory efficiency with large datasets
|
|
211
|
+
|
|
212
|
+
- For optimal performance with large datasets, consider adjusting `numPartitions` and `numSubVectors` values
|
|
213
|
+
|
|
214
|
+
- Use `close()` method to properly close connections when done with the database
|
|
215
|
+
|
|
216
|
+
- Store metadata with a consistent schema to simplify filtering operations
|
|
217
|
+
|
|
218
|
+
## Related
|
|
219
|
+
|
|
220
|
+
- [Metadata Filters](https://mastra.ai/reference/rag/metadata-filters)
|
package/dist/index.cjs
CHANGED
|
@@ -2408,6 +2408,16 @@ var LanceVectorStore = class _LanceVectorStore extends vector.MastraVector {
|
|
|
2408
2408
|
if (filter && Object.keys(filter).length > 0) {
|
|
2409
2409
|
const whereClause = this.filterTranslator(filter);
|
|
2410
2410
|
this.logger.debug(`Where clause generated: ${whereClause}`);
|
|
2411
|
+
const schema = await table.schema();
|
|
2412
|
+
const schemaColumns = new Set(schema.fields.map((f) => f.name));
|
|
2413
|
+
const filterColumns = this.extractFilterColumns(whereClause);
|
|
2414
|
+
const missingColumns = filterColumns.filter((col) => !schemaColumns.has(col));
|
|
2415
|
+
if (missingColumns.length > 0) {
|
|
2416
|
+
this.logger.debug(
|
|
2417
|
+
`Filter references columns not in schema: ${missingColumns.join(", ")}. Returning empty results.`
|
|
2418
|
+
);
|
|
2419
|
+
return [];
|
|
2420
|
+
}
|
|
2411
2421
|
query = query.where(whereClause);
|
|
2412
2422
|
}
|
|
2413
2423
|
if (!includeAllColumns && columns.length > 0) {
|
|
@@ -2420,16 +2430,16 @@ var LanceVectorStore = class _LanceVectorStore extends vector.MastraVector {
|
|
|
2420
2430
|
query = query.limit(topK);
|
|
2421
2431
|
const results = await query.toArray();
|
|
2422
2432
|
return results.map((result) => {
|
|
2423
|
-
|
|
2424
|
-
|
|
2425
|
-
|
|
2426
|
-
|
|
2427
|
-
|
|
2428
|
-
|
|
2429
|
-
}
|
|
2433
|
+
let metadata = {};
|
|
2434
|
+
if (result._metadata_json) {
|
|
2435
|
+
try {
|
|
2436
|
+
metadata = JSON.parse(result._metadata_json);
|
|
2437
|
+
} catch {
|
|
2438
|
+
metadata = this.extractFlatMetadata(result);
|
|
2430
2439
|
}
|
|
2431
|
-
}
|
|
2432
|
-
|
|
2440
|
+
} else {
|
|
2441
|
+
metadata = this.extractFlatMetadata(result);
|
|
2442
|
+
}
|
|
2433
2443
|
return {
|
|
2434
2444
|
id: String(result.id || ""),
|
|
2435
2445
|
metadata,
|
|
@@ -2525,6 +2535,9 @@ var LanceVectorStore = class _LanceVectorStore extends vector.MastraVector {
|
|
|
2525
2535
|
Object.entries(flattenedMetadata).forEach(([key, value]) => {
|
|
2526
2536
|
rowData[key] = value;
|
|
2527
2537
|
});
|
|
2538
|
+
rowData["_metadata_json"] = JSON.stringify(metadataItem);
|
|
2539
|
+
} else {
|
|
2540
|
+
rowData["_metadata_json"] = "";
|
|
2528
2541
|
}
|
|
2529
2542
|
return rowData;
|
|
2530
2543
|
});
|
|
@@ -2723,7 +2736,9 @@ var LanceVectorStore = class _LanceVectorStore extends vector.MastraVector {
|
|
|
2723
2736
|
`Table ${resolvedTableName} does not exist. Creating empty table with dimension ${dimension}.`
|
|
2724
2737
|
);
|
|
2725
2738
|
const initVector = new Array(dimension).fill(0);
|
|
2726
|
-
table = await this.lanceClient.createTable(resolvedTableName, [
|
|
2739
|
+
table = await this.lanceClient.createTable(resolvedTableName, [
|
|
2740
|
+
{ id: "__init__", vector: initVector, _metadata_json: "" }
|
|
2741
|
+
]);
|
|
2727
2742
|
try {
|
|
2728
2743
|
await table.delete("id = '__init__'");
|
|
2729
2744
|
} catch (deleteError) {
|
|
@@ -3071,6 +3086,17 @@ var LanceVectorStore = class _LanceVectorStore extends vector.MastraVector {
|
|
|
3071
3086
|
Object.entries(update.metadata).forEach(([key, value]) => {
|
|
3072
3087
|
rowData[`metadata_${key}`] = value;
|
|
3073
3088
|
});
|
|
3089
|
+
const hasMetadataJson = schema.fields.some((f) => f.name === "_metadata_json");
|
|
3090
|
+
if (hasMetadataJson) {
|
|
3091
|
+
let existingMetadata = {};
|
|
3092
|
+
if (record._metadata_json) {
|
|
3093
|
+
try {
|
|
3094
|
+
existingMetadata = JSON.parse(record._metadata_json);
|
|
3095
|
+
} catch {
|
|
3096
|
+
}
|
|
3097
|
+
}
|
|
3098
|
+
rowData["_metadata_json"] = JSON.stringify({ ...existingMetadata, ...update.metadata });
|
|
3099
|
+
}
|
|
3074
3100
|
}
|
|
3075
3101
|
return rowData;
|
|
3076
3102
|
});
|
|
@@ -3162,29 +3188,27 @@ var LanceVectorStore = class _LanceVectorStore extends vector.MastraVector {
|
|
|
3162
3188
|
}
|
|
3163
3189
|
}
|
|
3164
3190
|
/**
|
|
3165
|
-
*
|
|
3166
|
-
*
|
|
3191
|
+
* Extracts column names referenced in a SQL WHERE clause.
|
|
3192
|
+
* Identifies metadata_* prefixed identifiers used in filter conditions.
|
|
3167
3193
|
*/
|
|
3168
|
-
|
|
3169
|
-
const
|
|
3170
|
-
|
|
3171
|
-
|
|
3172
|
-
|
|
3173
|
-
|
|
3174
|
-
|
|
3175
|
-
|
|
3176
|
-
|
|
3177
|
-
|
|
3178
|
-
|
|
3194
|
+
extractFilterColumns(whereClause) {
|
|
3195
|
+
const matches = whereClause.match(/metadata_\w+/g);
|
|
3196
|
+
return matches ? [...new Set(matches)] : [];
|
|
3197
|
+
}
|
|
3198
|
+
/**
|
|
3199
|
+
* Extracts metadata from flattened column names (legacy data without _metadata_json).
|
|
3200
|
+
* Returns keys as-is after stripping the 'metadata_' prefix, without any unflattening.
|
|
3201
|
+
*/
|
|
3202
|
+
extractFlatMetadata(result) {
|
|
3203
|
+
const metadata = {};
|
|
3204
|
+
Object.keys(result).forEach((key) => {
|
|
3205
|
+
if (key !== "id" && key !== "score" && key !== "vector" && key !== "_distance" && key !== "_metadata_json") {
|
|
3206
|
+
if (key.startsWith("metadata_")) {
|
|
3207
|
+
metadata[key.substring("metadata_".length)] = result[key];
|
|
3179
3208
|
}
|
|
3180
|
-
current = current[part];
|
|
3181
|
-
}
|
|
3182
|
-
const lastPart = parts[parts.length - 1];
|
|
3183
|
-
if (lastPart) {
|
|
3184
|
-
current[lastPart] = value;
|
|
3185
3209
|
}
|
|
3186
3210
|
});
|
|
3187
|
-
return
|
|
3211
|
+
return metadata;
|
|
3188
3212
|
}
|
|
3189
3213
|
async deleteVectors({ indexName, filter, ids }) {
|
|
3190
3214
|
if (ids && filter) {
|