@mastra/lance 1.0.0 → 1.0.1-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,220 @@
1
+ # Lance Vector Store
2
+
3
+ The LanceVectorStore class provides vector search using [LanceDB](https://lancedb.github.io/lancedb/), an embedded vector database built on the Lance columnar format. It offers efficient storage and fast similarity search for both local development and production deployments.
4
+
5
+ ## Factory Method
6
+
7
+ The LanceVectorStore uses a factory pattern for creation. You should use the static `create()` method rather than the constructor directly.
8
+
9
+ **uri:** (`string`): Path to LanceDB database or URI for cloud deployments
10
+
11
+ **options?:** (`ConnectionOptions`): Additional connection options for LanceDB
12
+
13
+ ## Constructor Examples
14
+
15
+ You can create a `LanceVectorStore` instance using the static create method:
16
+
17
+ ```ts
18
+ import { LanceVectorStore } from "@mastra/lance";
19
+
20
+ // Connect to a local database
21
+ const vectorStore = await LanceVectorStore.create("/path/to/db");
22
+
23
+ // Connect to a LanceDB cloud database
24
+ const cloudStore = await LanceVectorStore.create("db://host:port");
25
+
26
+ // Connect to a cloud database with options
27
+ const s3Store = await LanceVectorStore.create("s3://bucket/db", {
28
+ storageOptions: { timeout: "60s" },
29
+ });
30
+ ```
31
+
32
+ ## Methods
33
+
34
+ ### createIndex()
35
+
36
+ **tableName:** (`string`): Name of the table to create index in
37
+
38
+ **indexName:** (`string`): Name of the index (column name) to create
39
+
40
+ **dimension:** (`number`): Vector dimension (must match your embedding model)
41
+
42
+ **metric?:** (`'cosine' | 'euclidean' | 'dotproduct'`): Distance metric for similarity search (Default: `cosine`)
43
+
44
+ **indexConfig?:** (`LanceIndexConfig`): Index configuration (Default: `{ type: 'hnsw' }`)
45
+
46
+ #### LanceIndexConfig
47
+
48
+ **type:** (`'ivfflat' | 'hnsw'`): Index type. `ivfflat` clusters vectors into lists for approximate search; `hnsw` is a graph-based index offering fast search times and high recall. (Default: `hnsw`)
49
+
50
+ **numPartitions?:** (`number`): Number of partitions for IVF indexes (Default: `128`)
51
+
52
+ **numSubVectors?:** (`number`): Number of sub-vectors for product quantization (Default: `16`)
53
+
54
+ **hnsw?:** (`HNSWConfig`): HNSW configuration object. `m?` (`number`): maximum number of connections per node (default: 16). `efConstruction?` (`number`): build-time complexity (default: 100).
55
+
56
+ ### createTable()
57
+
58
+ **tableName:** (`string`): Name of the table to create
59
+
60
+ **data:** (`Record<string, unknown>[] | TableLike`): Initial data for the table
61
+
62
+ **options?:** (`Partial<CreateTableOptions>`): Additional table creation options
63
+
64
+ ### upsert()
65
+
66
+ **tableName:** (`string`): Name of the table to upsert vectors into
67
+
68
+ **vectors:** (`number[][]`): Array of embedding vectors
69
+
70
+ **metadata?:** (`Record<string, any>[]`): Metadata for each vector
71
+
72
+ **ids?:** (`string[]`): Optional vector IDs (auto-generated if not provided)
73
+
74
+ ### query()
75
+
76
+ **tableName:** (`string`): Name of the table to query
77
+
78
+ **queryVector:** (`number[]`): Query vector
79
+
80
+ **topK?:** (`number`): Number of results to return (Default: `10`)
81
+
82
+ **filter?:** (`Record<string, any>`): Metadata filters
83
+
84
+ **includeVector?:** (`boolean`): Whether to include the vector in the result (Default: `false`)
85
+
86
+ **columns?:** (`string[]`): Specific columns to include in the result (Default: `[]`)
87
+
88
+ **includeAllColumns?:** (`boolean`): Whether to include all columns in the result (Default: `false`)
89
+
90
+ ### listTables()
91
+
92
+ Returns an array of table names as strings.
93
+
94
+ ```typescript
95
+ const tables = await vectorStore.listTables();
96
+ // ['my_vectors', 'embeddings', 'documents']
97
+ ```
98
+
99
+ ### getTableSchema()
100
+
101
+ **tableName:** (`string`): Name of the table to describe
102
+
103
+ Returns the schema of the specified table.
104
+
105
+ ### deleteTable()
106
+
107
+ **tableName:** (`string`): Name of the table to delete
108
+
109
+ ### deleteAllTables()
110
+
111
+ Deletes all tables in the database.
112
+
113
+ ### listIndexes()
114
+
115
+ Returns an array of index names as strings.
116
+
117
+ ### describeIndex()
118
+
119
+ **indexName:** (`string`): Name of the index to describe
120
+
121
+ Returns information about the index:
122
+
123
+ ```typescript
124
+ interface IndexStats {
125
+ dimension: number;
126
+ count: number;
127
+ metric: "cosine" | "euclidean" | "dotproduct";
128
+ type: "ivfflat" | "hnsw";
129
+ config: {
130
+ m?: number;
131
+ efConstruction?: number;
132
+ numPartitions?: number;
133
+ numSubVectors?: number;
134
+ };
135
+ }
136
+ ```
137
+
138
+ ### deleteIndex()
139
+
140
+ **indexName:** (`string`): Name of the index to delete
141
+
142
+ ### updateVector()
143
+
144
+ Update a single vector by ID or by metadata filter. Either `id` or `filter` must be provided, but not both.
145
+
146
+ **indexName:** (`string`): Name of the index containing the vector
147
+
148
+ **id?:** (`string`): ID of the vector to update (mutually exclusive with filter)
149
+
150
+ **filter?:** (`Record<string, any>`): Metadata filter to identify vector(s) to update (mutually exclusive with id)
151
+
152
+ **update:** (`{ vector?: number[]; metadata?: Record<string, any>; }`): Object containing the vector and/or metadata to update
153
+
154
+ ### deleteVector()
155
+
156
+ **indexName:** (`string`): Name of the index containing the vector
157
+
158
+ **id:** (`string`): ID of the vector to delete
159
+
160
+ ### deleteVectors()
161
+
162
+ Delete multiple vectors by IDs or by metadata filter. Either `ids` or `filter` must be provided, but not both.
163
+
164
+ **indexName:** (`string`): Name of the index containing the vectors to delete
165
+
166
+ **ids?:** (`string[]`): Array of vector IDs to delete (mutually exclusive with filter)
167
+
168
+ **filter?:** (`Record<string, any>`): Metadata filter to identify vectors to delete (mutually exclusive with ids)
169
+
170
+ ### close()
171
+
172
+ Closes the database connection.
173
+
174
+ ## Response Types
175
+
176
+ Query results are returned in this format:
177
+
178
+ ```typescript
179
+ interface QueryResult {
180
+ id: string;
181
+ score: number;
182
+ metadata: Record<string, any>;
183
+ vector?: number[]; // Only included if includeVector is true
184
+ document?: string; // Document text if available
185
+ }
186
+ ```
187
+
188
+ ## Error Handling
189
+
190
+ The store throws typed errors that can be caught:
191
+
192
+ ```typescript
193
+ try {
194
+ await store.query({
195
+ tableName: "my_vectors",
196
+ queryVector: queryVector,
197
+ });
198
+ } catch (error) {
199
+ if (error instanceof Error) {
200
+ console.log(error.message);
201
+ }
202
+ }
203
+ ```
204
+
205
+ ## Best Practices
206
+
207
+ - Use the appropriate index type for your use case:
208
+
209
+ - HNSW for better recall and performance when memory isn't constrained
210
+ - IVF for better memory efficiency with large datasets
211
+
212
+ - For optimal performance with large datasets, consider adjusting `numPartitions` and `numSubVectors` values
213
+
214
+ - Use `close()` method to properly close connections when done with the database
215
+
216
+ - Store metadata with a consistent schema to simplify filtering operations
217
+
218
+ ## Related
219
+
220
+ - [Metadata Filters](https://mastra.ai/reference/rag/metadata-filters)
package/dist/index.cjs CHANGED
@@ -2408,6 +2408,16 @@ var LanceVectorStore = class _LanceVectorStore extends vector.MastraVector {
2408
2408
  if (filter && Object.keys(filter).length > 0) {
2409
2409
  const whereClause = this.filterTranslator(filter);
2410
2410
  this.logger.debug(`Where clause generated: ${whereClause}`);
2411
+ const schema = await table.schema();
2412
+ const schemaColumns = new Set(schema.fields.map((f) => f.name));
2413
+ const filterColumns = this.extractFilterColumns(whereClause);
2414
+ const missingColumns = filterColumns.filter((col) => !schemaColumns.has(col));
2415
+ if (missingColumns.length > 0) {
2416
+ this.logger.debug(
2417
+ `Filter references columns not in schema: ${missingColumns.join(", ")}. Returning empty results.`
2418
+ );
2419
+ return [];
2420
+ }
2411
2421
  query = query.where(whereClause);
2412
2422
  }
2413
2423
  if (!includeAllColumns && columns.length > 0) {
@@ -2420,16 +2430,16 @@ var LanceVectorStore = class _LanceVectorStore extends vector.MastraVector {
2420
2430
  query = query.limit(topK);
2421
2431
  const results = await query.toArray();
2422
2432
  return results.map((result) => {
2423
- const flatMetadata = {};
2424
- Object.keys(result).forEach((key) => {
2425
- if (key !== "id" && key !== "score" && key !== "vector" && key !== "_distance") {
2426
- if (key.startsWith("metadata_")) {
2427
- const metadataKey = key.substring("metadata_".length);
2428
- flatMetadata[metadataKey] = result[key];
2429
- }
2433
+ let metadata = {};
2434
+ if (result._metadata_json) {
2435
+ try {
2436
+ metadata = JSON.parse(result._metadata_json);
2437
+ } catch {
2438
+ metadata = this.extractFlatMetadata(result);
2430
2439
  }
2431
- });
2432
- const metadata = this.unflattenObject(flatMetadata);
2440
+ } else {
2441
+ metadata = this.extractFlatMetadata(result);
2442
+ }
2433
2443
  return {
2434
2444
  id: String(result.id || ""),
2435
2445
  metadata,
@@ -2525,6 +2535,9 @@ var LanceVectorStore = class _LanceVectorStore extends vector.MastraVector {
2525
2535
  Object.entries(flattenedMetadata).forEach(([key, value]) => {
2526
2536
  rowData[key] = value;
2527
2537
  });
2538
+ rowData["_metadata_json"] = JSON.stringify(metadataItem);
2539
+ } else {
2540
+ rowData["_metadata_json"] = "";
2528
2541
  }
2529
2542
  return rowData;
2530
2543
  });
@@ -2723,7 +2736,9 @@ var LanceVectorStore = class _LanceVectorStore extends vector.MastraVector {
2723
2736
  `Table ${resolvedTableName} does not exist. Creating empty table with dimension ${dimension}.`
2724
2737
  );
2725
2738
  const initVector = new Array(dimension).fill(0);
2726
- table = await this.lanceClient.createTable(resolvedTableName, [{ id: "__init__", vector: initVector }]);
2739
+ table = await this.lanceClient.createTable(resolvedTableName, [
2740
+ { id: "__init__", vector: initVector, _metadata_json: "" }
2741
+ ]);
2727
2742
  try {
2728
2743
  await table.delete("id = '__init__'");
2729
2744
  } catch (deleteError) {
@@ -3071,6 +3086,17 @@ var LanceVectorStore = class _LanceVectorStore extends vector.MastraVector {
3071
3086
  Object.entries(update.metadata).forEach(([key, value]) => {
3072
3087
  rowData[`metadata_${key}`] = value;
3073
3088
  });
3089
+ const hasMetadataJson = schema.fields.some((f) => f.name === "_metadata_json");
3090
+ if (hasMetadataJson) {
3091
+ let existingMetadata = {};
3092
+ if (record._metadata_json) {
3093
+ try {
3094
+ existingMetadata = JSON.parse(record._metadata_json);
3095
+ } catch {
3096
+ }
3097
+ }
3098
+ rowData["_metadata_json"] = JSON.stringify({ ...existingMetadata, ...update.metadata });
3099
+ }
3074
3100
  }
3075
3101
  return rowData;
3076
3102
  });
@@ -3162,29 +3188,27 @@ var LanceVectorStore = class _LanceVectorStore extends vector.MastraVector {
3162
3188
  }
3163
3189
  }
3164
3190
  /**
3165
- * Converts a flattened object with keys using underscore notation back to a nested object.
3166
- * Example: { name: 'test', details_text: 'test' } → { name: 'test', details: { text: 'test' } }
3191
+ * Extracts column names referenced in a SQL WHERE clause.
3192
+ * Identifies metadata_* prefixed identifiers used in filter conditions.
3167
3193
  */
3168
- unflattenObject(obj) {
3169
- const result = {};
3170
- Object.keys(obj).forEach((key) => {
3171
- const value = obj[key];
3172
- const parts = key.split("_");
3173
- let current = result;
3174
- for (let i = 0; i < parts.length - 1; i++) {
3175
- const part = parts[i];
3176
- if (!part) continue;
3177
- if (!current[part] || typeof current[part] !== "object") {
3178
- current[part] = {};
3194
+ extractFilterColumns(whereClause) {
3195
+ const matches = whereClause.match(/metadata_\w+/g);
3196
+ return matches ? [...new Set(matches)] : [];
3197
+ }
3198
+ /**
3199
+ * Extracts metadata from flattened column names (legacy data without _metadata_json).
3200
+ * Returns keys as-is after stripping the 'metadata_' prefix, without any unflattening.
3201
+ */
3202
+ extractFlatMetadata(result) {
3203
+ const metadata = {};
3204
+ Object.keys(result).forEach((key) => {
3205
+ if (key !== "id" && key !== "score" && key !== "vector" && key !== "_distance" && key !== "_metadata_json") {
3206
+ if (key.startsWith("metadata_")) {
3207
+ metadata[key.substring("metadata_".length)] = result[key];
3179
3208
  }
3180
- current = current[part];
3181
- }
3182
- const lastPart = parts[parts.length - 1];
3183
- if (lastPart) {
3184
- current[lastPart] = value;
3185
3209
  }
3186
3210
  });
3187
- return result;
3211
+ return metadata;
3188
3212
  }
3189
3213
  async deleteVectors({ indexName, filter, ids }) {
3190
3214
  if (ids && filter) {