@mastra/lance 1.0.0-beta.10 → 1.0.0-beta.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +83 -0
- package/dist/docs/README.md +1 -1
- package/dist/docs/SKILL.md +1 -1
- package/dist/docs/SOURCE_MAP.json +1 -1
- package/dist/docs/rag/01-vector-databases.md +10 -5
- package/dist/index.cjs +134 -53
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +135 -54
- package/dist/index.js.map +1 -1
- package/dist/storage/domains/workflows/index.d.ts +1 -0
- package/dist/storage/domains/workflows/index.d.ts.map +1 -1
- package/dist/vector/index.d.ts +15 -5
- package/dist/vector/index.d.ts.map +1 -1
- package/package.json +4 -4
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,88 @@
|
|
|
1
1
|
# @mastra/lance
|
|
2
2
|
|
|
3
|
+
## 1.0.0-beta.11
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- Aligned vector store configuration with underlying library APIs, giving you access to all library options directly. ([#11742](https://github.com/mastra-ai/mastra/pull/11742))
|
|
8
|
+
|
|
9
|
+
**Why this change?**
|
|
10
|
+
|
|
11
|
+
Previously, each vector store defined its own configuration types that only exposed a subset of the underlying library's options. This meant users couldn't access advanced features like authentication, SSL, compression, or custom headers without creating their own client instances. Now, the configuration types extend the library types directly, so all options are available.
|
|
12
|
+
|
|
13
|
+
**@mastra/libsql** (Breaking)
|
|
14
|
+
|
|
15
|
+
Renamed `connectionUrl` to `url` to match the `@libsql/client` API and align with LibSQLStorage.
|
|
16
|
+
|
|
17
|
+
```typescript
|
|
18
|
+
// Before
|
|
19
|
+
new LibSQLVector({ id: 'my-vector', connectionUrl: 'file:./db.sqlite' });
|
|
20
|
+
|
|
21
|
+
// After
|
|
22
|
+
new LibSQLVector({ id: 'my-vector', url: 'file:./db.sqlite' });
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
**@mastra/opensearch** (Breaking)
|
|
26
|
+
|
|
27
|
+
Renamed `url` to `node` and added support for all OpenSearch `ClientOptions` including authentication, SSL, and compression.
|
|
28
|
+
|
|
29
|
+
```typescript
|
|
30
|
+
// Before
|
|
31
|
+
new OpenSearchVector({ id: 'my-vector', url: 'http://localhost:9200' });
|
|
32
|
+
|
|
33
|
+
// After
|
|
34
|
+
new OpenSearchVector({ id: 'my-vector', node: 'http://localhost:9200' });
|
|
35
|
+
|
|
36
|
+
// With authentication (now possible)
|
|
37
|
+
new OpenSearchVector({
|
|
38
|
+
id: 'my-vector',
|
|
39
|
+
node: 'https://localhost:9200',
|
|
40
|
+
auth: { username: 'admin', password: 'admin' },
|
|
41
|
+
ssl: { rejectUnauthorized: false },
|
|
42
|
+
});
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
**@mastra/pinecone** (Breaking)
|
|
46
|
+
|
|
47
|
+
Removed the `environment` parameter. Use `controllerHostUrl` instead (the actual Pinecone SDK field name). Added support for all `PineconeConfiguration` options.
|
|
48
|
+
|
|
49
|
+
```typescript
|
|
50
|
+
// Before
|
|
51
|
+
new PineconeVector({ id: 'my-vector', apiKey: '...', environment: '...' });
|
|
52
|
+
|
|
53
|
+
// After
|
|
54
|
+
new PineconeVector({ id: 'my-vector', apiKey: '...' });
|
|
55
|
+
|
|
56
|
+
// With custom controller host (if needed)
|
|
57
|
+
new PineconeVector({ id: 'my-vector', apiKey: '...', controllerHostUrl: '...' });
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
**@mastra/clickhouse**
|
|
61
|
+
|
|
62
|
+
Added support for all `ClickHouseClientConfigOptions` like `request_timeout`, `compression`, `keep_alive`, and `database`. Existing configurations continue to work unchanged.
|
|
63
|
+
|
|
64
|
+
**@mastra/cloudflare, @mastra/cloudflare-d1, @mastra/lance, @mastra/libsql, @mastra/mongodb, @mastra/pg, @mastra/upstash**
|
|
65
|
+
|
|
66
|
+
Improved logging by replacing `console.warn` with a structured logger in workflow storage domains.
|
|
67
|
+
|
|
68
|
+
**@mastra/deployer-cloud**
|
|
69
|
+
|
|
70
|
+
Updated internal LibSQLVector configuration for compatibility with the new API.
|
|
71
|
+
|
|
72
|
+
- Fixed `LanceVectorStore` failing when used with Memory. ([#11828](https://github.com/mastra-ai/mastra/pull/11828))
|
|
73
|
+
|
|
74
|
+
When using `LanceVectorStore` with `@mastra/memory`, operations would fail because Memory calls methods without a `tableName` parameter. The `tableName` parameter now defaults to `indexName` when not provided in `createIndex`, `query`, and `upsert` methods, matching the behavior of other vector stores like PgVector.
|
|
75
|
+
|
|
76
|
+
Additionally fixed three critical bugs:
|
|
77
|
+
1. **Upsert replacing entire table**: The `upsert` method was using `mode: 'overwrite'`, which replaced all rows in the table instead of updating only the specified rows. Now uses LanceDB's `mergeInsert` for proper upsert semantics (update existing rows, insert new ones).
|
|
78
|
+
2. **UpdateVector replacing entire table**: The `updateVector` method had the same issue — using `mode: 'overwrite'` caused all other rows to be deleted. Now uses `mergeInsert` to only update the targeted rows.
|
|
79
|
+
3. **Query not returning metadata by default**: When querying without specifying `columns`, only the `id` field was returned, causing metadata to be empty even though filters worked on metadata fields. Now returns all columns by default.
|
|
80
|
+
|
|
81
|
+
Fixes #11716
|
|
82
|
+
|
|
83
|
+
- Updated dependencies [[`ebae12a`](https://github.com/mastra-ai/mastra/commit/ebae12a2dd0212e75478981053b148a2c246962d), [`c61a0a5`](https://github.com/mastra-ai/mastra/commit/c61a0a5de4904c88fd8b3718bc26d1be1c2ec6e7), [`69136e7`](https://github.com/mastra-ai/mastra/commit/69136e748e32f57297728a4e0f9a75988462f1a7), [`449aed2`](https://github.com/mastra-ai/mastra/commit/449aed2ba9d507b75bf93d427646ea94f734dfd1), [`eb648a2`](https://github.com/mastra-ai/mastra/commit/eb648a2cc1728f7678768dd70cd77619b448dab9), [`0131105`](https://github.com/mastra-ai/mastra/commit/0131105532e83bdcbb73352fc7d0879eebf140dc), [`9d5059e`](https://github.com/mastra-ai/mastra/commit/9d5059eae810829935fb08e81a9bb7ecd5b144a7), [`ef756c6`](https://github.com/mastra-ai/mastra/commit/ef756c65f82d16531c43f49a27290a416611e526), [`b00ccd3`](https://github.com/mastra-ai/mastra/commit/b00ccd325ebd5d9e37e34dd0a105caae67eb568f), [`3bdfa75`](https://github.com/mastra-ai/mastra/commit/3bdfa7507a91db66f176ba8221aa28dd546e464a), [`e770de9`](https://github.com/mastra-ai/mastra/commit/e770de941a287a49b1964d44db5a5763d19890a6), [`52e2716`](https://github.com/mastra-ai/mastra/commit/52e2716b42df6eff443de72360ae83e86ec23993), [`27b4040`](https://github.com/mastra-ai/mastra/commit/27b4040bfa1a95d92546f420a02a626b1419a1d6), [`610a70b`](https://github.com/mastra-ai/mastra/commit/610a70bdad282079f0c630e0d7bb284578f20151), [`8dc7f55`](https://github.com/mastra-ai/mastra/commit/8dc7f55900395771da851dc7d78d53ae84fe34ec), [`8379099`](https://github.com/mastra-ai/mastra/commit/8379099fc467af6bef54dd7f80c9bd75bf8bbddf), [`8c0ec25`](https://github.com/mastra-ai/mastra/commit/8c0ec25646c8a7df253ed1e5ff4863a0d3f1316c), [`ff4d9a6`](https://github.com/mastra-ai/mastra/commit/ff4d9a6704fc87b31a380a76ed22736fdedbba5a), [`69821ef`](https://github.com/mastra-ai/mastra/commit/69821ef806482e2c44e2197ac0b050c3fe3a5285), [`1ed5716`](https://github.com/mastra-ai/mastra/commit/1ed5716830867b3774c4a1b43cc0d82935f32b96), 
[`4186bdd`](https://github.com/mastra-ai/mastra/commit/4186bdd00731305726fa06adba0b076a1d50b49f), [`7aaf973`](https://github.com/mastra-ai/mastra/commit/7aaf973f83fbbe9521f1f9e7a4fd99b8de464617)]:
|
|
84
|
+
- @mastra/core@1.0.0-beta.22
|
|
85
|
+
|
|
3
86
|
## 1.0.0-beta.10
|
|
4
87
|
|
|
5
88
|
### Patch Changes
|
package/dist/docs/README.md
CHANGED
package/dist/docs/SKILL.md
CHANGED
|
@@ -12,6 +12,7 @@ After generating embeddings, you need to store them in a database that supports
|
|
|
12
12
|
import { MongoDBVector } from "@mastra/mongodb";
|
|
13
13
|
|
|
14
14
|
const store = new MongoDBVector({
|
|
15
|
+
id: 'mongodb-vector',
|
|
15
16
|
uri: process.env.MONGODB_URI,
|
|
16
17
|
dbName: process.env.MONGODB_DATABASE,
|
|
17
18
|
});
|
|
@@ -144,6 +145,7 @@ await store.upsert({
|
|
|
144
145
|
import { AstraVector } from "@mastra/astra";
|
|
145
146
|
|
|
146
147
|
const store = new AstraVector({
|
|
148
|
+
id: 'astra-vector',
|
|
147
149
|
token: process.env.ASTRA_DB_TOKEN,
|
|
148
150
|
endpoint: process.env.ASTRA_DB_ENDPOINT,
|
|
149
151
|
keyspace: process.env.ASTRA_DB_KEYSPACE,
|
|
@@ -170,7 +172,7 @@ import { LibSQLVector } from "@mastra/core/vector/libsql";
|
|
|
170
172
|
|
|
171
173
|
const store = new LibSQLVector({
|
|
172
174
|
id: 'libsql-vector',
|
|
173
|
-
|
|
175
|
+
url: process.env.DATABASE_URL,
|
|
174
176
|
authToken: process.env.DATABASE_AUTH_TOKEN, // Optional: for Turso cloud databases
|
|
175
177
|
});
|
|
176
178
|
|
|
@@ -217,6 +219,7 @@ await store.upsert({
|
|
|
217
219
|
import { CloudflareVector } from "@mastra/vectorize";
|
|
218
220
|
|
|
219
221
|
const store = new CloudflareVector({
|
|
222
|
+
id: 'cloudflare-vector',
|
|
220
223
|
accountId: process.env.CF_ACCOUNT_ID,
|
|
221
224
|
apiToken: process.env.CF_API_TOKEN,
|
|
222
225
|
});
|
|
@@ -238,7 +241,7 @@ await store.upsert({
|
|
|
238
241
|
```ts title="vector-store.ts"
|
|
239
242
|
import { OpenSearchVector } from "@mastra/opensearch";
|
|
240
243
|
|
|
241
|
-
const store = new OpenSearchVector({
|
|
244
|
+
const store = new OpenSearchVector({ id: "opensearch", node: process.env.OPENSEARCH_URL });
|
|
242
245
|
|
|
243
246
|
await store.createIndex({
|
|
244
247
|
indexName: "my-collection",
|
|
@@ -259,7 +262,7 @@ await store.upsert({
|
|
|
259
262
|
```ts title="vector-store.ts"
|
|
260
263
|
import { ElasticSearchVector } from "@mastra/elasticsearch";
|
|
261
264
|
|
|
262
|
-
const store = new ElasticSearchVector({ url: process.env.ELASTICSEARCH_URL });
|
|
265
|
+
const store = new ElasticSearchVector({ id: 'elasticsearch-vector', url: process.env.ELASTICSEARCH_URL });
|
|
263
266
|
|
|
264
267
|
await store.createIndex({
|
|
265
268
|
indexName: "my-collection",
|
|
@@ -280,6 +283,7 @@ await store.upsert({
|
|
|
280
283
|
import { CouchbaseVector } from "@mastra/couchbase";
|
|
281
284
|
|
|
282
285
|
const store = new CouchbaseVector({
|
|
286
|
+
id: 'couchbase-vector',
|
|
283
287
|
connectionString: process.env.COUCHBASE_CONNECTION_STRING,
|
|
284
288
|
username: process.env.COUCHBASE_USERNAME,
|
|
285
289
|
password: process.env.COUCHBASE_PASSWORD,
|
|
@@ -331,6 +335,7 @@ For detailed setup instructions and best practices, see the [official LanceDB do
|
|
|
331
335
|
import { S3Vectors } from "@mastra/s3vectors";
|
|
332
336
|
|
|
333
337
|
const store = new S3Vectors({
|
|
338
|
+
id: 's3-vectors',
|
|
334
339
|
vectorBucketName: "my-vector-bucket",
|
|
335
340
|
clientConfig: {
|
|
336
341
|
region: "us-east-1",
|
|
@@ -373,7 +378,7 @@ The dimension size must match the output dimension of your chosen embedding mode
|
|
|
373
378
|
- Cohere embed-multilingual-v3: 1024 dimensions
|
|
374
379
|
- Google text-embedding-004: 768 dimensions (or custom)
|
|
375
380
|
|
|
376
|
-
|
|
381
|
+
> **Note:**
|
|
377
382
|
Index dimensions cannot be changed after creation. To use a different model, delete and recreate the index with the new dimension size.
|
|
378
383
|
|
|
379
384
|
### Naming Rules for Databases
|
|
@@ -537,7 +542,7 @@ The upsert operation:
|
|
|
537
542
|
|
|
538
543
|
Vector stores support rich metadata (any JSON-serializable fields) for filtering and organization. Since metadata is stored with no fixed schema, use consistent field naming to avoid unexpected query results.
|
|
539
544
|
|
|
540
|
-
|
|
545
|
+
> **Note:**
|
|
541
546
|
Metadata is crucial for vector storage — without it, you'd only have numerical embeddings with no way to return the original text or filter results. Always store at least the source text as metadata.
|
|
542
547
|
|
|
543
548
|
```ts
|
package/dist/index.cjs
CHANGED
|
@@ -1605,24 +1605,6 @@ var StoreScoresLance = class extends storage.ScoresStorage {
|
|
|
1605
1605
|
function escapeSql(str) {
|
|
1606
1606
|
return str.replace(/'/g, "''");
|
|
1607
1607
|
}
|
|
1608
|
-
function parseWorkflowRun(row) {
|
|
1609
|
-
let parsedSnapshot = row.snapshot;
|
|
1610
|
-
if (typeof parsedSnapshot === "string") {
|
|
1611
|
-
try {
|
|
1612
|
-
parsedSnapshot = JSON.parse(row.snapshot);
|
|
1613
|
-
} catch (e) {
|
|
1614
|
-
console.warn(`Failed to parse snapshot for workflow ${row.workflow_name}: ${e}`);
|
|
1615
|
-
}
|
|
1616
|
-
}
|
|
1617
|
-
return {
|
|
1618
|
-
workflowName: row.workflow_name,
|
|
1619
|
-
runId: row.run_id,
|
|
1620
|
-
snapshot: parsedSnapshot,
|
|
1621
|
-
createdAt: storage.ensureDate(row.createdAt),
|
|
1622
|
-
updatedAt: storage.ensureDate(row.updatedAt),
|
|
1623
|
-
resourceId: row.resourceId
|
|
1624
|
-
};
|
|
1625
|
-
}
|
|
1626
1608
|
var StoreWorkflowsLance = class extends storage.WorkflowsStorage {
|
|
1627
1609
|
client;
|
|
1628
1610
|
#db;
|
|
@@ -1632,6 +1614,24 @@ var StoreWorkflowsLance = class extends storage.WorkflowsStorage {
|
|
|
1632
1614
|
this.client = client;
|
|
1633
1615
|
this.#db = new LanceDB({ client });
|
|
1634
1616
|
}
|
|
1617
|
+
parseWorkflowRun(row) {
|
|
1618
|
+
let parsedSnapshot = row.snapshot;
|
|
1619
|
+
if (typeof parsedSnapshot === "string") {
|
|
1620
|
+
try {
|
|
1621
|
+
parsedSnapshot = JSON.parse(row.snapshot);
|
|
1622
|
+
} catch (e) {
|
|
1623
|
+
this.logger.warn(`Failed to parse snapshot for workflow ${row.workflow_name}: ${e}`);
|
|
1624
|
+
}
|
|
1625
|
+
}
|
|
1626
|
+
return {
|
|
1627
|
+
workflowName: row.workflow_name,
|
|
1628
|
+
runId: row.run_id,
|
|
1629
|
+
snapshot: parsedSnapshot,
|
|
1630
|
+
createdAt: storage.ensureDate(row.createdAt),
|
|
1631
|
+
updatedAt: storage.ensureDate(row.updatedAt),
|
|
1632
|
+
resourceId: row.resourceId
|
|
1633
|
+
};
|
|
1634
|
+
}
|
|
1635
1635
|
async init() {
|
|
1636
1636
|
const schema = storage.TABLE_SCHEMAS[storage.TABLE_WORKFLOW_SNAPSHOT];
|
|
1637
1637
|
await this.#db.createTable({ tableName: storage.TABLE_WORKFLOW_SNAPSHOT, schema });
|
|
@@ -1763,7 +1763,7 @@ var StoreWorkflowsLance = class extends storage.WorkflowsStorage {
|
|
|
1763
1763
|
const records = await query.toArray();
|
|
1764
1764
|
if (records.length === 0) return null;
|
|
1765
1765
|
const record = records[0];
|
|
1766
|
-
return parseWorkflowRun(record);
|
|
1766
|
+
return this.parseWorkflowRun(record);
|
|
1767
1767
|
} catch (error$1) {
|
|
1768
1768
|
throw new error.MastraError(
|
|
1769
1769
|
{
|
|
@@ -1840,7 +1840,7 @@ var StoreWorkflowsLance = class extends storage.WorkflowsStorage {
|
|
|
1840
1840
|
}
|
|
1841
1841
|
const records = await query.toArray();
|
|
1842
1842
|
return {
|
|
1843
|
-
runs: records.map((record) => parseWorkflowRun(record)),
|
|
1843
|
+
runs: records.map((record) => this.parseWorkflowRun(record)),
|
|
1844
1844
|
total: total || records.length
|
|
1845
1845
|
};
|
|
1846
1846
|
} catch (error$1) {
|
|
@@ -2330,6 +2330,7 @@ var LanceVectorStore = class _LanceVectorStore extends vector.MastraVector {
|
|
|
2330
2330
|
}
|
|
2331
2331
|
async query({
|
|
2332
2332
|
tableName,
|
|
2333
|
+
indexName,
|
|
2333
2334
|
queryVector,
|
|
2334
2335
|
filter,
|
|
2335
2336
|
includeVector = false,
|
|
@@ -2337,12 +2338,13 @@ var LanceVectorStore = class _LanceVectorStore extends vector.MastraVector {
|
|
|
2337
2338
|
columns = [],
|
|
2338
2339
|
includeAllColumns = false
|
|
2339
2340
|
}) {
|
|
2341
|
+
const resolvedTableName = tableName ?? indexName;
|
|
2340
2342
|
try {
|
|
2341
2343
|
if (!this.lanceClient) {
|
|
2342
2344
|
throw new Error("LanceDB client not initialized. Use LanceVectorStore.create() to create an instance");
|
|
2343
2345
|
}
|
|
2344
|
-
if (!
|
|
2345
|
-
throw new Error("tableName is required");
|
|
2346
|
+
if (!resolvedTableName) {
|
|
2347
|
+
throw new Error("tableName or indexName is required");
|
|
2346
2348
|
}
|
|
2347
2349
|
if (!queryVector) {
|
|
2348
2350
|
throw new Error("queryVector is required");
|
|
@@ -2353,25 +2355,30 @@ var LanceVectorStore = class _LanceVectorStore extends vector.MastraVector {
|
|
|
2353
2355
|
id: storage.createVectorErrorId("LANCE", "QUERY", "INVALID_ARGS"),
|
|
2354
2356
|
domain: error.ErrorDomain.STORAGE,
|
|
2355
2357
|
category: error.ErrorCategory.USER,
|
|
2356
|
-
text:
|
|
2357
|
-
details: { tableName }
|
|
2358
|
+
text: error$1 instanceof Error ? error$1.message : "Invalid query arguments",
|
|
2359
|
+
details: { tableName: resolvedTableName }
|
|
2358
2360
|
},
|
|
2359
2361
|
error$1
|
|
2360
2362
|
);
|
|
2361
2363
|
}
|
|
2362
2364
|
try {
|
|
2363
|
-
const
|
|
2364
|
-
|
|
2365
|
-
|
|
2366
|
-
|
|
2365
|
+
const tables = await this.lanceClient.tableNames();
|
|
2366
|
+
if (!tables.includes(resolvedTableName)) {
|
|
2367
|
+
this.logger.debug(`Table ${resolvedTableName} does not exist. Returning empty results.`);
|
|
2368
|
+
return [];
|
|
2367
2369
|
}
|
|
2370
|
+
const table = await this.lanceClient.openTable(resolvedTableName);
|
|
2368
2371
|
let query = table.search(queryVector);
|
|
2369
2372
|
if (filter && Object.keys(filter).length > 0) {
|
|
2370
2373
|
const whereClause = this.filterTranslator(filter);
|
|
2371
2374
|
this.logger.debug(`Where clause generated: ${whereClause}`);
|
|
2372
2375
|
query = query.where(whereClause);
|
|
2373
2376
|
}
|
|
2374
|
-
if (!includeAllColumns &&
|
|
2377
|
+
if (!includeAllColumns && columns.length > 0) {
|
|
2378
|
+
const selectColumns = [...columns];
|
|
2379
|
+
if (!selectColumns.includes("id")) {
|
|
2380
|
+
selectColumns.push("id");
|
|
2381
|
+
}
|
|
2375
2382
|
query = query.select(selectColumns);
|
|
2376
2383
|
}
|
|
2377
2384
|
query = query.limit(topK);
|
|
@@ -2401,7 +2408,7 @@ var LanceVectorStore = class _LanceVectorStore extends vector.MastraVector {
|
|
|
2401
2408
|
id: storage.createVectorErrorId("LANCE", "QUERY", "FAILED"),
|
|
2402
2409
|
domain: error.ErrorDomain.STORAGE,
|
|
2403
2410
|
category: error.ErrorCategory.THIRD_PARTY,
|
|
2404
|
-
details: { tableName, includeVector, columnsCount: columns?.length, includeAllColumns }
|
|
2411
|
+
details: { tableName: resolvedTableName, includeVector, columnsCount: columns?.length, includeAllColumns }
|
|
2405
2412
|
},
|
|
2406
2413
|
error$1
|
|
2407
2414
|
);
|
|
@@ -2436,13 +2443,14 @@ var LanceVectorStore = class _LanceVectorStore extends vector.MastraVector {
|
|
|
2436
2443
|
const translator = new LanceFilterTranslator();
|
|
2437
2444
|
return translator.translate(prefixedFilter);
|
|
2438
2445
|
}
|
|
2439
|
-
async upsert({ tableName, vectors, metadata = [], ids = [] }) {
|
|
2446
|
+
async upsert({ tableName, indexName, vectors, metadata = [], ids = [] }) {
|
|
2447
|
+
const resolvedTableName = tableName ?? indexName;
|
|
2440
2448
|
try {
|
|
2441
2449
|
if (!this.lanceClient) {
|
|
2442
2450
|
throw new Error("LanceDB client not initialized. Use LanceVectorStore.create() to create an instance");
|
|
2443
2451
|
}
|
|
2444
|
-
if (!
|
|
2445
|
-
throw new Error("tableName is required");
|
|
2452
|
+
if (!resolvedTableName) {
|
|
2453
|
+
throw new Error("tableName or indexName is required");
|
|
2446
2454
|
}
|
|
2447
2455
|
if (!vectors || !Array.isArray(vectors) || vectors.length === 0) {
|
|
2448
2456
|
throw new Error("vectors array is required and must not be empty");
|
|
@@ -2453,18 +2461,21 @@ var LanceVectorStore = class _LanceVectorStore extends vector.MastraVector {
|
|
|
2453
2461
|
id: storage.createVectorErrorId("LANCE", "UPSERT", "INVALID_ARGS"),
|
|
2454
2462
|
domain: error.ErrorDomain.STORAGE,
|
|
2455
2463
|
category: error.ErrorCategory.USER,
|
|
2456
|
-
text:
|
|
2457
|
-
details: { tableName }
|
|
2464
|
+
text: error$1 instanceof Error ? error$1.message : "Invalid upsert arguments",
|
|
2465
|
+
details: { tableName: resolvedTableName }
|
|
2458
2466
|
},
|
|
2459
2467
|
error$1
|
|
2460
2468
|
);
|
|
2461
2469
|
}
|
|
2462
2470
|
try {
|
|
2463
2471
|
const tables = await this.lanceClient.tableNames();
|
|
2464
|
-
|
|
2465
|
-
|
|
2472
|
+
const tableExists = tables.includes(resolvedTableName);
|
|
2473
|
+
let table = null;
|
|
2474
|
+
if (!tableExists) {
|
|
2475
|
+
this.logger.debug(`Table ${resolvedTableName} does not exist. Creating it with the first upsert data.`);
|
|
2476
|
+
} else {
|
|
2477
|
+
table = await this.lanceClient.openTable(resolvedTableName);
|
|
2466
2478
|
}
|
|
2467
|
-
const table = await this.lanceClient.openTable(tableName);
|
|
2468
2479
|
const vectorIds = ids.length === vectors.length ? ids : vectors.map((_, i) => ids[i] || crypto.randomUUID());
|
|
2469
2480
|
const data = vectors.map((vector, i) => {
|
|
2470
2481
|
const id = String(vectorIds[i]);
|
|
@@ -2481,7 +2492,42 @@ var LanceVectorStore = class _LanceVectorStore extends vector.MastraVector {
|
|
|
2481
2492
|
}
|
|
2482
2493
|
return rowData;
|
|
2483
2494
|
});
|
|
2484
|
-
|
|
2495
|
+
if (table !== null) {
|
|
2496
|
+
const rowCount = await table.countRows();
|
|
2497
|
+
const schema = await table.schema();
|
|
2498
|
+
const existingColumns = new Set(schema.fields.map((f) => f.name));
|
|
2499
|
+
const dataColumns = new Set(Object.keys(data[0] || {}));
|
|
2500
|
+
const extraColumns = [...dataColumns].filter((col) => !existingColumns.has(col));
|
|
2501
|
+
const missingSchemaColumns = [...existingColumns].filter((col) => !dataColumns.has(col));
|
|
2502
|
+
const hasSchemaMismatch = extraColumns.length > 0 || missingSchemaColumns.length > 0;
|
|
2503
|
+
if (rowCount === 0 && extraColumns.length > 0) {
|
|
2504
|
+
this.logger.warn(
|
|
2505
|
+
`Table ${resolvedTableName} is empty and data has extra columns ${extraColumns.join(", ")}. Recreating with new schema.`
|
|
2506
|
+
);
|
|
2507
|
+
await this.lanceClient.dropTable(resolvedTableName);
|
|
2508
|
+
await this.lanceClient.createTable(resolvedTableName, data);
|
|
2509
|
+
} else if (hasSchemaMismatch) {
|
|
2510
|
+
if (extraColumns.length > 0) {
|
|
2511
|
+
this.logger.warn(
|
|
2512
|
+
`Table ${resolvedTableName} has ${rowCount} rows. Columns ${extraColumns.join(", ")} will be dropped from upsert.`
|
|
2513
|
+
);
|
|
2514
|
+
}
|
|
2515
|
+
const schemaFieldNames = schema.fields.map((f) => f.name);
|
|
2516
|
+
const normalizedData = data.map((row) => {
|
|
2517
|
+
const normalized = {};
|
|
2518
|
+
for (const col of schemaFieldNames) {
|
|
2519
|
+
normalized[col] = col in row ? row[col] : null;
|
|
2520
|
+
}
|
|
2521
|
+
return normalized;
|
|
2522
|
+
});
|
|
2523
|
+
await table.mergeInsert("id").whenMatchedUpdateAll().whenNotMatchedInsertAll().execute(normalizedData);
|
|
2524
|
+
} else {
|
|
2525
|
+
await table.mergeInsert("id").whenMatchedUpdateAll().whenNotMatchedInsertAll().execute(data);
|
|
2526
|
+
}
|
|
2527
|
+
} else {
|
|
2528
|
+
this.logger.debug(`Creating table ${resolvedTableName} with initial data`);
|
|
2529
|
+
await this.lanceClient.createTable(resolvedTableName, data);
|
|
2530
|
+
}
|
|
2485
2531
|
return vectorIds;
|
|
2486
2532
|
} catch (error$1) {
|
|
2487
2533
|
throw new error.MastraError(
|
|
@@ -2489,7 +2535,12 @@ var LanceVectorStore = class _LanceVectorStore extends vector.MastraVector {
|
|
|
2489
2535
|
id: storage.createVectorErrorId("LANCE", "UPSERT", "FAILED"),
|
|
2490
2536
|
domain: error.ErrorDomain.STORAGE,
|
|
2491
2537
|
category: error.ErrorCategory.THIRD_PARTY,
|
|
2492
|
-
details: {
|
|
2538
|
+
details: {
|
|
2539
|
+
tableName: resolvedTableName,
|
|
2540
|
+
vectorCount: vectors.length,
|
|
2541
|
+
metadataCount: metadata.length,
|
|
2542
|
+
idsCount: ids.length
|
|
2543
|
+
}
|
|
2493
2544
|
},
|
|
2494
2545
|
error$1
|
|
2495
2546
|
);
|
|
@@ -2586,7 +2637,17 @@ var LanceVectorStore = class _LanceVectorStore extends vector.MastraVector {
|
|
|
2586
2637
|
}
|
|
2587
2638
|
}
|
|
2588
2639
|
/**
|
|
2589
|
-
*
|
|
2640
|
+
* Creates a vector index on a table.
|
|
2641
|
+
*
|
|
2642
|
+
* The behavior of `indexName` depends on whether `tableName` is provided:
|
|
2643
|
+
* - With `tableName`: `indexName` is the column to index (advanced use case)
|
|
2644
|
+
* - Without `tableName`: `indexName` becomes the table name, and 'vector' is used as the column (Memory compatibility)
|
|
2645
|
+
*
|
|
2646
|
+
* @param tableName - Optional table name. If not provided, defaults to indexName.
|
|
2647
|
+
* @param indexName - The index/column name, or table name if tableName is not provided.
|
|
2648
|
+
* @param dimension - Vector dimension size.
|
|
2649
|
+
* @param metric - Distance metric: 'cosine', 'euclidean', or 'dotproduct'.
|
|
2650
|
+
* @param indexConfig - Optional index configuration.
|
|
2590
2651
|
*/
|
|
2591
2652
|
async createIndex({
|
|
2592
2653
|
tableName,
|
|
@@ -2595,13 +2656,12 @@ var LanceVectorStore = class _LanceVectorStore extends vector.MastraVector {
|
|
|
2595
2656
|
metric = "cosine",
|
|
2596
2657
|
indexConfig = {}
|
|
2597
2658
|
}) {
|
|
2659
|
+
const resolvedTableName = tableName ?? indexName;
|
|
2660
|
+
const columnToIndex = tableName ? indexName : "vector";
|
|
2598
2661
|
try {
|
|
2599
2662
|
if (!this.lanceClient) {
|
|
2600
2663
|
throw new Error("LanceDB client not initialized. Use LanceVectorStore.create() to create an instance");
|
|
2601
2664
|
}
|
|
2602
|
-
if (!tableName) {
|
|
2603
|
-
throw new Error("tableName is required");
|
|
2604
|
-
}
|
|
2605
2665
|
if (!indexName) {
|
|
2606
2666
|
throw new Error("indexName is required");
|
|
2607
2667
|
}
|
|
@@ -2614,19 +2674,33 @@ var LanceVectorStore = class _LanceVectorStore extends vector.MastraVector {
|
|
|
2614
2674
|
id: storage.createVectorErrorId("LANCE", "CREATE_INDEX", "INVALID_ARGS"),
|
|
2615
2675
|
domain: error.ErrorDomain.STORAGE,
|
|
2616
2676
|
category: error.ErrorCategory.USER,
|
|
2617
|
-
details: { tableName:
|
|
2677
|
+
details: { tableName: resolvedTableName, indexName, dimension, metric }
|
|
2618
2678
|
},
|
|
2619
2679
|
err
|
|
2620
2680
|
);
|
|
2621
2681
|
}
|
|
2622
2682
|
try {
|
|
2623
2683
|
const tables = await this.lanceClient.tableNames();
|
|
2624
|
-
|
|
2625
|
-
|
|
2626
|
-
|
|
2684
|
+
let table;
|
|
2685
|
+
if (!tables.includes(resolvedTableName)) {
|
|
2686
|
+
this.logger.debug(
|
|
2687
|
+
`Table ${resolvedTableName} does not exist. Creating empty table with dimension ${dimension}.`
|
|
2627
2688
|
);
|
|
2689
|
+
const initVector = new Array(dimension).fill(0);
|
|
2690
|
+
table = await this.lanceClient.createTable(resolvedTableName, [{ id: "__init__", vector: initVector }]);
|
|
2691
|
+
try {
|
|
2692
|
+
await table.delete("id = '__init__'");
|
|
2693
|
+
} catch (deleteError) {
|
|
2694
|
+
this.logger.warn(
|
|
2695
|
+
`Failed to delete initialization row from ${resolvedTableName}. Subsequent queries may include '__init__' row.`,
|
|
2696
|
+
deleteError
|
|
2697
|
+
);
|
|
2698
|
+
}
|
|
2699
|
+
this.logger.debug(`Table ${resolvedTableName} created. Index creation deferred until data is available.`);
|
|
2700
|
+
return;
|
|
2701
|
+
} else {
|
|
2702
|
+
table = await this.lanceClient.openTable(resolvedTableName);
|
|
2628
2703
|
}
|
|
2629
|
-
const table = await this.lanceClient.openTable(tableName);
|
|
2630
2704
|
let metricType;
|
|
2631
2705
|
if (metric === "euclidean") {
|
|
2632
2706
|
metricType = "l2";
|
|
@@ -2635,8 +2709,15 @@ var LanceVectorStore = class _LanceVectorStore extends vector.MastraVector {
|
|
|
2635
2709
|
} else if (metric === "cosine") {
|
|
2636
2710
|
metricType = "cosine";
|
|
2637
2711
|
}
|
|
2712
|
+
const rowCount = await table.countRows();
|
|
2713
|
+
if (rowCount < 256) {
|
|
2714
|
+
this.logger.warn(
|
|
2715
|
+
`Table ${resolvedTableName} has ${rowCount} rows, which is below the 256 row minimum for index creation. Skipping index creation.`
|
|
2716
|
+
);
|
|
2717
|
+
return;
|
|
2718
|
+
}
|
|
2638
2719
|
if (indexConfig.type === "ivfflat") {
|
|
2639
|
-
await table.createIndex(
|
|
2720
|
+
await table.createIndex(columnToIndex, {
|
|
2640
2721
|
config: lancedb.Index.ivfPq({
|
|
2641
2722
|
numPartitions: indexConfig.numPartitions || 128,
|
|
2642
2723
|
numSubVectors: indexConfig.numSubVectors || 16,
|
|
@@ -2645,7 +2726,7 @@ var LanceVectorStore = class _LanceVectorStore extends vector.MastraVector {
|
|
|
2645
2726
|
});
|
|
2646
2727
|
} else {
|
|
2647
2728
|
this.logger.debug("Creating HNSW PQ index with config:", indexConfig);
|
|
2648
|
-
await table.createIndex(
|
|
2729
|
+
await table.createIndex(columnToIndex, {
|
|
2649
2730
|
config: lancedb.Index.hnswPq({
|
|
2650
2731
|
m: indexConfig?.hnsw?.m || 16,
|
|
2651
2732
|
efConstruction: indexConfig?.hnsw?.efConstruction || 100,
|
|
@@ -2659,7 +2740,7 @@ var LanceVectorStore = class _LanceVectorStore extends vector.MastraVector {
|
|
|
2659
2740
|
id: storage.createVectorErrorId("LANCE", "CREATE_INDEX", "FAILED"),
|
|
2660
2741
|
domain: error.ErrorDomain.STORAGE,
|
|
2661
2742
|
category: error.ErrorCategory.THIRD_PARTY,
|
|
2662
|
-
details: { tableName:
|
|
2743
|
+
details: { tableName: resolvedTableName, indexName, dimension }
|
|
2663
2744
|
},
|
|
2664
2745
|
error$1
|
|
2665
2746
|
);
|
|
@@ -2957,7 +3038,7 @@ var LanceVectorStore = class _LanceVectorStore extends vector.MastraVector {
|
|
|
2957
3038
|
}
|
|
2958
3039
|
return rowData;
|
|
2959
3040
|
});
|
|
2960
|
-
await table.
|
|
3041
|
+
await table.mergeInsert("id").whenMatchedUpdateAll().whenNotMatchedInsertAll().execute(updatedRecords);
|
|
2961
3042
|
return;
|
|
2962
3043
|
}
|
|
2963
3044
|
} catch (err) {
|