@mastra/couchbase 0.0.2-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +23 -0
- package/CHANGELOG.md +13 -0
- package/LICENSE.md +46 -0
- package/README.md +267 -0
- package/dist/_tsup-dts-rollup.d.cts +39 -0
- package/dist/_tsup-dts-rollup.d.ts +39 -0
- package/dist/index.cjs +243 -0
- package/dist/index.d.cts +2 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +240 -0
- package/docker-compose.yaml +21 -0
- package/eslint.config.js +6 -0
- package/package.json +46 -0
- package/scripts/start-docker.js +14 -0
- package/scripts/stop-docker.js +7 -0
- package/src/index.ts +1 -0
- package/src/vector/index.integration.test.ts +558 -0
- package/src/vector/index.ts +276 -0
- package/src/vector/index.unit.test.ts +737 -0
- package/tsconfig.json +5 -0
- package/vitest.config.ts +11 -0
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
|
|
2
|
+
> @mastra/couchbase@0.0.2-alpha.0 build /home/runner/work/mastra/mastra/stores/couchbase
|
|
3
|
+
> tsup src/index.ts --format esm,cjs --experimental-dts --clean --treeshake=smallest --splitting
|
|
4
|
+
|
|
5
|
+
[34mCLI[39m Building entry: src/index.ts
|
|
6
|
+
[34mCLI[39m Using tsconfig: tsconfig.json
|
|
7
|
+
[34mCLI[39m tsup v8.4.0
|
|
8
|
+
[34mTSC[39m Build start
|
|
9
|
+
[32mTSC[39m ⚡️ Build success in 5560ms
|
|
10
|
+
[34mDTS[39m Build start
|
|
11
|
+
[34mCLI[39m Target: es2022
|
|
12
|
+
Analysis will use the bundled TypeScript version 5.8.3
|
|
13
|
+
[36mWriting package typings: /home/runner/work/mastra/mastra/stores/couchbase/dist/_tsup-dts-rollup.d.ts[39m
|
|
14
|
+
Analysis will use the bundled TypeScript version 5.8.3
|
|
15
|
+
[36mWriting package typings: /home/runner/work/mastra/mastra/stores/couchbase/dist/_tsup-dts-rollup.d.cts[39m
|
|
16
|
+
[32mDTS[39m ⚡️ Build success in 8777ms
|
|
17
|
+
[34mCLI[39m Cleaning output folder
|
|
18
|
+
[34mESM[39m Build start
|
|
19
|
+
[34mCJS[39m Build start
|
|
20
|
+
[32mCJS[39m [1mdist/index.cjs [22m[32m8.15 KB[39m
|
|
21
|
+
[32mCJS[39m ⚡️ Build success in 496ms
|
|
22
|
+
[32mESM[39m [1mdist/index.js [22m[32m8.09 KB[39m
|
|
23
|
+
[32mESM[39m ⚡️ Build success in 496ms
|
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# @mastra/couchbase
|
|
2
|
+
|
|
3
|
+
## 0.0.2-alpha.0
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- 106a2ab: Added integration for Couchbase Vector Store
|
|
8
|
+
- Updated dependencies [3d2fb5c]
|
|
9
|
+
- Updated dependencies [7eeb2bc]
|
|
10
|
+
- Updated dependencies [8607972]
|
|
11
|
+
- Updated dependencies [7eeb2bc]
|
|
12
|
+
- Updated dependencies [fba031f]
|
|
13
|
+
- @mastra/core@0.9.2-alpha.5
|
package/LICENSE.md
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# Elastic License 2.0 (ELv2)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Mastra AI, Inc.
|
|
4
|
+
|
|
5
|
+
**Acceptance**
|
|
6
|
+
By using the software, you agree to all of the terms and conditions below.
|
|
7
|
+
|
|
8
|
+
**Copyright License**
|
|
9
|
+
The licensor grants you a non-exclusive, royalty-free, worldwide, non-sublicensable, non-transferable license to use, copy, distribute, make available, and prepare derivative works of the software, in each case subject to the limitations and conditions below
|
|
10
|
+
|
|
11
|
+
**Limitations**
|
|
12
|
+
You may not provide the software to third parties as a hosted or managed service, where the service provides users with access to any substantial set of the features or functionality of the software.
|
|
13
|
+
|
|
14
|
+
You may not move, change, disable, or circumvent the license key functionality in the software, and you may not remove or obscure any functionality in the software that is protected by the license key.
|
|
15
|
+
|
|
16
|
+
You may not alter, remove, or obscure any licensing, copyright, or other notices of the licensor in the software. Any use of the licensor’s trademarks is subject to applicable law.
|
|
17
|
+
|
|
18
|
+
**Patents**
|
|
19
|
+
The licensor grants you a license, under any patent claims the licensor can license, or becomes able to license, to make, have made, use, sell, offer for sale, import and have imported the software, in each case subject to the limitations and conditions in this license. This license does not cover any patent claims that you cause to be infringed by modifications or additions to the software. If you or your company make any written claim that the software infringes or contributes to infringement of any patent, your patent license for the software granted under these terms ends immediately. If your company makes such a claim, your patent license ends immediately for work on behalf of your company.
|
|
20
|
+
|
|
21
|
+
**Notices**
|
|
22
|
+
You must ensure that anyone who gets a copy of any part of the software from you also gets a copy of these terms.
|
|
23
|
+
|
|
24
|
+
If you modify the software, you must include in any modified copies of the software prominent notices stating that you have modified the software.
|
|
25
|
+
|
|
26
|
+
**No Other Rights**
|
|
27
|
+
These terms do not imply any licenses other than those expressly granted in these terms.
|
|
28
|
+
|
|
29
|
+
**Termination**
|
|
30
|
+
If you use the software in violation of these terms, such use is not licensed, and your licenses will automatically terminate. If the licensor provides you with a notice of your violation, and you cease all violation of this license no later than 30 days after you receive that notice, your licenses will be reinstated retroactively. However, if you violate these terms after such reinstatement, any additional violation of these terms will cause your licenses to terminate automatically and permanently.
|
|
31
|
+
|
|
32
|
+
**No Liability**
|
|
33
|
+
As far as the law allows, the software comes as is, without any warranty or condition, and the licensor will not be liable to you for any damages arising out of these terms or the use or nature of the software, under any kind of legal claim.
|
|
34
|
+
|
|
35
|
+
**Definitions**
|
|
36
|
+
The _licensor_ is the entity offering these terms, and the _software_ is the software the licensor makes available under these terms, including any portion of it.
|
|
37
|
+
|
|
38
|
+
_you_ refers to the individual or entity agreeing to these terms.
|
|
39
|
+
|
|
40
|
+
_your company_ is any legal entity, sole proprietorship, or other kind of organization that you work for, plus all organizations that have control over, are under the control of, or are under common control with that organization. _control_ means ownership of substantially all the assets of an entity, or the power to direct its management and policies by vote, contract, or otherwise. Control can be direct or indirect.
|
|
41
|
+
|
|
42
|
+
_your licenses_ are all the licenses granted to you for the software under these terms.
|
|
43
|
+
|
|
44
|
+
_use_ means anything you do with the software requiring one of your licenses.
|
|
45
|
+
|
|
46
|
+
_trademark_ means trademarks, service marks, and similar rights.
|
package/README.md
ADDED
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
# @mastra/couchbase
|
|
2
|
+
|
|
3
|
+
A Mastra vector store implementation for Couchbase, enabling powerful vector similarity search capabilities using the official Couchbase Node.js SDK (v4+). Leverages Couchbase Server's built-in Vector Search feature (available in version 7.6.4+).
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- 🚀 Vector similarity search powered by Couchbase Search Service.
|
|
8
|
+
- 📐 Supports Cosine, Euclidean (L2 Norm), and Dot Product distance metrics.
|
|
9
|
+
- 📄 Stores vectors and associated metadata within Couchbase documents in a specified Collection.
|
|
10
|
+
- 🔧 Manages Couchbase Search Indexes specifically configured for vector search (Create, List, Describe, Delete).
|
|
11
|
+
- 🆔 Automatic UUID generation for documents if IDs are not provided during upsert.
|
|
12
|
+
- ☁️ Compatible with both self-hosted Couchbase Server (7.6.4+) and Couchbase Capella.
|
|
13
|
+
- ⚙️ Uses the official Couchbase Node.js SDK v4+.
|
|
14
|
+
- 📈 Built-in telemetry support for tracing operations via `@mastra/core`.
|
|
15
|
+
|
|
16
|
+
## Prerequisites
|
|
17
|
+
|
|
18
|
+
- Couchbase Server (Version 7.6.4 or higher) or Couchbase Capella cluster with the **Search Service** enabled.
|
|
19
|
+
- A configured **Bucket**, **Scope**, and **Collection** within your Couchbase cluster where vectors and metadata will be stored.
|
|
20
|
+
- Couchbase user credentials (`username`, `password`) with permissions to ([Docs](https://docs.couchbase.com/cloud/get-started/connect.html#prerequisites)):
|
|
21
|
+
- Connect to the cluster.
|
|
22
|
+
- Read/write documents in the specified Collection (`kv` role usually covers this).
|
|
23
|
+
- Manage Search Indexes (`search_admin` role on the relevant bucket/scope).
|
|
24
|
+
- Node.js (v18+ recommended).
|
|
25
|
+
|
|
26
|
+
## Installation
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
npm install @mastra/couchbase
|
|
30
|
+
# or using pnpm
|
|
31
|
+
pnpm add @mastra/couchbase
|
|
32
|
+
# or using yarn
|
|
33
|
+
yarn add @mastra/couchbase
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Getting Started: A Quick Tutorial
|
|
37
|
+
|
|
38
|
+
Let's set up `@mastra/couchbase` to store and search vectors in your Couchbase cluster.
|
|
39
|
+
|
|
40
|
+
**Step 1: Connect to Your Cluster**
|
|
41
|
+
|
|
42
|
+
Instantiate `CouchbaseVector` with your cluster details.
|
|
43
|
+
|
|
44
|
+
```typescript
|
|
45
|
+
import { CouchbaseVector } from '@mastra/couchbase';
|
|
46
|
+
|
|
47
|
+
const connectionString = 'couchbases://your_cluster_host?ssl=no_verify'; // Use couchbases:// for Capella/TLS, couchbase:// for local/non-TLS
|
|
48
|
+
const username = 'your_couchbase_user';
|
|
49
|
+
const password = 'your_couchbase_password';
|
|
50
|
+
const bucketName = 'your_vector_bucket';
|
|
51
|
+
const scopeName = '_default'; // Or your custom scope name
|
|
52
|
+
const collectionName = 'vector_data'; // Or your custom collection name
|
|
53
|
+
|
|
54
|
+
const vectorStore = new CouchbaseVector(connectionString, username, password, bucketName, scopeName, collectionName);
|
|
55
|
+
|
|
56
|
+
console.log('CouchbaseVector instance created. Connecting...');
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
_Note_: The constructor only starts the connection attempt; it is awaited, and the bucket, scope, and collection handles are resolved, on the first operation.
|
|
60
|
+
|
|
61
|
+
**Step 2: Create a Vector Search Index**
|
|
62
|
+
|
|
63
|
+
Define and create a Search Index specifically for vector search on your collection.
|
|
64
|
+
|
|
65
|
+
```typescript
|
|
66
|
+
const indexName = 'my_vector_search_index';
|
|
67
|
+
const vectorDimension = 1536; // Example: OpenAI embedding dimension
|
|
68
|
+
|
|
69
|
+
try {
|
|
70
|
+
await vectorStore.createIndex({
|
|
71
|
+
indexName: indexName,
|
|
72
|
+
dimension: vectorDimension,
|
|
73
|
+
metric: 'cosine', // Or 'euclidean', 'dotproduct'
|
|
74
|
+
});
|
|
75
|
+
console.log(`Search index '${indexName}' created or updated successfully.`);
|
|
76
|
+
} catch (error) {
|
|
77
|
+
console.error(`Failed to create index '${indexName}':`, error);
|
|
78
|
+
}
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
_Note_: Index creation in Couchbase is asynchronous. It might take a short while for the index to become fully built and queryable.
|
|
82
|
+
|
|
83
|
+
_Best practice_: Make sure the index is ready before querying it. A simple delay (`await new Promise(resolve => setTimeout(resolve, 2000));`) is enough for quick experiments; for a more robust solution, poll the index status until it is queryable.
|
|
84
|
+
|
|
85
|
+
**Step 3: Add Your Vectors (Upsert Documents)**
|
|
86
|
+
|
|
87
|
+
Store your vectors and metadata as documents in the designated Couchbase collection.
|
|
88
|
+
|
|
89
|
+
```typescript
|
|
90
|
+
const vectors = [
|
|
91
|
+
Array(vectorDimension).fill(0.1), // Replace with your actual vectors
|
|
92
|
+
Array(vectorDimension).fill(0.2),
|
|
93
|
+
];
|
|
94
|
+
const metadata = [
|
|
95
|
+
{ source: 'doc1.txt', page: 1, category: 'finance' },
|
|
96
|
+
{ source: 'doc2.pdf', page: 5, text: 'This is the text content.', category: 'tech' }, // Example with text
|
|
97
|
+
];
|
|
98
|
+
|
|
99
|
+
try {
|
|
100
|
+
// IDs will be auto-generated UUIDs if not provided
|
|
101
|
+
const ids = await vectorStore.upsert({
|
|
102
|
+
indexName: indexName, // Required for dimension validation if tracked
|
|
103
|
+
vectors: vectors,
|
|
104
|
+
metadata: metadata,
|
|
105
|
+
// ids: ['custom_id_1', 'custom_id_2'] // Optionally provide your own IDs
|
|
106
|
+
});
|
|
107
|
+
console.log('Upserted documents with IDs:', ids);
|
|
108
|
+
} catch (error) {
|
|
109
|
+
console.error('Failed to upsert vectors:', error);
|
|
110
|
+
}
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
_Note_: For large vector batches, Couchbase may need time to process and index all documents. Wait before querying newly inserted vectors: a simple delay (`await new Promise(resolve => setTimeout(resolve, 1000));`) is usually enough for smaller batches, while larger batches may need longer waits or retries.
|
|
114
|
+
|
|
115
|
+
Document structure in Couchbase will resemble:
|
|
116
|
+
|
|
117
|
+
```
|
|
118
|
+
Document ID: <generated_or_provided_id>
|
|
119
|
+
{
|
|
120
|
+
"embedding": [0.1, ...],
|
|
121
|
+
"metadata": { "source": "doc1.txt", "page": 1, "category": "finance" }
|
|
122
|
+
}
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
```
|
|
126
|
+
Document ID: <generated_or_provided_id>
|
|
127
|
+
{
|
|
128
|
+
"embedding": [0.2, ...],
|
|
129
|
+
"metadata": { "source": "doc2.pdf", "page": 5, "text": "...", "category": "tech" },
|
|
130
|
+
"content": "This is the text content." // 'content' field added if metadata.text exists
|
|
131
|
+
}
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
**Step 4: Find Similar Vectors (Query the Index)**
|
|
135
|
+
|
|
136
|
+
Use the Search Index to find documents with vectors similar to your query vector.
|
|
137
|
+
|
|
138
|
+
```typescript
|
|
139
|
+
const queryVector = Array(vectorDimension).fill(0.15); // Your query vector
|
|
140
|
+
const k = 5; // Number of nearest neighbors to retrieve
|
|
141
|
+
try {
|
|
142
|
+
const results = await vectorStore.query({
|
|
143
|
+
indexName: indexName,
|
|
144
|
+
queryVector: queryVector,
|
|
145
|
+
topK: k,
|
|
146
|
+
});
|
|
147
|
+
console.log(`Found ${results.length} similar results:`, results);
|
|
148
|
+
} catch (error) {
|
|
149
|
+
console.error('Failed to query vectors:', error);
|
|
150
|
+
}
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
_Note_: The metadata `filter` and `includeVector` options are not yet supported in `query()`.
|
|
154
|
+
|
|
155
|
+
Results format:
|
|
156
|
+
|
|
157
|
+
```
|
|
158
|
+
[
|
|
159
|
+
{
|
|
160
|
+
id: string, // Document ID
|
|
161
|
+
score: number, // Similarity score (higher is better for cosine/dotproduct, lower for euclidean)
|
|
162
|
+
metadata: Record<string, any> // Fields stored in the index (typically includes 'metadata', 'content')
|
|
163
|
+
},
|
|
164
|
+
// ... more results
|
|
165
|
+
]
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
**Step 5: Manage Indexes**
|
|
169
|
+
|
|
170
|
+
List, inspect, or delete your vector search indexes.
|
|
171
|
+
|
|
172
|
+
```typescript
|
|
173
|
+
try {
|
|
174
|
+
// List all Search Indexes in the cluster (may include non-vector indexes)
|
|
175
|
+
const indexes = await vectorStore.listIndexes();
|
|
176
|
+
console.log('Available search indexes:', indexes);
|
|
177
|
+
// Get details about each listed index
|
|
178
|
+
for (const indexName of indexes) {
|
|
179
|
+
const stats = await vectorStore.describeIndex(indexName);
|
|
180
|
+
console.log(`Stats for index '${indexName}':`, stats);
|
|
181
|
+
}
|
|
182
|
+
// Delete the index when no longer needed
|
|
183
|
+
await vectorStore.deleteIndex(indexName);
|
|
184
|
+
console.log(`Search index '${indexName}' deleted.`);
|
|
185
|
+
} catch (error) {
|
|
186
|
+
console.error('Failed to manage indexes:', error);
|
|
187
|
+
}
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
_Note_: Deleting an index does NOT delete the vectors stored in the associated Couchbase Collection.
|
|
191
|
+
|
|
192
|
+
## Advanced Couchbase Vector Usage
|
|
193
|
+
|
|
194
|
+
- **Distance Metrics Mapping:**
|
|
195
|
+
- The `metric` parameter in `createIndex` and `describeIndex` uses Mastra terms. These map to Couchbase index definitions as follows:
|
|
196
|
+
- `cosine` → `cosine`
|
|
197
|
+
- `euclidean` → `l2_norm`
|
|
198
|
+
- `dotproduct` → `dot_product`
|
|
199
|
+
- **Index Definition Details:**
|
|
200
|
+
- The `createIndex` method constructs a Couchbase Search Index definition tailored for vector search. It indexes the `embedding` field (as type `vector`) and the `content` field (as type `text`), targeting documents within the specified `scopeName.collectionName`. It enables `store` and `docvalues` for these fields. For fine-grained control over the index definition (e.g., different analyzers, type mappings), you would need to use the Couchbase SDK or UI directly.
|
|
201
|
+
- **Document Structure:**
|
|
202
|
+
- Vectors are stored in the `embedding` field.
|
|
203
|
+
- Metadata is stored in the `metadata` field.
|
|
204
|
+
- If `metadata.text` exists, it's copied to the `content` field.
|
|
205
|
+
- The `query` results currently return stored fields like `metadata` and `content` in the `metadata` property of the result object, but **not** the `embedding` field itself.
|
|
206
|
+
|
|
207
|
+
## API Reference (`CouchbaseVector` Methods)
|
|
208
|
+
|
|
209
|
+
- `constructor(cnn_string, username, password, bucketName, scopeName, collectionName)`: Creates a new instance and prepares the connection promise.
|
|
210
|
+
- `getCollection()`: (Primarily internal) Establishes connection lazily and gets the Couchbase `Collection` object.
|
|
211
|
+
- `createIndex({ indexName, dimension, metric? })`: Creates or updates a Couchbase Search Index configured for vector search on the collection.
|
|
212
|
+
- `upsert({ indexName, vectors, metadata?, ids? })`: Upserts documents containing vectors and metadata into the Couchbase collection. Returns the document IDs used.
|
|
213
|
+
- `query({ indexName, queryVector, topK?, filter?, includeVector? })`: Queries the specified Search Index for similar vectors using Couchbase Vector Search. **Note:** `filter` and `includeVector` options are **not currently supported**.
|
|
214
|
+
- `listIndexes()`: Lists the names of all Search Indexes in the cluster. Returns fully qualified names (e.g., `bucket.scope.index`).
|
|
215
|
+
- `describeIndex(indexName)`: Gets the configured dimension, metric (Mastra name), and document count (currently returns -1) for a specific Search Index (using its short name).
|
|
216
|
+
- `deleteIndex(indexName)`: Deletes a Search Index (using its short name).
|
|
217
|
+
|
|
218
|
+
## Configuration Details
|
|
219
|
+
|
|
220
|
+
- **Required Constructor Parameters:**
|
|
221
|
+
- `cnn_string`: Couchbase connection string (e.g., `couchbases://host?ssl=no_verify`, `couchbase://localhost`). See [Couchbase SDK Docs](https://docs.couchbase.com/nodejs-sdk/current/hello-world/connect.html) for all options.
|
|
222
|
+
- `username`: Couchbase user with necessary permissions (see Prerequisites).
|
|
223
|
+
- `password`: Password for the Couchbase user.
|
|
224
|
+
- `bucketName`: Name of the target Couchbase Bucket.
|
|
225
|
+
- `scopeName`: Name of the target Scope within the Bucket.
|
|
226
|
+
- `collectionName`: Name of the target Collection within the Scope.
|
|
227
|
+
- **Internal Connection Profile:** The library internally uses the `wanDevelopment` configuration profile when connecting via the Couchbase SDK. This profile adjusts certain timeouts to values suitable for development and some cloud environments. For production tuning, consider modifying the library or managing the SDK connection externally.
|
|
228
|
+
|
|
229
|
+
## Notes & Considerations
|
|
230
|
+
|
|
231
|
+
- **Couchbase Version:** This integration requires **Couchbase Server 7.6.4+** or a compatible Couchbase Capella cluster with the **Search Service enabled**.
|
|
232
|
+
- **Index Creation:** The `createIndex` method defines and creates/updates a Couchbase Search index configured for vector search. Index creation in Couchbase is asynchronous; allow a short time after creation before querying, especially on larger datasets.
|
|
233
|
+
- **Data Storage:** Vectors and metadata are stored together as fields within standard Couchbase documents in the specified Collection.
|
|
234
|
+
- The default field name for the vector embedding is `"embedding"`.
|
|
235
|
+
- The default field name for metadata is `"metadata"`.
|
|
236
|
+
- If `metadata` contains a `text` property, its value is also copied to a top-level `"content"` field in the document, which is indexed by the Search index created by this library.
|
|
237
|
+
- **Upsert Independence:** The `upsert` operation adds/modifies documents directly in the Collection. It **does not depend on the Search index** existing at the time of upsert. You can insert data before or after creating the index. Couchbase allows multiple Search indexes over the same Collection data.
|
|
238
|
+
- **Dimension Validation:**
|
|
239
|
+
- This library _attempts_ to track the dimension specified during the last `createIndex` call within the same `CouchbaseVector` instance. If tracked, it performs a basic length check during `upsert`.
|
|
240
|
+
- However, Couchbase itself **does not enforce vector dimensions at data ingest time**. Upserting a vector with a dimension different from what an index expects **will not cause an error during `upsert`**. Errors related to dimension mismatches will typically occur only during the `query` operation against that specific index.
|
|
241
|
+
- **Asynchronous Operations & Consistency:** Be mindful of the asynchronous nature of index building and potential replication delays in Couchbase, especially in multi-node clusters. Add appropriate checks or delays in your application logic if immediate consistency after writes is required for subsequent queries.
|
|
242
|
+
- **Index Creation Delays:** After creating a vector search index, allow sufficient time (typically 1-5 seconds for small datasets, longer for larger ones) before querying against it. The delay needed depends on data volume, cluster resources, and replication settings.
|
|
243
|
+
- **Vector Insertion Processing:** When upserting large batches of vectors, the documents may not be immediately queryable. Consider implementing appropriate wait times or retry mechanisms when performing queries immediately after bulk inserts.
|
|
244
|
+
- **Production Considerations:** For production environments, implement a more robust polling mechanism to check index status rather than fixed timeouts.
|
|
245
|
+
- **Current Limitations:**
|
|
246
|
+
- **Metadata Filtering:** The `filter` parameter in the `query` method is **not yet supported** by this library. Filtering must be done client-side after retrieving results or by using the Couchbase SDK's Search capabilities directly for more complex queries.
|
|
247
|
+
- **Returning Vectors:** The `includeVector: true` option in the `query` method is **not yet supported**. To retrieve the vector embedding, you must fetch the full document using its ID (returned in the query results) via the Couchbase SDK's Key-Value operations (`collection.get(id)`).
|
|
248
|
+
- **Index Count:** The `describeIndex` method currently **returns -1 for the count** of indexed documents. Use Couchbase tools (UI, CLI, SQL++ query on the collection, Search API) for accurate index statistics.
|
|
249
|
+
|
|
250
|
+
## Related Links
|
|
251
|
+
|
|
252
|
+
- [Couchbase Vector Search Documentation](https://docs.couchbase.com/cloud/vector-search/vector-search.html)
|
|
253
|
+
- [Couchbase Node.js SDK Documentation](https://docs.couchbase.com/nodejs-sdk/current/hello-world/start-using-sdk.html)
|
|
254
|
+
- [Couchbase Query Language (SQL++) for working with documents](https://docs.couchbase.com/server/current/n1ql/n1ql-language-reference/index.html)
|
|
255
|
+
- [Couchbase Search Service API / Index Definition](https://docs.couchbase.com/cloud/search/search-index-params.html)
|
|
256
|
+
|
|
257
|
+
---
|
|
258
|
+
|
|
259
|
+
## 📢 Support Policy
|
|
260
|
+
|
|
261
|
+
We truly appreciate your interest in this project!
|
|
262
|
+
This project is **community-maintained**, which means it's **not officially supported** by our support team.
|
|
263
|
+
|
|
264
|
+
If you need help, have found a bug, or want to contribute improvements, the best place to do that is right here — by [opening a GitHub issue](https://github.com/mastra-ai/mastra/issues).
|
|
265
|
+
Our support portal is unable to assist with requests related to this project, so we kindly ask that all inquiries stay within GitHub.
|
|
266
|
+
|
|
267
|
+
Your collaboration helps us all move forward together — thank you!
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import type { Collection } from 'couchbase';
|
|
2
|
+
import type { CreateIndexParams } from '@mastra/core/vector';
|
|
3
|
+
import type { IndexStats } from '@mastra/core/vector';
|
|
4
|
+
import { MastraVector } from '@mastra/core/vector';
|
|
5
|
+
import type { QueryResult } from '@mastra/core/vector';
|
|
6
|
+
import type { QueryVectorParams } from '@mastra/core/vector';
|
|
7
|
+
import type { UpsertVectorParams } from '@mastra/core/vector';
|
|
8
|
+
|
|
9
|
+
declare type CouchbaseMetric = 'cosine' | 'l2_norm' | 'dot_product';
|
|
10
|
+
|
|
11
|
+
declare class CouchbaseVector extends MastraVector {
|
|
12
|
+
private clusterPromise;
|
|
13
|
+
private cluster;
|
|
14
|
+
private bucketName;
|
|
15
|
+
private collectionName;
|
|
16
|
+
private scopeName;
|
|
17
|
+
private collection;
|
|
18
|
+
private bucket;
|
|
19
|
+
private scope;
|
|
20
|
+
private vector_dimension;
|
|
21
|
+
constructor(cnn_string: string, username: string, password: string, bucketName: string, scopeName: string, collectionName: string);
|
|
22
|
+
getCollection(): Promise<Collection>;
|
|
23
|
+
createIndex(params: CreateIndexParams): Promise<void>;
|
|
24
|
+
upsert(params: UpsertVectorParams): Promise<string[]>;
|
|
25
|
+
query(params: QueryVectorParams): Promise<QueryResult[]>;
|
|
26
|
+
listIndexes(): Promise<string[]>;
|
|
27
|
+
describeIndex(indexName: string): Promise<IndexStats>;
|
|
28
|
+
deleteIndex(indexName: string): Promise<void>;
|
|
29
|
+
}
|
|
30
|
+
export { CouchbaseVector }
|
|
31
|
+
export { CouchbaseVector as CouchbaseVector_alias_1 }
|
|
32
|
+
|
|
33
|
+
declare const DISTANCE_MAPPING: Record<MastraMetric, CouchbaseMetric>;
|
|
34
|
+
export { DISTANCE_MAPPING }
|
|
35
|
+
export { DISTANCE_MAPPING as DISTANCE_MAPPING_alias_1 }
|
|
36
|
+
|
|
37
|
+
declare type MastraMetric = 'cosine' | 'euclidean' | 'dotproduct';
|
|
38
|
+
|
|
39
|
+
export { }
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import type { Collection } from 'couchbase';
|
|
2
|
+
import type { CreateIndexParams } from '@mastra/core/vector';
|
|
3
|
+
import type { IndexStats } from '@mastra/core/vector';
|
|
4
|
+
import { MastraVector } from '@mastra/core/vector';
|
|
5
|
+
import type { QueryResult } from '@mastra/core/vector';
|
|
6
|
+
import type { QueryVectorParams } from '@mastra/core/vector';
|
|
7
|
+
import type { UpsertVectorParams } from '@mastra/core/vector';
|
|
8
|
+
|
|
9
|
+
declare type CouchbaseMetric = 'cosine' | 'l2_norm' | 'dot_product';
|
|
10
|
+
|
|
11
|
+
declare class CouchbaseVector extends MastraVector {
|
|
12
|
+
private clusterPromise;
|
|
13
|
+
private cluster;
|
|
14
|
+
private bucketName;
|
|
15
|
+
private collectionName;
|
|
16
|
+
private scopeName;
|
|
17
|
+
private collection;
|
|
18
|
+
private bucket;
|
|
19
|
+
private scope;
|
|
20
|
+
private vector_dimension;
|
|
21
|
+
constructor(cnn_string: string, username: string, password: string, bucketName: string, scopeName: string, collectionName: string);
|
|
22
|
+
getCollection(): Promise<Collection>;
|
|
23
|
+
createIndex(params: CreateIndexParams): Promise<void>;
|
|
24
|
+
upsert(params: UpsertVectorParams): Promise<string[]>;
|
|
25
|
+
query(params: QueryVectorParams): Promise<QueryResult[]>;
|
|
26
|
+
listIndexes(): Promise<string[]>;
|
|
27
|
+
describeIndex(indexName: string): Promise<IndexStats>;
|
|
28
|
+
deleteIndex(indexName: string): Promise<void>;
|
|
29
|
+
}
|
|
30
|
+
export { CouchbaseVector }
|
|
31
|
+
export { CouchbaseVector as CouchbaseVector_alias_1 }
|
|
32
|
+
|
|
33
|
+
declare const DISTANCE_MAPPING: Record<MastraMetric, CouchbaseMetric>;
|
|
34
|
+
export { DISTANCE_MAPPING }
|
|
35
|
+
export { DISTANCE_MAPPING as DISTANCE_MAPPING_alias_1 }
|
|
36
|
+
|
|
37
|
+
declare type MastraMetric = 'cosine' | 'euclidean' | 'dotproduct';
|
|
38
|
+
|
|
39
|
+
export { }
|
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
var vector = require('@mastra/core/vector');
|
|
4
|
+
var couchbase = require('couchbase');
|
|
5
|
+
|
|
6
|
+
// src/vector/index.ts
|
|
7
|
+
var DISTANCE_MAPPING = { // Lookup table: Mastra metric name -> Couchbase vector `similarity` name (used by createIndex).
|
|
8
|
+
cosine: "cosine", // same name on both sides
|
|
9
|
+
euclidean: "l2_norm", // Mastra "euclidean" is Couchbase "l2_norm"
|
|
10
|
+
dotproduct: "dot_product" // Mastra "dotproduct" is Couchbase "dot_product"
|
|
11
|
+
};
|
|
12
|
+
var CouchbaseVector = class extends vector.MastraVector {
|
|
13
|
+
clusterPromise;
|
|
14
|
+
cluster;
|
|
15
|
+
bucketName;
|
|
16
|
+
collectionName;
|
|
17
|
+
scopeName;
|
|
18
|
+
collection;
|
|
19
|
+
bucket;
|
|
20
|
+
scope;
|
|
21
|
+
vector_dimension;
|
|
22
|
+
constructor(cnn_string, username, password, bucketName, scopeName, collectionName) {
|
|
23
|
+
super();
|
|
24
|
+
const baseClusterPromise = couchbase.connect(cnn_string, {
|
|
25
|
+
username,
|
|
26
|
+
password,
|
|
27
|
+
configProfile: "wanDevelopment"
|
|
28
|
+
});
|
|
29
|
+
const telemetry = this.__getTelemetry();
|
|
30
|
+
this.clusterPromise = telemetry?.traceClass(baseClusterPromise, {
|
|
31
|
+
spanNamePrefix: "couchbase-vector",
|
|
32
|
+
attributes: {
|
|
33
|
+
"vector.type": "couchbase"
|
|
34
|
+
}
|
|
35
|
+
}) ?? baseClusterPromise;
|
|
36
|
+
this.cluster = null;
|
|
37
|
+
this.bucketName = bucketName;
|
|
38
|
+
this.collectionName = collectionName;
|
|
39
|
+
this.scopeName = scopeName;
|
|
40
|
+
this.collection = null;
|
|
41
|
+
this.bucket = null;
|
|
42
|
+
this.scope = null;
|
|
43
|
+
this.vector_dimension = null;
|
|
44
|
+
}
|
|
45
|
+
async getCollection() {
|
|
46
|
+
if (!this.cluster) {
|
|
47
|
+
this.cluster = await this.clusterPromise;
|
|
48
|
+
}
|
|
49
|
+
if (!this.collection) {
|
|
50
|
+
this.bucket = this.cluster.bucket(this.bucketName);
|
|
51
|
+
this.scope = this.bucket.scope(this.scopeName);
|
|
52
|
+
this.collection = this.scope.collection(this.collectionName);
|
|
53
|
+
}
|
|
54
|
+
return this.collection;
|
|
55
|
+
}
|
|
56
|
+
async createIndex(params) {
|
|
57
|
+
const { indexName, dimension, metric = "dotproduct" } = params;
|
|
58
|
+
await this.getCollection();
|
|
59
|
+
if (!Number.isInteger(dimension) || dimension <= 0) {
|
|
60
|
+
throw new Error("Dimension must be a positive integer");
|
|
61
|
+
}
|
|
62
|
+
await this.scope.searchIndexes().upsertIndex({
|
|
63
|
+
name: indexName,
|
|
64
|
+
sourceName: this.bucketName,
|
|
65
|
+
type: "fulltext-index",
|
|
66
|
+
params: {
|
|
67
|
+
doc_config: {
|
|
68
|
+
docid_prefix_delim: "",
|
|
69
|
+
docid_regexp: "",
|
|
70
|
+
mode: "scope.collection.type_field",
|
|
71
|
+
type_field: "type"
|
|
72
|
+
},
|
|
73
|
+
mapping: {
|
|
74
|
+
default_analyzer: "standard",
|
|
75
|
+
default_datetime_parser: "dateTimeOptional",
|
|
76
|
+
default_field: "_all",
|
|
77
|
+
default_mapping: {
|
|
78
|
+
dynamic: true,
|
|
79
|
+
enabled: false
|
|
80
|
+
},
|
|
81
|
+
default_type: "_default",
|
|
82
|
+
docvalues_dynamic: true,
|
|
83
|
+
// [Doc](https://docs.couchbase.com/server/current/search/search-index-params.html#params) mentions this attribute is required for vector search to return the indexed field
|
|
84
|
+
index_dynamic: true,
|
|
85
|
+
store_dynamic: true,
|
|
86
|
+
// [Doc](https://docs.couchbase.com/server/current/search/search-index-params.html#params) mentions this attribute is required for vector search to return the indexed field
|
|
87
|
+
type_field: "_type",
|
|
88
|
+
types: {
|
|
89
|
+
[`${this.scopeName}.${this.collectionName}`]: {
|
|
90
|
+
dynamic: true,
|
|
91
|
+
enabled: true,
|
|
92
|
+
properties: {
|
|
93
|
+
embedding: {
|
|
94
|
+
enabled: true,
|
|
95
|
+
fields: [
|
|
96
|
+
{
|
|
97
|
+
dims: dimension,
|
|
98
|
+
index: true,
|
|
99
|
+
name: "embedding",
|
|
100
|
+
similarity: DISTANCE_MAPPING[metric],
|
|
101
|
+
type: "vector",
|
|
102
|
+
vector_index_optimized_for: "recall",
|
|
103
|
+
store: true,
|
|
104
|
+
// CHANGED due to https://docs.couchbase.com/server/current/search/search-index-params.html#fields
|
|
105
|
+
docvalues: true,
|
|
106
|
+
// CHANGED due to https://docs.couchbase.com/server/current/search/search-index-params.html#fields
|
|
107
|
+
include_term_vectors: true
|
|
108
|
+
// CHANGED due to https://docs.couchbase.com/server/current/search/search-index-params.html#fields
|
|
109
|
+
}
|
|
110
|
+
]
|
|
111
|
+
},
|
|
112
|
+
content: {
|
|
113
|
+
enabled: true,
|
|
114
|
+
fields: [
|
|
115
|
+
{
|
|
116
|
+
index: true,
|
|
117
|
+
name: "content",
|
|
118
|
+
store: true,
|
|
119
|
+
type: "text"
|
|
120
|
+
}
|
|
121
|
+
]
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
},
|
|
127
|
+
store: {
|
|
128
|
+
indexType: "scorch",
|
|
129
|
+
segmentVersion: 16
|
|
130
|
+
}
|
|
131
|
+
},
|
|
132
|
+
sourceUuid: "",
|
|
133
|
+
sourceParams: {},
|
|
134
|
+
sourceType: "gocbcore",
|
|
135
|
+
planParams: {
|
|
136
|
+
maxPartitionsPerPIndex: 64,
|
|
137
|
+
indexPartitions: 16,
|
|
138
|
+
numReplicas: 0
|
|
139
|
+
}
|
|
140
|
+
});
|
|
141
|
+
this.vector_dimension = dimension;
|
|
142
|
+
}
|
|
143
|
+
async upsert(params) {
|
|
144
|
+
const { vectors, metadata, ids } = params;
|
|
145
|
+
await this.getCollection();
|
|
146
|
+
if (!vectors || vectors.length === 0) {
|
|
147
|
+
throw new Error("No vectors provided");
|
|
148
|
+
}
|
|
149
|
+
if (this.vector_dimension) {
|
|
150
|
+
for (const vector of vectors) {
|
|
151
|
+
if (!vector || this.vector_dimension !== vector.length) {
|
|
152
|
+
throw new Error("Vector dimension mismatch");
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
const pointIds = ids || vectors.map(() => crypto.randomUUID());
|
|
157
|
+
const records = vectors.map((vector, i) => {
|
|
158
|
+
const metadataObj = metadata?.[i] || {};
|
|
159
|
+
const record = {
|
|
160
|
+
embedding: vector,
|
|
161
|
+
metadata: metadataObj
|
|
162
|
+
};
|
|
163
|
+
if (metadataObj.text) {
|
|
164
|
+
record.content = metadataObj.text;
|
|
165
|
+
}
|
|
166
|
+
return record;
|
|
167
|
+
});
|
|
168
|
+
const allPromises = [];
|
|
169
|
+
for (let i = 0; i < records.length; i++) {
|
|
170
|
+
allPromises.push(this.collection.upsert(pointIds[i], records[i]));
|
|
171
|
+
}
|
|
172
|
+
await Promise.all(allPromises);
|
|
173
|
+
return pointIds;
|
|
174
|
+
}
|
|
175
|
+
async query(params) {
|
|
176
|
+
const { indexName, queryVector, topK = 10, includeVector = false } = params;
|
|
177
|
+
await this.getCollection();
|
|
178
|
+
const index_stats = await this.describeIndex(indexName);
|
|
179
|
+
if (queryVector.length !== index_stats.dimension) {
|
|
180
|
+
throw new Error(`Query vector dimension mismatch. Expected ${index_stats.dimension}, got ${queryVector.length}`);
|
|
181
|
+
}
|
|
182
|
+
let request = couchbase.SearchRequest.create(
|
|
183
|
+
couchbase.VectorSearch.fromVectorQuery(couchbase.VectorQuery.create("embedding", queryVector).numCandidates(topK))
|
|
184
|
+
);
|
|
185
|
+
const results = await this.scope.search(indexName, request, {
|
|
186
|
+
fields: ["*"]
|
|
187
|
+
});
|
|
188
|
+
if (includeVector) {
|
|
189
|
+
throw new Error("Including vectors in search results is not yet supported by the Couchbase vector store");
|
|
190
|
+
}
|
|
191
|
+
const output = [];
|
|
192
|
+
for (const match of results.rows) {
|
|
193
|
+
const cleanedMetadata = {};
|
|
194
|
+
const fields = match.fields || {};
|
|
195
|
+
for (const key in fields) {
|
|
196
|
+
if (Object.prototype.hasOwnProperty.call(fields, key)) {
|
|
197
|
+
const newKey = key.startsWith("metadata.") ? key.substring("metadata.".length) : key;
|
|
198
|
+
cleanedMetadata[newKey] = fields[key];
|
|
199
|
+
}
|
|
200
|
+
}
|
|
201
|
+
output.push({
|
|
202
|
+
id: match.id,
|
|
203
|
+
score: match.score || 0,
|
|
204
|
+
metadata: cleanedMetadata
|
|
205
|
+
// Use the cleaned metadata object
|
|
206
|
+
});
|
|
207
|
+
}
|
|
208
|
+
return output;
|
|
209
|
+
}
|
|
210
|
+
async listIndexes() {
|
|
211
|
+
await this.getCollection();
|
|
212
|
+
const indexes = await this.scope.searchIndexes().getAllIndexes();
|
|
213
|
+
return indexes?.map((index) => index.name) || [];
|
|
214
|
+
}
|
|
215
|
+
async describeIndex(indexName) {
|
|
216
|
+
await this.getCollection();
|
|
217
|
+
if (!(await this.listIndexes()).includes(indexName)) {
|
|
218
|
+
throw new Error(`Index ${indexName} does not exist`);
|
|
219
|
+
}
|
|
220
|
+
const index = await this.scope.searchIndexes().getIndex(indexName);
|
|
221
|
+
const dimensions = index.params.mapping?.types?.[`${this.scopeName}.${this.collectionName}`]?.properties?.embedding?.fields?.[0]?.dims;
|
|
222
|
+
const count = -1;
|
|
223
|
+
const metric = index.params.mapping?.types?.[`${this.scopeName}.${this.collectionName}`]?.properties?.embedding?.fields?.[0]?.similarity;
|
|
224
|
+
return {
|
|
225
|
+
dimension: dimensions,
|
|
226
|
+
count,
|
|
227
|
+
metric: Object.keys(DISTANCE_MAPPING).find(
|
|
228
|
+
(key) => DISTANCE_MAPPING[key] === metric
|
|
229
|
+
)
|
|
230
|
+
};
|
|
231
|
+
}
|
|
232
|
+
async deleteIndex(indexName) {
|
|
233
|
+
await this.getCollection();
|
|
234
|
+
if (!(await this.listIndexes()).includes(indexName)) {
|
|
235
|
+
throw new Error(`Index ${indexName} does not exist`);
|
|
236
|
+
}
|
|
237
|
+
await this.scope.searchIndexes().dropIndex(indexName);
|
|
238
|
+
this.vector_dimension = null;
|
|
239
|
+
}
|
|
240
|
+
};
|
|
241
|
+
|
|
242
|
+
// CommonJS named export of the CouchbaseVector class.
exports.CouchbaseVector = CouchbaseVector;
|
|
243
|
+
// CommonJS named export of the metric-name to Couchbase-similarity lookup table.
exports.DISTANCE_MAPPING = DISTANCE_MAPPING;
|