@mastra/couchbase 0.0.2-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,276 @@
1
+ import { MastraVector } from '@mastra/core/vector';
2
+ import type {
3
+ QueryResult,
4
+ IndexStats,
5
+ CreateIndexParams,
6
+ UpsertVectorParams,
7
+ QueryVectorParams,
8
+ } from '@mastra/core/vector';
9
+ import type { Bucket, Cluster, Collection, Scope } from 'couchbase';
10
+ import { connect, SearchRequest, VectorQuery, VectorSearch } from 'couchbase';
11
+
12
/** Distance metric names accepted by the Mastra vector API. */
type MastraMetric = 'cosine' | 'euclidean' | 'dotproduct';

/** Similarity names written into a Couchbase Search vector field definition. */
type CouchbaseMetric = 'cosine' | 'l2_norm' | 'dot_product';

/**
 * Translates a Mastra metric name into the similarity name stored in the
 * Couchbase Search index definition. The reverse direction is obtained by
 * searching this table for a matching value.
 */
export const DISTANCE_MAPPING: Record<MastraMetric, CouchbaseMetric> = {
  cosine: 'cosine',
  euclidean: 'l2_norm',
  dotproduct: 'dot_product',
};
19
+
20
/**
 * Mastra vector store backed by a single Couchbase collection and Couchbase
 * Search vector indexes created on the enclosing scope.
 *
 * The cluster connection is started in the constructor and awaited lazily the
 * first time any operation needs it. Each stored document has the shape
 * `{ embedding, metadata, content? }`.
 */
export class CouchbaseVector extends MastraVector {
  private clusterPromise: Promise<Cluster>;
  private cluster: Cluster | null = null;
  private bucketName: string;
  private collectionName: string;
  private scopeName: string;
  private collection: Collection | null = null;
  private bucket: Bucket | null = null;
  private scope: Scope | null = null;
  // Dimension passed to the most recent createIndex() call on this instance;
  // cleared by deleteIndex(). While null, upsert() skips its dimension check.
  private vector_dimension: number | null = null;

  /**
   * Starts connecting to the cluster; the connection is not awaited here.
   *
   * @param cnn_string Couchbase connection string passed to `connect`.
   * @param username Cluster user name.
   * @param password Cluster password.
   * @param bucketName Bucket holding the vectors; also used as the index source.
   * @param scopeName Scope inside the bucket.
   * @param collectionName Collection inside the scope.
   */
  constructor(
    cnn_string: string,
    username: string,
    password: string,
    bucketName: string,
    scopeName: string,
    collectionName: string,
  ) {
    super();

    const baseClusterPromise = connect(cnn_string, {
      username,
      password,
      configProfile: 'wanDevelopment',
    });

    // Use the traced value when telemetry is configured, otherwise the raw promise.
    const telemetry = this.__getTelemetry();
    this.clusterPromise =
      telemetry?.traceClass(baseClusterPromise, {
        spanNamePrefix: 'couchbase-vector',
        attributes: {
          'vector.type': 'couchbase',
        },
      }) ?? baseClusterPromise;

    this.bucketName = bucketName;
    this.collectionName = collectionName;
    this.scopeName = scopeName;
  }

  /**
   * Awaits the cluster connection (once) and resolves the bucket, scope and
   * collection handles (once), caching all of them on the instance.
   *
   * @returns The collection handle used for document upserts.
   */
  async getCollection(): Promise<Collection> {
    if (!this.cluster) {
      this.cluster = await this.clusterPromise;
    }

    if (!this.collection) {
      this.bucket = this.cluster.bucket(this.bucketName);
      this.scope = this.bucket.scope(this.scopeName);
      this.collection = this.scope.collection(this.collectionName);
    }

    return this.collection;
  }

  /**
   * Ensures the handles are resolved and returns the scope, so callers do not
   * have to dereference a possibly-null field.
   */
  private async resolveScope(): Promise<Scope> {
    await this.getCollection();
    if (!this.scope) {
      throw new Error('Couchbase scope has not been initialised');
    }
    return this.scope;
  }

  /**
   * Creates or replaces a Search index with a vector field named `embedding`
   * and a text field named `content` for this store's scope and collection.
   *
   * @param params Index name, vector dimension and optional metric
   *   (defaults to `dotproduct`).
   * @throws Error if `dimension` is not a positive integer or `metric` is not
   *   a key of `DISTANCE_MAPPING`.
   */
  async createIndex(params: CreateIndexParams): Promise<void> {
    const { indexName, dimension, metric = 'dotproduct' as MastraMetric } = params;

    // Validate before touching the network so bad input fails fast.
    if (!Number.isInteger(dimension) || dimension <= 0) {
      throw new Error('Dimension must be a positive integer');
    }
    if (!Object.prototype.hasOwnProperty.call(DISTANCE_MAPPING, metric)) {
      throw new Error(`Unsupported metric: ${metric}`);
    }

    const scope = await this.resolveScope();

    await scope.searchIndexes().upsertIndex({
      name: indexName,
      sourceName: this.bucketName,
      type: 'fulltext-index',
      params: {
        doc_config: {
          docid_prefix_delim: '',
          docid_regexp: '',
          mode: 'scope.collection.type_field',
          type_field: 'type',
        },
        mapping: {
          default_analyzer: 'standard',
          default_datetime_parser: 'dateTimeOptional',
          default_field: '_all',
          default_mapping: {
            dynamic: true,
            enabled: false,
          },
          default_type: '_default',
          // docvalues_dynamic / store_dynamic: per
          // https://docs.couchbase.com/server/current/search/search-index-params.html#params
          // these are required for vector search to return the indexed field.
          docvalues_dynamic: true,
          index_dynamic: true,
          store_dynamic: true,
          type_field: '_type',
          types: {
            [`${this.scopeName}.${this.collectionName}`]: {
              dynamic: true,
              enabled: true,
              properties: {
                embedding: {
                  enabled: true,
                  fields: [
                    {
                      dims: dimension,
                      index: true,
                      name: 'embedding',
                      similarity: DISTANCE_MAPPING[metric],
                      type: 'vector',
                      vector_index_optimized_for: 'recall',
                      // store / docvalues / include_term_vectors set per
                      // https://docs.couchbase.com/server/current/search/search-index-params.html#fields
                      store: true,
                      docvalues: true,
                      include_term_vectors: true,
                    },
                  ],
                },
                content: {
                  enabled: true,
                  fields: [
                    {
                      index: true,
                      name: 'content',
                      store: true,
                      type: 'text',
                    },
                  ],
                },
              },
            },
          },
        },
        store: {
          indexType: 'scorch',
          segmentVersion: 16,
        },
      },
      sourceUuid: '',
      sourceParams: {},
      sourceType: 'gocbcore',
      planParams: {
        maxPartitionsPerPIndex: 64,
        indexPartitions: 16,
        numReplicas: 0,
      },
    });

    this.vector_dimension = dimension;
  }

  /**
   * Writes one document per vector into the collection.
   *
   * @param params Vectors plus optional per-vector metadata and ids. Missing
   *   ids are generated with `crypto.randomUUID()`. A truthy `metadata.text`
   *   is also copied to the document's `content` field.
   * @returns The document ids, in the same order as `vectors`.
   * @throws Error if no vectors are given, if `ids` is given with a different
   *   length than `vectors`, or if a vector's length differs from the
   *   dimension recorded by `createIndex` on this instance.
   */
  async upsert(params: UpsertVectorParams): Promise<string[]> {
    const { vectors, metadata, ids } = params;

    if (!vectors || vectors.length === 0) {
      throw new Error('No vectors provided');
    }
    // Without this check a short `ids` array would upsert under `undefined` keys.
    if (ids && ids.length !== vectors.length) {
      throw new Error(`Number of ids (${ids.length}) does not match number of vectors (${vectors.length})`);
    }
    if (this.vector_dimension) {
      for (const vector of vectors) {
        if (!vector || this.vector_dimension !== vector.length) {
          throw new Error('Vector dimension mismatch');
        }
      }
    }

    const collection = await this.getCollection();

    const pointIds = ids ?? vectors.map(() => crypto.randomUUID());
    const records = vectors.map((vector, i) => {
      const metadataObj = metadata?.[i] || {};
      const record: Record<string, any> = {
        embedding: vector,
        metadata: metadataObj,
      };
      // If metadata has a text field, save it as content
      if (metadataObj.text) {
        record.content = metadataObj.text;
      }
      return record;
    });

    // pointIds and records have equal length here, so pairing by index is safe.
    await Promise.all(pointIds.map((id, i) => collection.upsert(id, records[i])));

    return pointIds;
  }

  /**
   * Runs a vector search against `indexName` on the `embedding` field.
   *
   * @param params Index name, query vector, `topK` (default 10, passed as the
   *   number of candidates) and `includeVector` (default false).
   * @returns One result per returned row. Row fields become `metadata`, with a
   *   leading `metadata.` stripped from field names.
   * @throws Error if `includeVector` is true (not supported), if the index
   *   does not exist, or if the query vector length differs from the index
   *   dimension.
   */
  async query(params: QueryVectorParams): Promise<QueryResult[]> {
    const { indexName, queryVector, topK = 10, includeVector = false } = params;

    // Reject the unsupported option up front instead of after a wasted search.
    if (includeVector) {
      throw new Error('Including vectors in search results is not yet supported by the Couchbase vector store');
    }

    const scope = await this.resolveScope();

    const indexStats = await this.describeIndex(indexName);
    if (queryVector.length !== indexStats.dimension) {
      throw new Error(`Query vector dimension mismatch. Expected ${indexStats.dimension}, got ${queryVector.length}`);
    }

    const request = SearchRequest.create(
      VectorSearch.fromVectorQuery(VectorQuery.create('embedding', queryVector).numCandidates(topK)),
    );
    const results = await scope.search(indexName, request, {
      fields: ['*'],
    });

    const output: QueryResult[] = [];
    for (const match of results.rows) {
      const fields = (match.fields as Record<string, any>) || {}; // rows may carry no fields
      const cleanedMetadata: Record<string, any> = {};
      for (const [key, value] of Object.entries(fields)) {
        const newKey = key.startsWith('metadata.') ? key.substring('metadata.'.length) : key;
        cleanedMetadata[newKey] = value;
      }
      output.push({
        id: match.id as string,
        score: (match.score as number) || 0,
        metadata: cleanedMetadata,
      });
    }
    return output;
  }

  /**
   * @returns Names of all Search indexes defined on this store's scope.
   */
  async listIndexes(): Promise<string[]> {
    const scope = await this.resolveScope();
    const indexes = await scope.searchIndexes().getAllIndexes();
    return indexes?.map(index => index.name) || [];
  }

  /**
   * Reads dimension and metric back from the stored index definition.
   *
   * @param indexName Name of the Search index to inspect.
   * @returns `dimension` and `metric` from the first `embedding` field of this
   *   store's scope.collection mapping; `count` is always -1 because document
   *   counts are not supported yet.
   * @throws Error if the index is not listed on the scope.
   */
  async describeIndex(indexName: string): Promise<IndexStats> {
    const scope = await this.resolveScope();
    if (!(await this.listIndexes()).includes(indexName)) {
      throw new Error(`Index ${indexName} does not exist`);
    }

    const index = await scope.searchIndexes().getIndex(indexName);
    const embeddingField =
      index.params.mapping?.types?.[`${this.scopeName}.${this.collectionName}`]?.properties?.embedding?.fields?.[0];
    const similarity = embeddingField?.similarity as CouchbaseMetric | undefined;

    // Reverse lookup: Couchbase similarity name -> Mastra metric name.
    const metric = (Object.keys(DISTANCE_MAPPING) as MastraMetric[]).find(
      key => DISTANCE_MAPPING[key] === similarity,
    );

    return {
      dimension: embeddingField?.dims,
      count: -1, // Not added support yet for adding a count of documents covered by an index
      metric,
    };
  }

  /**
   * Drops a Search index from the scope and clears the dimension remembered
   * by this instance.
   *
   * @param indexName Name of the Search index to drop.
   * @throws Error if the index is not listed on the scope.
   */
  async deleteIndex(indexName: string): Promise<void> {
    const scope = await this.resolveScope();
    if (!(await this.listIndexes()).includes(indexName)) {
      throw new Error(`Index ${indexName} does not exist`);
    }
    await scope.searchIndexes().dropIndex(indexName);
    this.vector_dimension = null;
  }
}