@mastra/chroma 0.1.5 → 0.1.6-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +5 -5
- package/CHANGELOG.md +20 -0
- package/README.md +19 -8
- package/dist/_tsup-dts-rollup.d.ts +15 -7
- package/dist/index.d.ts +1 -0
- package/dist/index.js +22 -7
- package/docker-compose.yaml +7 -0
- package/eslint.config.js +6 -0
- package/package.json +7 -4
- package/src/vector/filter.ts +2 -7
- package/src/vector/index.test.ts +161 -5
- package/src/vector/index.ts +31 -7
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
|
|
2
|
-
> @mastra/chroma@0.1.5 build /home/runner/work/mastra/mastra/stores/chroma
|
|
2
|
+
> @mastra/chroma@0.1.6-alpha.1 build /home/runner/work/mastra/mastra/stores/chroma
|
|
3
3
|
> tsup src/index.ts --format esm --experimental-dts --clean --treeshake
|
|
4
4
|
|
|
5
5
|
[34mCLI[39m Building entry: src/index.ts
|
|
6
6
|
[34mCLI[39m Using tsconfig: tsconfig.json
|
|
7
7
|
[34mCLI[39m tsup v8.3.6
|
|
8
8
|
[34mTSC[39m Build start
|
|
9
|
-
[32mTSC[39m ⚡️ Build success in
|
|
9
|
+
[32mTSC[39m ⚡️ Build success in 6980ms
|
|
10
10
|
[34mDTS[39m Build start
|
|
11
11
|
[34mCLI[39m Target: es2022
|
|
12
12
|
Analysis will use the bundled TypeScript version 5.7.3
|
|
13
13
|
[36mWriting package typings: /home/runner/work/mastra/mastra/stores/chroma/dist/_tsup-dts-rollup.d.ts[39m
|
|
14
|
-
[32mDTS[39m ⚡️ Build success in
|
|
14
|
+
[32mDTS[39m ⚡️ Build success in 6006ms
|
|
15
15
|
[34mCLI[39m Cleaning output folder
|
|
16
16
|
[34mESM[39m Build start
|
|
17
|
-
[32mESM[39m [1mdist/index.js [22m[
|
|
18
|
-
[32mESM[39m ⚡️ Build success in
|
|
17
|
+
[32mESM[39m [1mdist/index.js [22m[32m7.03 KB[39m
|
|
18
|
+
[32mESM[39m ⚡️ Build success in 410ms
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,25 @@
|
|
|
1
1
|
# @mastra/chroma
|
|
2
2
|
|
|
3
|
+
## 0.1.6-alpha.1
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- 5f28f44: Updated Chroma Vector to allow for document storage
|
|
8
|
+
- Updated dependencies [0d185b1]
|
|
9
|
+
- Updated dependencies [ed55f1d]
|
|
10
|
+
- Updated dependencies [8d13b14]
|
|
11
|
+
- Updated dependencies [3ee4831]
|
|
12
|
+
- Updated dependencies [108793c]
|
|
13
|
+
- Updated dependencies [5f28f44]
|
|
14
|
+
- @mastra/core@0.4.3-alpha.1
|
|
15
|
+
|
|
16
|
+
## 0.1.6-alpha.0
|
|
17
|
+
|
|
18
|
+
### Patch Changes
|
|
19
|
+
|
|
20
|
+
- Updated dependencies [06aa827]
|
|
21
|
+
- @mastra/core@0.4.3-alpha.0
|
|
22
|
+
|
|
3
23
|
## 0.1.5
|
|
4
24
|
|
|
5
25
|
### Patch Changes
|
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# @mastra/chroma
|
|
2
2
|
|
|
3
|
-
Vector store implementation for ChromaDB using the official chromadb client with added dimension validation and collection management.
|
|
3
|
+
Vector store implementation for ChromaDB using the official chromadb client with added dimension validation, collection management, and document storage capabilities.
|
|
4
4
|
|
|
5
5
|
## Installation
|
|
6
6
|
|
|
@@ -24,18 +24,26 @@ const vectorStore = new ChromaVector({
|
|
|
24
24
|
// Create a new collection
|
|
25
25
|
await vectorStore.createIndex('my-collection', 1536, 'cosine');
|
|
26
26
|
|
|
27
|
-
// Add vectors
|
|
27
|
+
// Add vectors with documents
|
|
28
28
|
const vectors = [[0.1, 0.2, ...], [0.3, 0.4, ...]];
|
|
29
29
|
const metadata = [{ text: 'doc1' }, { text: 'doc2' }];
|
|
30
|
-
const
|
|
30
|
+
const documents = ['full text 1', 'full text 2'];
|
|
31
|
+
const ids = await vectorStore.upsert(
|
|
32
|
+
'my-collection',
|
|
33
|
+
vectors,
|
|
34
|
+
metadata,
|
|
35
|
+
undefined, // auto-generate IDs
|
|
36
|
+
documents // store original text
|
|
37
|
+
);
|
|
31
38
|
|
|
32
|
-
// Query vectors
|
|
39
|
+
// Query vectors with document filtering
|
|
33
40
|
const results = await vectorStore.query(
|
|
34
41
|
'my-collection',
|
|
35
42
|
[0.1, 0.2, ...],
|
|
36
43
|
10, // topK
|
|
37
|
-
{ text: { $eq: 'doc1' } }, //
|
|
38
|
-
false // includeVector
|
|
44
|
+
{ text: { $eq: 'doc1' } }, // metadata filter
|
|
45
|
+
false, // includeVector
|
|
46
|
+
{ $contains: 'specific text' } // document content filter
|
|
39
47
|
);
|
|
40
48
|
```
|
|
41
49
|
|
|
@@ -54,6 +62,8 @@ Optional:
|
|
|
54
62
|
## Features
|
|
55
63
|
|
|
56
64
|
- Vector similarity search with cosine, euclidean, and dot product metrics
|
|
65
|
+
- Document storage and retrieval
|
|
66
|
+
- Document content filtering
|
|
57
67
|
- Strict vector dimension validation
|
|
58
68
|
- Collection-based organization
|
|
59
69
|
- Metadata filtering support
|
|
@@ -65,8 +75,8 @@ Optional:
|
|
|
65
75
|
## Methods
|
|
66
76
|
|
|
67
77
|
- `createIndex(indexName, dimension, metric?)`: Create a new collection
|
|
68
|
-
- `upsert(indexName, vectors, metadata?, ids?)`: Add or update vectors
|
|
69
|
-
- `query(indexName, queryVector, topK?, filter?, includeVector?)`: Search for similar vectors
|
|
78
|
+
- `upsert(indexName, vectors, metadata?, ids?, documents?)`: Add or update vectors with optional document storage
|
|
79
|
+
- `query(indexName, queryVector, topK?, filter?, includeVector?, documentFilter?)`: Search for similar vectors with optional document filtering
|
|
70
80
|
- `listIndexes()`: List all collections
|
|
71
81
|
- `describeIndex(indexName)`: Get collection statistics
|
|
72
82
|
- `deleteIndex(indexName)`: Delete a collection
|
|
@@ -78,6 +88,7 @@ Query results include:
|
|
|
78
88
|
- `id`: Vector ID
|
|
79
89
|
- `score`: Distance/similarity score
|
|
80
90
|
- `metadata`: Associated metadata
|
|
91
|
+
- `document`: Original document text (if stored)
|
|
81
92
|
- `vector`: Original vector (if includeVector is true)
|
|
82
93
|
|
|
83
94
|
## Related Links
|
package/dist/_tsup-dts-rollup.d.ts
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import { BaseFilterTranslator } from '@mastra/core/filter';
|
|
2
|
-
import { Filter } from '@mastra/core/filter';
|
|
3
|
-
import { IndexStats } from '@mastra/core/vector';
|
|
2
|
+
import type { Filter } from '@mastra/core/filter';
|
|
3
|
+
import type { IndexStats } from '@mastra/core/vector';
|
|
4
4
|
import { MastraVector } from '@mastra/core/vector';
|
|
5
|
-
import { OperatorSupport } from '@mastra/core/filter';
|
|
6
|
-
import { QueryResult } from '@mastra/core/vector';
|
|
5
|
+
import type { OperatorSupport } from '@mastra/core/filter';
|
|
6
|
+
import type { QueryResult } from '@mastra/core/vector';
|
|
7
7
|
|
|
8
8
|
/**
|
|
9
9
|
* Translator for Chroma filter queries.
|
|
@@ -27,12 +27,13 @@ declare class ChromaVector extends MastraVector {
|
|
|
27
27
|
credentials: string;
|
|
28
28
|
};
|
|
29
29
|
});
|
|
30
|
-
|
|
30
|
+
getCollection(indexName: string, throwIfNotExists?: boolean): Promise<any>;
|
|
31
31
|
private validateVectorDimensions;
|
|
32
|
-
upsert(indexName: string, vectors: number[][], metadata?: Record<string, any>[], ids?: string[]): Promise<string[]>;
|
|
32
|
+
upsert(indexName: string, vectors: number[][], metadata?: Record<string, any>[], ids?: string[], documents?: string[]): Promise<string[]>;
|
|
33
|
+
private HnswSpaceMap;
|
|
33
34
|
createIndex(indexName: string, dimension: number, metric?: 'cosine' | 'euclidean' | 'dotproduct'): Promise<void>;
|
|
34
35
|
transformFilter(filter?: Filter): Filter | undefined;
|
|
35
|
-
query(indexName: string, queryVector: number[], topK?: number, filter?: Filter, includeVector?: boolean): Promise<QueryResult[]>;
|
|
36
|
+
query(indexName: string, queryVector: number[], topK?: number, filter?: Filter, includeVector?: boolean, documentFilter?: Filter): Promise<QueryResult[]>;
|
|
36
37
|
listIndexes(): Promise<string[]>;
|
|
37
38
|
describeIndex(indexName: string): Promise<IndexStats>;
|
|
38
39
|
deleteIndex(indexName: string): Promise<void>;
|
|
@@ -40,4 +41,11 @@ declare class ChromaVector extends MastraVector {
|
|
|
40
41
|
export { ChromaVector }
|
|
41
42
|
export { ChromaVector as ChromaVector_alias_1 }
|
|
42
43
|
|
|
44
|
+
declare interface DocumentMetadata {
|
|
45
|
+
content?: string;
|
|
46
|
+
metadata?: Record<string, any>;
|
|
47
|
+
}
|
|
48
|
+
export { DocumentMetadata }
|
|
49
|
+
export { DocumentMetadata as DocumentMetadata_alias_1 }
|
|
50
|
+
|
|
43
51
|
export { }
|
package/dist/index.d.ts
CHANGED
package/dist/index.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { BaseFilterTranslator } from '@mastra/core/filter';
|
|
2
1
|
import { MastraVector } from '@mastra/core/vector';
|
|
3
2
|
import { ChromaClient } from 'chromadb';
|
|
3
|
+
import { BaseFilterTranslator } from '@mastra/core/filter';
|
|
4
4
|
|
|
5
5
|
// src/vector/index.ts
|
|
6
6
|
var ChromaFilterTranslator = class extends BaseFilterTranslator {
|
|
@@ -105,7 +105,7 @@ var ChromaVector = class extends MastraVector {
|
|
|
105
105
|
try {
|
|
106
106
|
const collection = await this.client.getCollection({ name: indexName, embeddingFunction: void 0 });
|
|
107
107
|
this.collections.set(indexName, collection);
|
|
108
|
-
} catch
|
|
108
|
+
} catch {
|
|
109
109
|
if (throwIfNotExists) {
|
|
110
110
|
throw new Error(`Index ${indexName} does not exist`);
|
|
111
111
|
}
|
|
@@ -122,7 +122,7 @@ var ChromaVector = class extends MastraVector {
|
|
|
122
122
|
}
|
|
123
123
|
}
|
|
124
124
|
}
|
|
125
|
-
async upsert(indexName, vectors, metadata, ids) {
|
|
125
|
+
async upsert(indexName, vectors, metadata, ids, documents) {
|
|
126
126
|
const collection = await this.getCollection(indexName);
|
|
127
127
|
const stats = await this.describeIndex(indexName);
|
|
128
128
|
this.validateVectorDimensions(vectors, stats.dimension);
|
|
@@ -131,19 +131,31 @@ var ChromaVector = class extends MastraVector {
|
|
|
131
131
|
await collection.upsert({
|
|
132
132
|
ids: generatedIds,
|
|
133
133
|
embeddings: vectors,
|
|
134
|
-
metadatas: normalizedMetadata
|
|
134
|
+
metadatas: normalizedMetadata,
|
|
135
|
+
documents
|
|
135
136
|
});
|
|
136
137
|
return generatedIds;
|
|
137
138
|
}
|
|
139
|
+
HnswSpaceMap = {
|
|
140
|
+
cosine: "cosine",
|
|
141
|
+
euclidean: "l2",
|
|
142
|
+
dotproduct: "ip",
|
|
143
|
+
l2: "euclidean",
|
|
144
|
+
ip: "dotproduct"
|
|
145
|
+
};
|
|
138
146
|
async createIndex(indexName, dimension, metric = "cosine") {
|
|
139
147
|
if (!Number.isInteger(dimension) || dimension <= 0) {
|
|
140
148
|
throw new Error("Dimension must be a positive integer");
|
|
141
149
|
}
|
|
150
|
+
const hnswSpace = this.HnswSpaceMap[metric];
|
|
151
|
+
if (!["cosine", "l2", "ip"].includes(hnswSpace)) {
|
|
152
|
+
throw new Error(`Invalid metric: "${metric}". Must be one of: cosine, euclidean, dotproduct`);
|
|
153
|
+
}
|
|
142
154
|
await this.client.createCollection({
|
|
143
155
|
name: indexName,
|
|
144
156
|
metadata: {
|
|
145
157
|
dimension,
|
|
146
|
-
metric
|
|
158
|
+
"hnsw:space": this.HnswSpaceMap[metric]
|
|
147
159
|
}
|
|
148
160
|
});
|
|
149
161
|
}
|
|
@@ -152,7 +164,7 @@ var ChromaVector = class extends MastraVector {
|
|
|
152
164
|
const translatedFilter = chromaFilter.translate(filter);
|
|
153
165
|
return translatedFilter;
|
|
154
166
|
}
|
|
155
|
-
async query(indexName, queryVector, topK = 10, filter, includeVector = false) {
|
|
167
|
+
async query(indexName, queryVector, topK = 10, filter, includeVector = false, documentFilter) {
|
|
156
168
|
const collection = await this.getCollection(indexName, true);
|
|
157
169
|
const defaultInclude = ["documents", "metadatas", "distances"];
|
|
158
170
|
const translatedFilter = this.transformFilter(filter);
|
|
@@ -160,12 +172,14 @@ var ChromaVector = class extends MastraVector {
|
|
|
160
172
|
queryEmbeddings: [queryVector],
|
|
161
173
|
nResults: topK,
|
|
162
174
|
where: translatedFilter,
|
|
175
|
+
whereDocument: documentFilter,
|
|
163
176
|
include: includeVector ? [...defaultInclude, "embeddings"] : defaultInclude
|
|
164
177
|
});
|
|
165
178
|
return (results.ids[0] || []).map((id, index) => ({
|
|
166
179
|
id,
|
|
167
180
|
score: results.distances?.[0]?.[index] || 0,
|
|
168
181
|
metadata: results.metadatas?.[0]?.[index] || {},
|
|
182
|
+
document: results.documents?.[0]?.[index],
|
|
169
183
|
...includeVector && { vector: results.embeddings?.[0]?.[index] || [] }
|
|
170
184
|
}));
|
|
171
185
|
}
|
|
@@ -177,10 +191,11 @@ var ChromaVector = class extends MastraVector {
|
|
|
177
191
|
const collection = await this.getCollection(indexName);
|
|
178
192
|
const count = await collection.count();
|
|
179
193
|
const metadata = collection.metadata;
|
|
194
|
+
const hnswSpace = metadata?.["hnsw:space"];
|
|
180
195
|
return {
|
|
181
196
|
dimension: metadata?.dimension || 0,
|
|
182
197
|
count,
|
|
183
|
-
metric:
|
|
198
|
+
metric: this.HnswSpaceMap[hnswSpace]
|
|
184
199
|
};
|
|
185
200
|
}
|
|
186
201
|
async deleteIndex(indexName) {
|
package/eslint.config.js
ADDED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mastra/chroma",
|
|
3
|
-
"version": "0.1.5",
|
|
3
|
+
"version": "0.1.6-alpha.1",
|
|
4
4
|
"description": "Chroma vector store provider for Mastra",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -16,18 +16,21 @@
|
|
|
16
16
|
},
|
|
17
17
|
"dependencies": {
|
|
18
18
|
"chromadb": "^1.9.4",
|
|
19
|
-
"@mastra/core": "^0.4.
|
|
19
|
+
"@mastra/core": "^0.4.3-alpha.1"
|
|
20
20
|
},
|
|
21
21
|
"devDependencies": {
|
|
22
22
|
"@microsoft/api-extractor": "^7.49.2",
|
|
23
23
|
"@types/node": "^22.13.1",
|
|
24
24
|
"tsup": "^8.0.1",
|
|
25
25
|
"typescript": "^5.7.3",
|
|
26
|
-
"vitest": "^3.0.4"
|
|
26
|
+
"vitest": "^3.0.4",
|
|
27
|
+
"eslint": "^9.20.1",
|
|
28
|
+
"@internal/lint": "0.0.0"
|
|
27
29
|
},
|
|
28
30
|
"scripts": {
|
|
29
31
|
"build": "tsup src/index.ts --format esm --experimental-dts --clean --treeshake",
|
|
30
32
|
"build:watch": "pnpm build --watch",
|
|
31
|
-
"test": "vitest run"
|
|
33
|
+
"test": "vitest run",
|
|
34
|
+
"lint": "eslint ."
|
|
32
35
|
}
|
|
33
36
|
}
|
package/src/vector/filter.ts
CHANGED
|
@@ -1,10 +1,5 @@
|
|
|
1
|
-
import {
|
|
2
|
-
|
|
3
|
-
type FieldCondition,
|
|
4
|
-
type Filter,
|
|
5
|
-
type OperatorSupport,
|
|
6
|
-
type QueryOperator,
|
|
7
|
-
} from '@mastra/core/filter';
|
|
1
|
+
import { BaseFilterTranslator } from '@mastra/core/filter';
|
|
2
|
+
import type { FieldCondition, Filter, OperatorSupport, QueryOperator } from '@mastra/core/filter';
|
|
8
3
|
|
|
9
4
|
/**
|
|
10
5
|
* Translator for Chroma filter queries.
|
package/src/vector/index.test.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { QueryResult, IndexStats } from '@mastra/core/vector';
|
|
1
|
+
import type { QueryResult, IndexStats } from '@mastra/core/vector';
|
|
2
2
|
import { describe, expect, beforeEach, afterEach, it, beforeAll, afterAll } from 'vitest';
|
|
3
3
|
|
|
4
4
|
import { ChromaVector } from './';
|
|
@@ -10,13 +10,14 @@ describe('ChromaVector Integration Tests', () => {
|
|
|
10
10
|
|
|
11
11
|
const testIndexName = 'test-index';
|
|
12
12
|
const testIndexName2 = 'test-index-2';
|
|
13
|
+
const testIndexName3 = 'test-index-3';
|
|
13
14
|
const dimension = 3;
|
|
14
15
|
|
|
15
16
|
beforeEach(async () => {
|
|
16
17
|
// Clean up any existing test index
|
|
17
18
|
try {
|
|
18
19
|
await vectorDB.deleteIndex(testIndexName);
|
|
19
|
-
} catch
|
|
20
|
+
} catch {
|
|
20
21
|
// Ignore errors if index doesn't exist
|
|
21
22
|
}
|
|
22
23
|
await vectorDB.createIndex(testIndexName, dimension);
|
|
@@ -26,7 +27,7 @@ describe('ChromaVector Integration Tests', () => {
|
|
|
26
27
|
// Cleanup after tests
|
|
27
28
|
try {
|
|
28
29
|
await vectorDB.deleteIndex(testIndexName);
|
|
29
|
-
} catch
|
|
30
|
+
} catch {
|
|
30
31
|
// Ignore cleanup errors
|
|
31
32
|
}
|
|
32
33
|
}, 5000);
|
|
@@ -309,7 +310,7 @@ describe('ChromaVector Integration Tests', () => {
|
|
|
309
310
|
beforeAll(async () => {
|
|
310
311
|
try {
|
|
311
312
|
await vectorDB.deleteIndex(testIndexName2);
|
|
312
|
-
} catch
|
|
313
|
+
} catch {
|
|
313
314
|
// Ignore errors if index doesn't exist
|
|
314
315
|
}
|
|
315
316
|
await vectorDB.createIndex(testIndexName2, dimension);
|
|
@@ -357,7 +358,7 @@ describe('ChromaVector Integration Tests', () => {
|
|
|
357
358
|
// Cleanup after tests
|
|
358
359
|
try {
|
|
359
360
|
await vectorDB.deleteIndex(testIndexName2);
|
|
360
|
-
} catch
|
|
361
|
+
} catch {
|
|
361
362
|
// Ignore cleanup errors
|
|
362
363
|
}
|
|
363
364
|
});
|
|
@@ -886,4 +887,159 @@ describe('ChromaVector Integration Tests', () => {
|
|
|
886
887
|
});
|
|
887
888
|
});
|
|
888
889
|
});
|
|
890
|
+
|
|
891
|
+
describe('Document Operations and Filtering', () => {
|
|
892
|
+
const testDocuments = [
|
|
893
|
+
'The quick brown fox jumps over the lazy dog',
|
|
894
|
+
'Pack my box with five dozen liquor jugs',
|
|
895
|
+
'How vexingly quick daft zebras JUMP',
|
|
896
|
+
];
|
|
897
|
+
|
|
898
|
+
beforeAll(async () => {
|
|
899
|
+
try {
|
|
900
|
+
await vectorDB.deleteIndex(testIndexName3);
|
|
901
|
+
} catch {
|
|
902
|
+
// Ignore errors if index doesn't exist
|
|
903
|
+
}
|
|
904
|
+
await vectorDB.createIndex(testIndexName3, dimension);
|
|
905
|
+
|
|
906
|
+
const testVectors = [
|
|
907
|
+
[1.0, 0.0, 0.0],
|
|
908
|
+
[0.0, 1.0, 0.0],
|
|
909
|
+
[0.0, 0.0, 1.0],
|
|
910
|
+
];
|
|
911
|
+
|
|
912
|
+
const testMetadata = [
|
|
913
|
+
{ source: 'pangram1', length: 43 },
|
|
914
|
+
{ source: 'pangram2', length: 32 },
|
|
915
|
+
{ source: 'pangram3', length: 30 },
|
|
916
|
+
];
|
|
917
|
+
const testIds = ['doc1', 'doc2', 'doc3'];
|
|
918
|
+
|
|
919
|
+
await vectorDB.upsert(testIndexName3, testVectors, testMetadata, testIds, testDocuments);
|
|
920
|
+
|
|
921
|
+
// Wait for indexing
|
|
922
|
+
await new Promise(resolve => setTimeout(resolve, 2000));
|
|
923
|
+
});
|
|
924
|
+
|
|
925
|
+
afterAll(async () => {
|
|
926
|
+
// Cleanup after tests
|
|
927
|
+
try {
|
|
928
|
+
await vectorDB.deleteIndex(testIndexName3);
|
|
929
|
+
} catch {
|
|
930
|
+
// Ignore cleanup errors
|
|
931
|
+
}
|
|
932
|
+
});
|
|
933
|
+
|
|
934
|
+
describe('Basic Document Operations', () => {
|
|
935
|
+
it('should store and retrieve documents', async () => {
|
|
936
|
+
const results = await vectorDB.query(testIndexName3, [1.0, 0.0, 0.0], 3);
|
|
937
|
+
expect(results).toHaveLength(3);
|
|
938
|
+
// Verify documents are returned
|
|
939
|
+
expect(results[0].document).toBe(testDocuments[0]);
|
|
940
|
+
});
|
|
941
|
+
|
|
942
|
+
it('should filter documents using $contains', async () => {
|
|
943
|
+
const results = await vectorDB.query(testIndexName3, [1.0, 0.0, 0.0], 3, undefined, false, {
|
|
944
|
+
$contains: 'quick',
|
|
945
|
+
});
|
|
946
|
+
expect(results).toHaveLength(2);
|
|
947
|
+
});
|
|
948
|
+
|
|
949
|
+
it('should filter with $not_contains', async () => {
|
|
950
|
+
const results = await vectorDB.query(testIndexName3, [1.0, 0.0, 0.0], 3, undefined, false, {
|
|
951
|
+
$not_contains: 'fox',
|
|
952
|
+
});
|
|
953
|
+
expect(results.every(r => !r.document?.includes('fox'))).toBe(true);
|
|
954
|
+
});
|
|
955
|
+
|
|
956
|
+
it('should combine metadata and document filters', async () => {
|
|
957
|
+
const results = await vectorDB.query(testIndexName3, [1.0, 0.0, 0.0], 3, { source: 'pangram1' }, false, {
|
|
958
|
+
$contains: 'fox',
|
|
959
|
+
});
|
|
960
|
+
expect(results).toHaveLength(1);
|
|
961
|
+
expect(results[0].metadata?.source).toBe('pangram1');
|
|
962
|
+
expect(results[0].document).toContain('fox');
|
|
963
|
+
});
|
|
964
|
+
});
|
|
965
|
+
|
|
966
|
+
describe('Complex Document Filtering', () => {
|
|
967
|
+
it('should handle $and conditions', async () => {
|
|
968
|
+
const results = await vectorDB.query(testIndexName3, [1.0, 0.0, 0.0], 3, undefined, false, {
|
|
969
|
+
$and: [{ $contains: 'quick' }, { $not_contains: 'fox' }],
|
|
970
|
+
});
|
|
971
|
+
expect(results).toHaveLength(1);
|
|
972
|
+
expect(results[0].document).toContain('quick');
|
|
973
|
+
expect(results[0].document).not.toContain('fox');
|
|
974
|
+
});
|
|
975
|
+
|
|
976
|
+
it('should handle $or conditions', async () => {
|
|
977
|
+
const results = await vectorDB.query(testIndexName3, [1.0, 0.0, 0.0], 3, undefined, false, {
|
|
978
|
+
$or: [{ $contains: 'fox' }, { $contains: 'zebras' }],
|
|
979
|
+
});
|
|
980
|
+
expect(results).toHaveLength(2);
|
|
981
|
+
});
|
|
982
|
+
});
|
|
983
|
+
|
|
984
|
+
describe('Edge Cases and Validation', () => {
|
|
985
|
+
it('should reject empty string in $contains', async () => {
|
|
986
|
+
await expect(
|
|
987
|
+
vectorDB.query(testIndexName3, [1.0, 0.0, 0.0], 3, undefined, false, { $contains: '' }),
|
|
988
|
+
).rejects.toThrow('Expected where document operand value for operator $contains to be a non-empty str');
|
|
989
|
+
});
|
|
990
|
+
|
|
991
|
+
it('should be case sensitive', async () => {
|
|
992
|
+
// First verify lowercase works
|
|
993
|
+
const lowerResults = await vectorDB.query(testIndexName3, [1.0, 0.0, 0.0], 3, undefined, false, {
|
|
994
|
+
$contains: 'quick',
|
|
995
|
+
});
|
|
996
|
+
expect(lowerResults.length).toBe(2);
|
|
997
|
+
|
|
998
|
+
// Then verify uppercase doesn't match
|
|
999
|
+
const upperResults = await vectorDB.query(testIndexName3, [1.0, 0.0, 0.0], 3, undefined, false, {
|
|
1000
|
+
$contains: 'QUICK',
|
|
1001
|
+
});
|
|
1002
|
+
expect(upperResults.length).toBe(0);
|
|
1003
|
+
|
|
1004
|
+
const upperResults2 = await vectorDB.query(testIndexName3, [1.0, 0.0, 0.0], 3, undefined, false, {
|
|
1005
|
+
$contains: 'JUMP',
|
|
1006
|
+
});
|
|
1007
|
+
expect(upperResults2.length).toBe(1);
|
|
1008
|
+
});
|
|
1009
|
+
|
|
1010
|
+
it('should handle exact string matches', async () => {
|
|
1011
|
+
const results = await vectorDB.query(
|
|
1012
|
+
testIndexName3,
|
|
1013
|
+
[1.0, 0.0, 0.0],
|
|
1014
|
+
3,
|
|
1015
|
+
undefined,
|
|
1016
|
+
false,
|
|
1017
|
+
{ $contains: 'quick brown' }, // Test multi-word match
|
|
1018
|
+
);
|
|
1019
|
+
expect(results.length).toBe(1);
|
|
1020
|
+
expect(results[0].document).toContain('quick brown');
|
|
1021
|
+
});
|
|
1022
|
+
|
|
1023
|
+
it('should handle deeply nested logical operators', async () => {
|
|
1024
|
+
const results = await vectorDB.query(testIndexName3, [1.0, 0.0, 0.0], 3, undefined, false, {
|
|
1025
|
+
$or: [
|
|
1026
|
+
{
|
|
1027
|
+
$and: [{ $contains: 'quick' }, { $not_contains: 'fox' }],
|
|
1028
|
+
},
|
|
1029
|
+
{
|
|
1030
|
+
$and: [{ $contains: 'box' }, { $not_contains: 'quick' }],
|
|
1031
|
+
},
|
|
1032
|
+
],
|
|
1033
|
+
});
|
|
1034
|
+
expect(results.length).toBeGreaterThan(0);
|
|
1035
|
+
results.forEach(result => {
|
|
1036
|
+
if (result.document?.includes('quick')) {
|
|
1037
|
+
expect(result.document).not.toContain('fox');
|
|
1038
|
+
} else if (result.document?.includes('box')) {
|
|
1039
|
+
expect(result.document).not.toContain('quick');
|
|
1040
|
+
}
|
|
1041
|
+
});
|
|
1042
|
+
});
|
|
1043
|
+
});
|
|
1044
|
+
});
|
|
889
1045
|
});
|
package/src/vector/index.ts
CHANGED
|
@@ -1,9 +1,15 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { MastraVector
|
|
1
|
+
import type { Filter } from '@mastra/core/filter';
|
|
2
|
+
import { MastraVector } from '@mastra/core/vector';
|
|
3
|
+
import type { QueryResult, IndexStats } from '@mastra/core/vector';
|
|
3
4
|
import { ChromaClient } from 'chromadb';
|
|
4
5
|
|
|
5
6
|
import { ChromaFilterTranslator } from './filter';
|
|
6
7
|
|
|
8
|
+
export interface DocumentMetadata {
|
|
9
|
+
content?: string;
|
|
10
|
+
metadata?: Record<string, any>;
|
|
11
|
+
}
|
|
12
|
+
|
|
7
13
|
export class ChromaVector extends MastraVector {
|
|
8
14
|
private client: ChromaClient;
|
|
9
15
|
private collections: Map<string, any>;
|
|
@@ -26,11 +32,11 @@ export class ChromaVector extends MastraVector {
|
|
|
26
32
|
this.collections = new Map();
|
|
27
33
|
}
|
|
28
34
|
|
|
29
|
-
|
|
35
|
+
async getCollection(indexName: string, throwIfNotExists: boolean = true) {
|
|
30
36
|
try {
|
|
31
37
|
const collection = await this.client.getCollection({ name: indexName, embeddingFunction: undefined as any });
|
|
32
38
|
this.collections.set(indexName, collection);
|
|
33
|
-
} catch
|
|
39
|
+
} catch {
|
|
34
40
|
if (throwIfNotExists) {
|
|
35
41
|
throw new Error(`Index ${indexName} does not exist`);
|
|
36
42
|
}
|
|
@@ -54,6 +60,7 @@ export class ChromaVector extends MastraVector {
|
|
|
54
60
|
vectors: number[][],
|
|
55
61
|
metadata?: Record<string, any>[],
|
|
56
62
|
ids?: string[],
|
|
63
|
+
documents?: string[],
|
|
57
64
|
): Promise<string[]> {
|
|
58
65
|
const collection = await this.getCollection(indexName);
|
|
59
66
|
|
|
@@ -73,11 +80,20 @@ export class ChromaVector extends MastraVector {
|
|
|
73
80
|
ids: generatedIds,
|
|
74
81
|
embeddings: vectors,
|
|
75
82
|
metadatas: normalizedMetadata,
|
|
83
|
+
documents: documents,
|
|
76
84
|
});
|
|
77
85
|
|
|
78
86
|
return generatedIds;
|
|
79
87
|
}
|
|
80
88
|
|
|
89
|
+
private HnswSpaceMap = {
|
|
90
|
+
cosine: 'cosine',
|
|
91
|
+
euclidean: 'l2',
|
|
92
|
+
dotproduct: 'ip',
|
|
93
|
+
l2: 'euclidean',
|
|
94
|
+
ip: 'dotproduct',
|
|
95
|
+
};
|
|
96
|
+
|
|
81
97
|
async createIndex(
|
|
82
98
|
indexName: string,
|
|
83
99
|
dimension: number,
|
|
@@ -86,11 +102,15 @@ export class ChromaVector extends MastraVector {
|
|
|
86
102
|
if (!Number.isInteger(dimension) || dimension <= 0) {
|
|
87
103
|
throw new Error('Dimension must be a positive integer');
|
|
88
104
|
}
|
|
105
|
+
const hnswSpace = this.HnswSpaceMap[metric];
|
|
106
|
+
if (!['cosine', 'l2', 'ip'].includes(hnswSpace)) {
|
|
107
|
+
throw new Error(`Invalid metric: "${metric}". Must be one of: cosine, euclidean, dotproduct`);
|
|
108
|
+
}
|
|
89
109
|
await this.client.createCollection({
|
|
90
110
|
name: indexName,
|
|
91
111
|
metadata: {
|
|
92
112
|
dimension,
|
|
93
|
-
metric,
|
|
113
|
+
'hnsw:space': this.HnswSpaceMap[metric],
|
|
94
114
|
},
|
|
95
115
|
});
|
|
96
116
|
}
|
|
@@ -106,17 +126,18 @@ export class ChromaVector extends MastraVector {
|
|
|
106
126
|
topK: number = 10,
|
|
107
127
|
filter?: Filter,
|
|
108
128
|
includeVector: boolean = false,
|
|
129
|
+
documentFilter?: Filter,
|
|
109
130
|
): Promise<QueryResult[]> {
|
|
110
131
|
const collection = await this.getCollection(indexName, true);
|
|
111
132
|
|
|
112
133
|
const defaultInclude = ['documents', 'metadatas', 'distances'];
|
|
113
134
|
|
|
114
135
|
const translatedFilter = this.transformFilter(filter);
|
|
115
|
-
|
|
116
136
|
const results = await collection.query({
|
|
117
137
|
queryEmbeddings: [queryVector],
|
|
118
138
|
nResults: topK,
|
|
119
139
|
where: translatedFilter,
|
|
140
|
+
whereDocument: documentFilter,
|
|
120
141
|
include: includeVector ? [...defaultInclude, 'embeddings'] : defaultInclude,
|
|
121
142
|
});
|
|
122
143
|
|
|
@@ -125,6 +146,7 @@ export class ChromaVector extends MastraVector {
|
|
|
125
146
|
id,
|
|
126
147
|
score: results.distances?.[0]?.[index] || 0,
|
|
127
148
|
metadata: results.metadatas?.[0]?.[index] || {},
|
|
149
|
+
document: results.documents?.[0]?.[index],
|
|
128
150
|
...(includeVector && { vector: results.embeddings?.[0]?.[index] || [] }),
|
|
129
151
|
}));
|
|
130
152
|
}
|
|
@@ -139,10 +161,12 @@ export class ChromaVector extends MastraVector {
|
|
|
139
161
|
const count = await collection.count();
|
|
140
162
|
const metadata = collection.metadata;
|
|
141
163
|
|
|
164
|
+
const hnswSpace = metadata?.['hnsw:space'] as 'cosine' | 'l2' | 'ip';
|
|
165
|
+
|
|
142
166
|
return {
|
|
143
167
|
dimension: metadata?.dimension || 0,
|
|
144
168
|
count,
|
|
145
|
-
metric:
|
|
169
|
+
metric: this.HnswSpaceMap[hnswSpace] as 'cosine' | 'euclidean' | 'dotproduct',
|
|
146
170
|
};
|
|
147
171
|
}
|
|
148
172
|
|