langchain 0.0.173 → 0.0.175
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/callbacks/manager.cjs +2 -1
- package/dist/callbacks/manager.js +2 -1
- package/dist/load/import_constants.cjs +4 -0
- package/dist/load/import_constants.js +4 -0
- package/dist/memory/index.cjs +2 -1
- package/dist/memory/index.d.ts +1 -1
- package/dist/memory/index.js +1 -1
- package/dist/prompts/selectors/SemanticSimilarityExampleSelector.cjs +15 -18
- package/dist/prompts/selectors/SemanticSimilarityExampleSelector.d.ts +12 -6
- package/dist/prompts/selectors/SemanticSimilarityExampleSelector.js +15 -18
- package/dist/storage/convex.cjs +145 -0
- package/dist/storage/convex.d.ts +85 -0
- package/dist/storage/convex.js +141 -0
- package/dist/stores/message/convex.cjs +120 -0
- package/dist/stores/message/convex.d.ts +60 -0
- package/dist/stores/message/convex.js +116 -0
- package/dist/util/convex.cjs +77 -0
- package/dist/util/convex.d.ts +26 -0
- package/dist/util/convex.js +74 -0
- package/dist/vectorstores/convex.cjs +177 -0
- package/dist/vectorstores/convex.d.ts +113 -0
- package/dist/vectorstores/convex.js +173 -0
- package/dist/vectorstores/milvus.cjs +4 -2
- package/dist/vectorstores/milvus.js +4 -2
- package/dist/vectorstores/pinecone.cjs +46 -9
- package/dist/vectorstores/pinecone.d.ts +20 -2
- package/dist/vectorstores/pinecone.js +46 -9
- package/dist/vectorstores/vercel_postgres.cjs +29 -7
- package/dist/vectorstores/vercel_postgres.d.ts +1 -1
- package/dist/vectorstores/vercel_postgres.js +29 -7
- package/package.json +38 -1
- package/storage/convex.cjs +1 -0
- package/storage/convex.d.ts +1 -0
- package/storage/convex.js +1 -0
- package/stores/message/convex.cjs +1 -0
- package/stores/message/convex.d.ts +1 -0
- package/stores/message/convex.js +1 -0
- package/util/convex.cjs +1 -0
- package/util/convex.d.ts +1 -0
- package/util/convex.js +1 -0
- package/vectorstores/convex.cjs +1 -0
- package/vectorstores/convex.d.ts +1 -0
- package/vectorstores/convex.js +1 -0
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
import { DocumentByInfo, FieldPaths, FilterExpression, FunctionReference, GenericActionCtx, GenericDataModel, GenericTableInfo, NamedTableInfo, NamedVectorIndex, TableNamesInDataModel, VectorFilterBuilder, VectorIndexNames } from "convex/server";
|
|
2
|
+
import { Document } from "../document.js";
|
|
3
|
+
import { Embeddings } from "../embeddings/base.js";
|
|
4
|
+
import { VectorStore } from "./base.js";
|
|
5
|
+
/**
 * Options accepted by the ConvexVectorStore constructor.
 *
 * Only `ctx` is mandatory. The remaining members name the table, vector
 * index, document fields and Convex functions the store should use; when a
 * member is omitted the store implementation falls back to the default
 * noted on that member.
 */
export type ConvexVectorStoreConfig<
    DataModel extends GenericDataModel,
    TableName extends TableNamesInDataModel<DataModel>,
    IndexName extends VectorIndexNames<NamedTableInfo<DataModel, TableName>>,
    TextFieldName extends FieldPaths<NamedTableInfo<DataModel, TableName>>,
    EmbeddingFieldName extends FieldPaths<NamedTableInfo<DataModel, TableName>>,
    MetadataFieldName extends FieldPaths<NamedTableInfo<DataModel, TableName>>,
    InsertMutation extends FunctionReference<"mutation", "internal", {
        table: string;
        document: object;
    }>,
    GetQuery extends FunctionReference<"query", "internal", {
        id: string;
    }, object | null>
> = {
    /** Convex action context used to run the mutation, query and vector search. */
    readonly ctx: GenericActionCtx<DataModel>;
    /** Table that holds the documents. Default: "documents". */
    readonly table?: TableName;
    /** Vector index to search. Default: "byEmbedding". */
    readonly index?: IndexName;
    /** Field that stores the document text. Default: "text". */
    readonly textField?: TextFieldName;
    /** Field that stores the embedding vector. Default: "embedding". */
    readonly embeddingField?: EmbeddingFieldName;
    /** Field that stores the document metadata. Default: "metadata". */
    readonly metadataField?: MetadataFieldName;
    /** Mutation used to insert one document. Default: reference to "langchain/db:insert". */
    readonly insert?: InsertMutation;
    /** Query used to load one document by id. Default: reference to "langchain/db:get". */
    readonly get?: GetQuery;
};
|
|
25
|
+
/**
 * Class that is a wrapper around Convex storage and vector search. It is used
 * to insert embeddings in Convex documents with a vector search index,
 * and perform a vector search on them.
 *
 * ConvexVectorStore does NOT implement maxMarginalRelevanceSearch.
 */
export declare class ConvexVectorStore<
    DataModel extends GenericDataModel,
    TableName extends TableNamesInDataModel<DataModel>,
    IndexName extends VectorIndexNames<NamedTableInfo<DataModel, TableName>>,
    TextFieldName extends FieldPaths<NamedTableInfo<DataModel, TableName>>,
    EmbeddingFieldName extends FieldPaths<NamedTableInfo<DataModel, TableName>>,
    MetadataFieldName extends FieldPaths<NamedTableInfo<DataModel, TableName>>,
    InsertMutation extends FunctionReference<"mutation", "internal", {
        table: string;
        document: object;
    }>,
    GetQuery extends FunctionReference<"query", "internal", {
        id: string;
    }, object | null>
> extends VectorStore {
    /**
     * Shape of the optional `filter` argument of
     * similaritySearchVectorWithScore.
     *
     * - `filter`: callback that builds a Convex vector-search filter
     *   expression; it is forwarded to the vector search unchanged.
     * - `includeEmbeddings`: when true, the stored embedding is added to each
     *   returned document's metadata under the `embedding` key.
     *
     * The number of results is controlled by the method's `k` argument, not
     * by this object.
     */
    FilterType: {
        filter?: (q: VectorFilterBuilder<DocumentByInfo<GenericTableInfo>, NamedVectorIndex<NamedTableInfo<DataModel, TableName>, IndexName>>) => FilterExpression<boolean>;
        includeEmbeddings?: boolean;
    };
    private readonly ctx;
    private readonly table;
    private readonly index;
    private readonly textField;
    private readonly embeddingField;
    private readonly metadataField;
    private readonly insert;
    private readonly get;
    /** Identifier of this vector store implementation. */
    _vectorstoreType(): string;
    /**
     * @param embeddings Embeddings used to turn documents into vectors.
     * @param config Convex context plus optional table, index, field and
     * function overrides.
     */
    constructor(embeddings: Embeddings, config: ConvexVectorStoreConfig<DataModel, TableName, IndexName, TextFieldName, EmbeddingFieldName, MetadataFieldName, InsertMutation, GetQuery>);
    /**
     * Add vectors and their corresponding documents to the Convex table.
     * @param vectors Vectors to be added.
     * @param documents Corresponding documents to be added; `documents[i]`
     * is stored together with `vectors[i]`.
     * @returns Promise that resolves when the vectors and documents have been added.
     */
    addVectors(vectors: number[][], documents: Document[]): Promise<void>;
    /**
     * Add documents to the Convex table. It first converts
     * the documents to vectors using the embeddings and then calls the
     * addVectors method.
     * @param documents Documents to be added.
     * @returns Promise that resolves when the documents have been added.
     */
    addDocuments(documents: Document[]): Promise<void>;
    /**
     * Similarity search on the vectors stored in the
     * Convex table. It returns a list of documents and their
     * corresponding similarity scores.
     * @param query Query vector for the similarity search.
     * @param k Number of nearest neighbors to return.
     * @param filter Optional search options, see `FilterType`.
     * @returns Promise that resolves to a list of documents and their corresponding similarity scores.
     */
    similaritySearchVectorWithScore(query: number[], k: number, filter?: this["FilterType"]): Promise<[Document, number][]>;
    /**
     * Static method to create an instance of ConvexVectorStore from a
     * list of texts. It first converts the texts to vectors and then adds
     * them to the Convex table.
     * @param texts List of texts to be converted to vectors.
     * @param metadatas Metadata for the texts: either one entry per text, or
     * a single object that is used for every text.
     * @param embeddings Embeddings to be used for conversion.
     * @param dbConfig Database configuration for Convex.
     * @returns Promise that resolves to a new instance of ConvexVectorStore.
     */
    static fromTexts<
        DataModel extends GenericDataModel,
        TableName extends TableNamesInDataModel<DataModel>,
        IndexName extends VectorIndexNames<NamedTableInfo<DataModel, TableName>>,
        TextFieldName extends FieldPaths<NamedTableInfo<DataModel, TableName>>,
        EmbeddingFieldName extends FieldPaths<NamedTableInfo<DataModel, TableName>>,
        MetadataFieldName extends FieldPaths<NamedTableInfo<DataModel, TableName>>,
        InsertMutation extends FunctionReference<"mutation", "internal", {
            table: string;
            document: object;
        }>,
        GetQuery extends FunctionReference<"query", "internal", {
            id: string;
        }, object | null>
    >(texts: string[], metadatas: object[] | object, embeddings: Embeddings, dbConfig: ConvexVectorStoreConfig<DataModel, TableName, IndexName, TextFieldName, EmbeddingFieldName, MetadataFieldName, InsertMutation, GetQuery>): Promise<ConvexVectorStore<DataModel, TableName, IndexName, TextFieldName, EmbeddingFieldName, MetadataFieldName, InsertMutation, GetQuery>>;
    /**
     * Static method to create an instance of ConvexVectorStore from a
     * list of documents. It first converts the documents to vectors and then
     * adds them to the Convex table.
     * @param docs List of documents to be converted to vectors.
     * @param embeddings Embeddings to be used for conversion.
     * @param dbConfig Database configuration for Convex.
     * @returns Promise that resolves to a new instance of ConvexVectorStore.
     */
    static fromDocuments<
        DataModel extends GenericDataModel,
        TableName extends TableNamesInDataModel<DataModel>,
        IndexName extends VectorIndexNames<NamedTableInfo<DataModel, TableName>>,
        TextFieldName extends FieldPaths<NamedTableInfo<DataModel, TableName>>,
        EmbeddingFieldName extends FieldPaths<NamedTableInfo<DataModel, TableName>>,
        MetadataFieldName extends FieldPaths<NamedTableInfo<DataModel, TableName>>,
        InsertMutation extends FunctionReference<"mutation", "internal", {
            table: string;
            document: object;
        }>,
        GetQuery extends FunctionReference<"query", "internal", {
            id: string;
        }, object | null>
    >(docs: Document[], embeddings: Embeddings, dbConfig: ConvexVectorStoreConfig<DataModel, TableName, IndexName, TextFieldName, EmbeddingFieldName, MetadataFieldName, InsertMutation, GetQuery>): Promise<ConvexVectorStore<DataModel, TableName, IndexName, TextFieldName, EmbeddingFieldName, MetadataFieldName, InsertMutation, GetQuery>>;
}
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
// eslint-disable-next-line import/no-extraneous-dependencies
|
|
2
|
+
import { makeFunctionReference, } from "convex/server";
|
|
3
|
+
import { Document } from "../document.js";
|
|
4
|
+
import { VectorStore } from "./base.js";
|
|
5
|
+
/**
 * Class that is a wrapper around Convex storage and vector search. It is used
 * to insert embeddings in Convex documents with a vector search index,
 * and perform a vector search on them.
 *
 * ConvexVectorStore does NOT implement maxMarginalRelevanceSearch.
 */
export class ConvexVectorStore extends VectorStore {
    /** Identifier of this vector store implementation. */
    _vectorstoreType() {
        return "convex";
    }
    /**
     * @param embeddings Embeddings used to turn documents into vectors.
     * @param config Must carry `ctx`; every other member is optional and
     * falls back to the default listed below.
     */
    constructor(embeddings, config) {
        super(embeddings, config);
        // Resolved configuration, in the order the fields are defined on the
        // instance.
        const resolved = {
            ctx: config.ctx,
            table: config.table ?? "documents",
            index: config.index ?? "byEmbedding",
            textField: config.textField ?? "text",
            embeddingField: config.embeddingField ?? "embedding",
            metadataField: config.metadataField ?? "metadata",
            insert: config.insert ?? makeFunctionReference("langchain/db:insert"),
            get: config.get ?? makeFunctionReference("langchain/db:get"),
        };
        // Define each field as an own, enumerable, writable data property
        // (class-field semantics).
        for (const [name, value] of Object.entries(resolved)) {
            Object.defineProperty(this, name, {
                enumerable: true,
                configurable: true,
                writable: true,
                value,
            });
        }
    }
    /**
     * Add vectors and their corresponding documents to the Convex table.
     * @param vectors Vectors to be added.
     * @param documents Corresponding documents to be added; `documents[i]`
     * is stored together with `vectors[i]`.
     * @returns Promise that resolves when the vectors and documents have been added.
     */
    async addVectors(vectors, documents) {
        const convexDocuments = vectors.map((embedding, idx) => ({
            [this.textField]: documents[idx].pageContent,
            [this.embeddingField]: embedding,
            [this.metadataField]: documents[idx].metadata,
        }));
        // TODO: Remove chunking when Convex handles the concurrent requests correctly
        // Until then, at most PAGE_SIZE insert mutations are in flight at once;
        // each batch is awaited before the next one starts.
        const PAGE_SIZE = 16;
        for (let i = 0; i < convexDocuments.length; i += PAGE_SIZE) {
            await Promise.all(convexDocuments.slice(i, i + PAGE_SIZE).map((document) => this.ctx.runMutation(this.insert, {
                table: this.table,
                document,
            })));
        }
    }
    /**
     * Add documents to the Convex table. It first converts
     * the documents to vectors using the embeddings and then calls the
     * addVectors method.
     * @param documents Documents to be added.
     * @returns Promise that resolves when the documents have been added.
     */
    async addDocuments(documents) {
        const texts = documents.map(({ pageContent }) => pageContent);
        return this.addVectors(await this.embeddings.embedDocuments(texts), documents);
    }
    /**
     * Similarity search on the vectors stored in the
     * Convex table. It returns a list of documents and their
     * corresponding similarity scores.
     *
     * Search hits whose document cannot be loaded (the `get` query resolves
     * to null or undefined) are left out of the result instead of raising a
     * TypeError, so fewer than `k` entries may be returned.
     * @param query Query vector for the similarity search.
     * @param k Number of nearest neighbors to return.
     * @param filter Optional search options: `filter` is forwarded to the
     * vector search, `includeEmbeddings` adds the stored embedding to each
     * document's metadata under the `embedding` key.
     * @returns Promise that resolves to a list of documents and their corresponding similarity scores.
     */
    async similaritySearchVectorWithScore(query, k, filter) {
        const idsAndScores = await this.ctx.vectorSearch(this.table, this.index, {
            vector: query,
            limit: k,
            filter: filter?.filter,
        });
        const documents = await Promise.all(idsAndScores.map(({ _id }) => this.ctx.runQuery(this.get, { id: _id })));
        const results = [];
        documents.forEach((stored, idx) => {
            // The get query is typed as `object | null`; skip hits with no
            // document while keeping the score index aligned with the hit.
            if (stored === null || stored === undefined) {
                return;
            }
            const { [this.textField]: text, [this.embeddingField]: embedding, [this.metadataField]: metadata, } = stored;
            results.push([
                new Document({
                    pageContent: text,
                    metadata: {
                        ...metadata,
                        ...(filter?.includeEmbeddings ? { embedding } : null),
                    },
                }),
                idsAndScores[idx]._score,
            ]);
        });
        return results;
    }
    /**
     * Static method to create an instance of ConvexVectorStore from a
     * list of texts. It first converts the texts to vectors and then adds
     * them to the Convex table.
     * @param texts List of texts to be converted to vectors.
     * @param metadatas Metadata for the texts: an array is indexed per text,
     * any other value is used as the metadata of every text.
     * @param embeddings Embeddings to be used for conversion.
     * @param dbConfig Database configuration for Convex.
     * @returns Promise that resolves to a new instance of ConvexVectorStore.
     */
    static async fromTexts(texts, metadatas, embeddings, dbConfig) {
        const docs = texts.map((text, i) => new Document({
            pageContent: text,
            metadata: Array.isArray(metadatas) ? metadatas[i] : metadatas,
        }));
        return ConvexVectorStore.fromDocuments(docs, embeddings, dbConfig);
    }
    /**
     * Static method to create an instance of ConvexVectorStore from a
     * list of documents. It first converts the documents to vectors and then
     * adds them to the Convex table.
     * @param docs List of documents to be converted to vectors.
     * @param embeddings Embeddings to be used for conversion.
     * @param dbConfig Database configuration for Convex.
     * @returns Promise that resolves to a new instance of ConvexVectorStore.
     */
    static async fromDocuments(docs, embeddings, dbConfig) {
        const instance = new this(embeddings, dbConfig);
        await instance.addDocuments(docs);
        return instance;
    }
}
|
|
@@ -577,11 +577,13 @@ function genCollectionName() {
|
|
|
577
577
|
}
|
|
578
578
|
function getTextFieldMaxLength(documents) {
|
|
579
579
|
let textMaxLength = 0;
|
|
580
|
+
const textEncoder = new TextEncoder();
|
|
580
581
|
// eslint-disable-next-line no-plusplus
|
|
581
582
|
for (let i = 0; i < documents.length; i++) {
|
|
582
583
|
const text = documents[i].pageContent;
|
|
583
|
-
|
|
584
|
-
|
|
584
|
+
const textLengthInBytes = textEncoder.encode(text).length;
|
|
585
|
+
if (textLengthInBytes > textMaxLength) {
|
|
586
|
+
textMaxLength = textLengthInBytes;
|
|
585
587
|
}
|
|
586
588
|
}
|
|
587
589
|
return textMaxLength;
|
|
@@ -550,11 +550,13 @@ function genCollectionName() {
|
|
|
550
550
|
}
|
|
551
551
|
function getTextFieldMaxLength(documents) {
|
|
552
552
|
let textMaxLength = 0;
|
|
553
|
+
const textEncoder = new TextEncoder();
|
|
553
554
|
// eslint-disable-next-line no-plusplus
|
|
554
555
|
for (let i = 0; i < documents.length; i++) {
|
|
555
556
|
const text = documents[i].pageContent;
|
|
556
|
-
|
|
557
|
-
|
|
557
|
+
const textLengthInBytes = textEncoder.encode(text).length;
|
|
558
|
+
if (textLengthInBytes > textMaxLength) {
|
|
559
|
+
textMaxLength = textLengthInBytes;
|
|
558
560
|
}
|
|
559
561
|
}
|
|
560
562
|
return textMaxLength;
|
|
@@ -33,6 +33,7 @@ const flat_1 = __importDefault(require("flat"));
|
|
|
33
33
|
const base_js_1 = require("./base.cjs");
|
|
34
34
|
const document_js_1 = require("../document.cjs");
|
|
35
35
|
const async_caller_js_1 = require("../util/async_caller.cjs");
|
|
36
|
+
const math_js_1 = require("../util/math.cjs");
|
|
36
37
|
/**
|
|
37
38
|
* Class that extends the VectorStore class and provides methods to
|
|
38
39
|
* interact with the Pinecone vector database.
|
|
@@ -166,15 +167,7 @@ class PineconeStore extends base_js_1.VectorStore {
|
|
|
166
167
|
throw new Error("Either ids or delete_all must be provided.");
|
|
167
168
|
}
|
|
168
169
|
}
|
|
169
|
-
|
|
170
|
-
* Method that performs a similarity search in the Pinecone database and
|
|
171
|
-
* returns the results along with their scores.
|
|
172
|
-
* @param query Query vector for the similarity search.
|
|
173
|
-
* @param k Number of top results to return.
|
|
174
|
-
* @param filter Optional filter to apply to the search.
|
|
175
|
-
* @returns Promise that resolves with an array of documents and their scores.
|
|
176
|
-
*/
|
|
177
|
-
async similaritySearchVectorWithScore(query, k, filter) {
|
|
170
|
+
async _runPineconeQuery(query, k, filter, options) {
|
|
178
171
|
if (filter && this.filter) {
|
|
179
172
|
throw new Error("cannot provide both `filter` and `this.filter`");
|
|
180
173
|
}
|
|
@@ -185,7 +178,20 @@ class PineconeStore extends base_js_1.VectorStore {
|
|
|
185
178
|
topK: k,
|
|
186
179
|
vector: query,
|
|
187
180
|
filter: _filter,
|
|
181
|
+
...options,
|
|
188
182
|
});
|
|
183
|
+
return results;
|
|
184
|
+
}
|
|
185
|
+
/**
|
|
186
|
+
* Method that performs a similarity search in the Pinecone database and
|
|
187
|
+
* returns the results along with their scores.
|
|
188
|
+
* @param query Query vector for the similarity search.
|
|
189
|
+
* @param k Number of top results to return.
|
|
190
|
+
* @param filter Optional filter to apply to the search.
|
|
191
|
+
* @returns Promise that resolves with an array of documents and their scores.
|
|
192
|
+
*/
|
|
193
|
+
async similaritySearchVectorWithScore(query, k, filter) {
|
|
194
|
+
const results = await this._runPineconeQuery(query, k, filter);
|
|
189
195
|
const result = [];
|
|
190
196
|
if (results.matches) {
|
|
191
197
|
for (const res of results.matches) {
|
|
@@ -198,6 +204,37 @@ class PineconeStore extends base_js_1.VectorStore {
|
|
|
198
204
|
}
|
|
199
205
|
return result;
|
|
200
206
|
}
|
|
207
|
+
/**
|
|
208
|
+
* Return documents selected using the maximal marginal relevance.
|
|
209
|
+
* Maximal marginal relevance optimizes for similarity to the query AND diversity
|
|
210
|
+
* among selected documents.
|
|
211
|
+
*
|
|
212
|
+
* @param {string} query - Text to look up documents similar to.
|
|
213
|
+
* @param {number} options.k - Number of documents to return.
|
|
214
|
+
* @param {number} options.fetchK=20 - Number of documents to fetch before passing to the MMR algorithm.
|
|
215
|
+
* @param {number} options.lambda=0.5 - Number between 0 and 1 that determines the degree of diversity among the results,
|
|
216
|
+
* where 0 corresponds to maximum diversity and 1 to minimum diversity.
|
|
217
|
+
* @param {PineconeMetadata} options.filter - Optional filter to apply to the search.
|
|
218
|
+
*
|
|
219
|
+
* @returns {Promise<Document[]>} - List of documents selected by maximal marginal relevance.
|
|
220
|
+
*/
|
|
221
|
+
async maxMarginalRelevanceSearch(query, options) {
|
|
222
|
+
const queryEmbedding = await this.embeddings.embedQuery(query);
|
|
223
|
+
const results = await this._runPineconeQuery(queryEmbedding, options.fetchK ?? 20, options.filter, { includeValues: true });
|
|
224
|
+
const matches = results?.matches ?? [];
|
|
225
|
+
const embeddingList = matches.map((match) => match.values);
|
|
226
|
+
const mmrIndexes = (0, math_js_1.maximalMarginalRelevance)(queryEmbedding, embeddingList, options.lambda, options.k);
|
|
227
|
+
const topMmrMatches = mmrIndexes.map((idx) => matches[idx]);
|
|
228
|
+
const finalResult = [];
|
|
229
|
+
for (const res of topMmrMatches) {
|
|
230
|
+
const { [this.textKey]: pageContent, ...metadata } = (res.metadata ??
|
|
231
|
+
{});
|
|
232
|
+
if (res.score) {
|
|
233
|
+
finalResult.push(new document_js_1.Document({ metadata, pageContent }));
|
|
234
|
+
}
|
|
235
|
+
}
|
|
236
|
+
return finalResult;
|
|
237
|
+
}
|
|
201
238
|
/**
|
|
202
239
|
* Static method that creates a new instance of the PineconeStore class
|
|
203
240
|
* from texts.
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { Index as PineconeIndex } from "@pinecone-database/pinecone";
|
|
2
|
-
import { VectorStore } from "./base.js";
|
|
1
|
+
import { RecordMetadata, Index as PineconeIndex } from "@pinecone-database/pinecone";
|
|
2
|
+
import { MaxMarginalRelevanceSearchOptions, VectorStore } from "./base.js";
|
|
3
3
|
import { Embeddings } from "../embeddings/base.js";
|
|
4
4
|
import { Document } from "../document.js";
|
|
5
5
|
import { AsyncCaller } from "../util/async_caller.js";
|
|
@@ -57,6 +57,9 @@ export declare class PineconeStore extends VectorStore {
|
|
|
57
57
|
* @returns Promise that resolves when the delete operation is complete.
|
|
58
58
|
*/
|
|
59
59
|
delete(params: PineconeDeleteParams): Promise<void>;
|
|
60
|
+
/**
 * Runs a single query against the Pinecone index and returns the raw
 * response; shared by similaritySearchVectorWithScore and
 * maxMarginalRelevanceSearch.
 *
 * Throws an Error when a `filter` argument is passed while the store also
 * has its own `filter` set. `options` is spread into the query request
 * after `topK`, `vector` and `filter`.
 * NOTE(review): how the effective filter is chosen when only one of the two
 * is present is not visible in this diff - confirm against the full source.
 *
 * @param query Query vector.
 * @param k Number of matches to request (sent as `topK`).
 * @param filter Optional metadata filter for this call.
 * @param options Extra request options, e.g. `includeValues` to get the
 * matches' vectors back.
 * @returns Promise that resolves with the Pinecone query response.
 */
protected _runPineconeQuery(query: number[], k: number, filter?: PineconeMetadata, options?: {
|
|
61
|
+
includeValues: boolean;
|
|
62
|
+
}): Promise<import("@pinecone-database/pinecone").QueryResponse<RecordMetadata>>;
|
|
60
63
|
/**
|
|
61
64
|
* Method that performs a similarity search in the Pinecone database and
|
|
62
65
|
* returns the results along with their scores.
|
|
@@ -66,6 +69,21 @@ export declare class PineconeStore extends VectorStore {
|
|
|
66
69
|
* @returns Promise that resolves with an array of documents and their scores.
|
|
67
70
|
*/
|
|
68
71
|
similaritySearchVectorWithScore(query: number[], k: number, filter?: PineconeMetadata): Promise<[Document, number][]>;
|
|
72
|
+
/**
|
|
73
|
+
* Return documents selected using the maximal marginal relevance.
|
|
74
|
+
* Maximal marginal relevance optimizes for similarity to the query AND diversity
|
|
75
|
+
* among selected documents. The query text is embedded with the store's embeddings, and the candidates are fetched from Pinecone together with their vector values so they can be re-ranked.
|
|
76
|
+
*
|
|
77
|
+
* @param {string} query - Text to look up documents similar to.
|
|
78
|
+
* @param {number} options.k - Number of documents to return.
|
|
79
|
+
* @param {number} options.fetchK=20 - Number of documents to fetch before passing to the MMR algorithm.
|
|
80
|
+
* @param {number} options.lambda=0.5 - Number between 0 and 1 that determines the degree of diversity among the results,
|
|
81
|
+
* where 0 corresponds to maximum diversity and 1 to minimum diversity.
|
|
82
|
+
* @param {PineconeMetadata} options.filter - Optional filter to apply to the search.
|
|
83
|
+
*
|
|
84
|
+
* @returns {Promise<Document[]>} - List of documents selected by maximal marginal relevance; scores are not returned.
|
|
85
|
+
*/
|
|
86
|
+
maxMarginalRelevanceSearch(query: string, options: MaxMarginalRelevanceSearchOptions<this["FilterType"]>): Promise<Document[]>;
|
|
69
87
|
/**
|
|
70
88
|
* Static method that creates a new instance of the PineconeStore class
|
|
71
89
|
* from texts.
|
|
@@ -4,6 +4,7 @@ import flatten from "flat";
|
|
|
4
4
|
import { VectorStore } from "./base.js";
|
|
5
5
|
import { Document } from "../document.js";
|
|
6
6
|
import { AsyncCaller } from "../util/async_caller.js";
|
|
7
|
+
import { maximalMarginalRelevance } from "../util/math.js";
|
|
7
8
|
/**
|
|
8
9
|
* Class that extends the VectorStore class and provides methods to
|
|
9
10
|
* interact with the Pinecone vector database.
|
|
@@ -137,15 +138,7 @@ export class PineconeStore extends VectorStore {
|
|
|
137
138
|
throw new Error("Either ids or delete_all must be provided.");
|
|
138
139
|
}
|
|
139
140
|
}
|
|
140
|
-
|
|
141
|
-
* Method that performs a similarity search in the Pinecone database and
|
|
142
|
-
* returns the results along with their scores.
|
|
143
|
-
* @param query Query vector for the similarity search.
|
|
144
|
-
* @param k Number of top results to return.
|
|
145
|
-
* @param filter Optional filter to apply to the search.
|
|
146
|
-
* @returns Promise that resolves with an array of documents and their scores.
|
|
147
|
-
*/
|
|
148
|
-
async similaritySearchVectorWithScore(query, k, filter) {
|
|
141
|
+
async _runPineconeQuery(query, k, filter, options) {
|
|
149
142
|
if (filter && this.filter) {
|
|
150
143
|
throw new Error("cannot provide both `filter` and `this.filter`");
|
|
151
144
|
}
|
|
@@ -156,7 +149,20 @@ export class PineconeStore extends VectorStore {
|
|
|
156
149
|
topK: k,
|
|
157
150
|
vector: query,
|
|
158
151
|
filter: _filter,
|
|
152
|
+
...options,
|
|
159
153
|
});
|
|
154
|
+
return results;
|
|
155
|
+
}
|
|
156
|
+
/**
|
|
157
|
+
* Method that performs a similarity search in the Pinecone database and
|
|
158
|
+
* returns the results along with their scores.
|
|
159
|
+
* @param query Query vector for the similarity search.
|
|
160
|
+
* @param k Number of top results to return.
|
|
161
|
+
* @param filter Optional filter to apply to the search.
|
|
162
|
+
* @returns Promise that resolves with an array of documents and their scores.
|
|
163
|
+
*/
|
|
164
|
+
async similaritySearchVectorWithScore(query, k, filter) {
|
|
165
|
+
const results = await this._runPineconeQuery(query, k, filter);
|
|
160
166
|
const result = [];
|
|
161
167
|
if (results.matches) {
|
|
162
168
|
for (const res of results.matches) {
|
|
@@ -169,6 +175,37 @@ export class PineconeStore extends VectorStore {
|
|
|
169
175
|
}
|
|
170
176
|
return result;
|
|
171
177
|
}
|
|
178
|
+
/**
|
|
179
|
+
* Return documents selected using the maximal marginal relevance.
|
|
180
|
+
* Maximal marginal relevance optimizes for similarity to the query AND diversity
|
|
181
|
+
* among selected documents.
|
|
182
|
+
*
|
|
183
|
+
* @param {string} query - Text to look up documents similar to.
|
|
184
|
+
* @param {number} options.k - Number of documents to return.
|
|
185
|
+
* @param {number} options.fetchK=20 - Number of documents to fetch before passing to the MMR algorithm.
|
|
186
|
+
* @param {number} options.lambda=0.5 - Number between 0 and 1 that determines the degree of diversity among the results,
|
|
187
|
+
* where 0 corresponds to maximum diversity and 1 to minimum diversity.
|
|
188
|
+
* @param {PineconeMetadata} options.filter - Optional filter to apply to the search.
|
|
189
|
+
*
|
|
190
|
+
* @returns {Promise<Document[]>} - List of documents selected by maximal marginal relevance.
|
|
191
|
+
*/
|
|
192
|
+
async maxMarginalRelevanceSearch(query, options) {
|
|
193
|
+
const queryEmbedding = await this.embeddings.embedQuery(query);
|
|
194
|
+
const results = await this._runPineconeQuery(queryEmbedding, options.fetchK ?? 20, options.filter, { includeValues: true });
|
|
195
|
+
const matches = results?.matches ?? [];
|
|
196
|
+
const embeddingList = matches.map((match) => match.values);
|
|
197
|
+
const mmrIndexes = maximalMarginalRelevance(queryEmbedding, embeddingList, options.lambda, options.k);
|
|
198
|
+
const topMmrMatches = mmrIndexes.map((idx) => matches[idx]);
|
|
199
|
+
const finalResult = [];
|
|
200
|
+
for (const res of topMmrMatches) {
|
|
201
|
+
const { [this.textKey]: pageContent, ...metadata } = (res.metadata ??
|
|
202
|
+
{});
|
|
203
|
+
if (res.score) {
|
|
204
|
+
finalResult.push(new Document({ metadata, pageContent }));
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
return finalResult;
|
|
208
|
+
}
|
|
172
209
|
/**
|
|
173
210
|
* Static method that creates a new instance of the PineconeStore class
|
|
174
211
|
* from texts.
|
|
@@ -206,14 +206,36 @@ class VercelPostgres extends base_js_1.VectorStore {
|
|
|
206
206
|
*/
|
|
207
207
|
async similaritySearchVectorWithScore(query, k, filter) {
|
|
208
208
|
const embeddingString = `[${query.join(",")}]`;
|
|
209
|
-
const _filter = filter ??
|
|
209
|
+
const _filter = filter ?? {};
|
|
210
|
+
const whereClauses = [];
|
|
211
|
+
const values = [embeddingString, k];
|
|
212
|
+
let paramCount = values.length;
|
|
213
|
+
for (const [key, value] of Object.entries(_filter)) {
|
|
214
|
+
if (typeof value === "object" && value !== null) {
|
|
215
|
+
const currentParamCount = paramCount;
|
|
216
|
+
const placeholders = value.in
|
|
217
|
+
.map((_, index) => `$${currentParamCount + index + 1}`)
|
|
218
|
+
.join(",");
|
|
219
|
+
whereClauses.push(`${this.metadataColumnName}->>'${key}' IN (${placeholders})`);
|
|
220
|
+
values.push(...value.in);
|
|
221
|
+
paramCount += value.in.length;
|
|
222
|
+
}
|
|
223
|
+
else {
|
|
224
|
+
paramCount += 1;
|
|
225
|
+
whereClauses.push(`${this.metadataColumnName}->>'${key}' = $${paramCount}`);
|
|
226
|
+
values.push(value);
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
const whereClause = whereClauses.length
|
|
230
|
+
? `WHERE ${whereClauses.join(" AND ")}`
|
|
231
|
+
: "";
|
|
210
232
|
const queryString = `
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
const documents = (await this.client.query(queryString,
|
|
233
|
+
SELECT *, ${this.vectorColumnName} <=> $1 as "_distance"
|
|
234
|
+
FROM ${this.tableName}
|
|
235
|
+
${whereClause}
|
|
236
|
+
ORDER BY "_distance" ASC
|
|
237
|
+
LIMIT $2;`;
|
|
238
|
+
const documents = (await this.client.query(queryString, values)).rows;
|
|
217
239
|
const results = [];
|
|
218
240
|
for (const doc of documents) {
|
|
219
241
|
if (doc._distance != null && doc[this.contentColumnName] != null) {
|
|
@@ -2,7 +2,7 @@ import { type VercelPool, type VercelPoolClient, type VercelPostgresPoolConfig }
|
|
|
2
2
|
import { VectorStore } from "./base.js";
|
|
3
3
|
import { Embeddings } from "../embeddings/base.js";
|
|
4
4
|
import { Document } from "../document.js";
|
|
5
|
-
type Metadata = Record<string,
|
|
5
|
+
type Metadata = Record<string, string | number | Record<"in", string[]>>;
|
|
6
6
|
/**
|
|
7
7
|
* Interface that defines the arguments required to create a
|
|
8
8
|
* `VercelPostgres` instance. It includes Postgres connection options,
|
|
@@ -203,14 +203,36 @@ export class VercelPostgres extends VectorStore {
|
|
|
203
203
|
*/
|
|
204
204
|
async similaritySearchVectorWithScore(query, k, filter) {
|
|
205
205
|
const embeddingString = `[${query.join(",")}]`;
|
|
206
|
-
const _filter = filter ??
|
|
206
|
+
const _filter = filter ?? {};
|
|
207
|
+
const whereClauses = [];
|
|
208
|
+
const values = [embeddingString, k];
|
|
209
|
+
let paramCount = values.length;
|
|
210
|
+
for (const [key, value] of Object.entries(_filter)) {
|
|
211
|
+
if (typeof value === "object" && value !== null) {
|
|
212
|
+
const currentParamCount = paramCount;
|
|
213
|
+
const placeholders = value.in
|
|
214
|
+
.map((_, index) => `$${currentParamCount + index + 1}`)
|
|
215
|
+
.join(",");
|
|
216
|
+
whereClauses.push(`${this.metadataColumnName}->>'${key}' IN (${placeholders})`);
|
|
217
|
+
values.push(...value.in);
|
|
218
|
+
paramCount += value.in.length;
|
|
219
|
+
}
|
|
220
|
+
else {
|
|
221
|
+
paramCount += 1;
|
|
222
|
+
whereClauses.push(`${this.metadataColumnName}->>'${key}' = $${paramCount}`);
|
|
223
|
+
values.push(value);
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
const whereClause = whereClauses.length
|
|
227
|
+
? `WHERE ${whereClauses.join(" AND ")}`
|
|
228
|
+
: "";
|
|
207
229
|
const queryString = `
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
const documents = (await this.client.query(queryString,
|
|
230
|
+
SELECT *, ${this.vectorColumnName} <=> $1 as "_distance"
|
|
231
|
+
FROM ${this.tableName}
|
|
232
|
+
${whereClause}
|
|
233
|
+
ORDER BY "_distance" ASC
|
|
234
|
+
LIMIT $2;`;
|
|
235
|
+
const documents = (await this.client.query(queryString, values)).rows;
|
|
214
236
|
const results = [];
|
|
215
237
|
for (const doc of documents) {
|
|
216
238
|
if (doc._distance != null && doc[this.contentColumnName] != null) {
|