langchain 0.0.74 → 0.0.76
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/client.cjs +1 -0
- package/client.d.ts +1 -0
- package/client.js +1 -0
- package/dist/base_language/count_tokens.cjs +5 -21
- package/dist/base_language/count_tokens.d.ts +1 -6
- package/dist/base_language/count_tokens.js +4 -19
- package/dist/base_language/index.cjs +16 -24
- package/dist/base_language/index.d.ts +22 -3
- package/dist/base_language/index.js +17 -25
- package/dist/cache/redis.d.ts +3 -1
- package/dist/callbacks/base.d.ts +17 -3
- package/dist/callbacks/handlers/console.cjs +15 -28
- package/dist/callbacks/handlers/console.d.ts +11 -20
- package/dist/callbacks/handlers/console.js +14 -27
- package/dist/callbacks/handlers/initialize.cjs +8 -3
- package/dist/callbacks/handlers/initialize.d.ts +4 -2
- package/dist/callbacks/handlers/initialize.js +6 -2
- package/dist/callbacks/handlers/tracer.cjs +193 -0
- package/dist/callbacks/handlers/tracer.d.ts +65 -0
- package/dist/callbacks/handlers/tracer.js +189 -0
- package/dist/callbacks/handlers/tracer_langchain.cjs +163 -0
- package/dist/callbacks/handlers/tracer_langchain.d.ts +39 -0
- package/dist/callbacks/handlers/tracer_langchain.js +159 -0
- package/dist/callbacks/handlers/tracer_langchain_v1.cjs +202 -0
- package/dist/callbacks/handlers/tracer_langchain_v1.d.ts +57 -0
- package/dist/callbacks/handlers/tracer_langchain_v1.js +198 -0
- package/dist/callbacks/index.cjs +10 -5
- package/dist/callbacks/index.d.ts +5 -3
- package/dist/callbacks/index.js +5 -3
- package/dist/callbacks/manager.cjs +39 -9
- package/dist/callbacks/manager.d.ts +5 -2
- package/dist/callbacks/manager.js +40 -10
- package/dist/chains/llm_chain.cjs +17 -5
- package/dist/chains/llm_chain.d.ts +9 -3
- package/dist/chains/llm_chain.js +17 -5
- package/dist/chains/question_answering/map_reduce_prompts.cjs +5 -5
- package/dist/chains/question_answering/map_reduce_prompts.d.ts +1 -1
- package/dist/chains/question_answering/map_reduce_prompts.js +1 -1
- package/dist/chains/question_answering/refine_prompts.cjs +5 -5
- package/dist/chains/question_answering/refine_prompts.d.ts +1 -1
- package/dist/chains/question_answering/refine_prompts.js +1 -1
- package/dist/chains/question_answering/stuff_prompts.cjs +2 -2
- package/dist/chains/question_answering/stuff_prompts.d.ts +1 -1
- package/dist/chains/question_answering/stuff_prompts.js +1 -1
- package/dist/chains/sequential_chain.cjs +2 -2
- package/dist/chains/sequential_chain.d.ts +2 -2
- package/dist/chains/sequential_chain.js +2 -2
- package/dist/chains/sql_db/sql_db_prompt.cjs +20 -1
- package/dist/chains/sql_db/sql_db_prompt.d.ts +1 -0
- package/dist/chains/sql_db/sql_db_prompt.js +19 -0
- package/dist/chains/summarization/load.cjs +14 -5
- package/dist/chains/summarization/load.d.ts +7 -2
- package/dist/chains/summarization/load.js +14 -5
- package/dist/chat_models/anthropic.cjs +36 -9
- package/dist/chat_models/anthropic.d.ts +6 -3
- package/dist/chat_models/anthropic.js +36 -9
- package/dist/chat_models/base.cjs +40 -13
- package/dist/chat_models/base.d.ts +14 -7
- package/dist/chat_models/base.js +41 -14
- package/dist/chat_models/openai.cjs +20 -14
- package/dist/chat_models/openai.d.ts +2 -1
- package/dist/chat_models/openai.js +20 -14
- package/dist/client/index.cjs +5 -0
- package/dist/client/index.d.ts +1 -0
- package/dist/client/index.js +1 -0
- package/dist/client/langchainplus.cjs +405 -0
- package/dist/client/langchainplus.d.ts +65 -0
- package/dist/client/langchainplus.js +398 -0
- package/dist/document_loaders/fs/unstructured.cjs +8 -0
- package/dist/document_loaders/fs/unstructured.d.ts +3 -0
- package/dist/document_loaders/fs/unstructured.js +8 -0
- package/dist/document_loaders/web/apify_dataset.cjs +64 -0
- package/dist/document_loaders/web/apify_dataset.d.ts +28 -0
- package/dist/document_loaders/web/apify_dataset.js +60 -0
- package/dist/embeddings/openai.cjs +2 -2
- package/dist/embeddings/openai.js +2 -2
- package/dist/llms/base.cjs +43 -17
- package/dist/llms/base.d.ts +16 -9
- package/dist/llms/base.js +44 -18
- package/dist/llms/cohere.cjs +3 -2
- package/dist/llms/cohere.d.ts +1 -1
- package/dist/llms/cohere.js +3 -2
- package/dist/llms/hf.cjs +2 -2
- package/dist/llms/hf.d.ts +1 -1
- package/dist/llms/hf.js +2 -2
- package/dist/llms/openai-chat.cjs +13 -14
- package/dist/llms/openai-chat.d.ts +2 -1
- package/dist/llms/openai-chat.js +13 -14
- package/dist/llms/openai.cjs +15 -13
- package/dist/llms/openai.d.ts +4 -3
- package/dist/llms/openai.js +15 -13
- package/dist/llms/replicate.cjs +2 -2
- package/dist/llms/replicate.d.ts +1 -1
- package/dist/llms/replicate.js +2 -2
- package/dist/memory/base.cjs +9 -1
- package/dist/memory/base.d.ts +1 -0
- package/dist/memory/base.js +7 -0
- package/dist/memory/entity_memory.cjs +151 -0
- package/dist/memory/entity_memory.d.ts +35 -0
- package/dist/memory/entity_memory.js +147 -0
- package/dist/memory/index.cjs +5 -1
- package/dist/memory/index.d.ts +2 -0
- package/dist/memory/index.js +2 -0
- package/dist/memory/prompt.cjs +84 -1
- package/dist/memory/prompt.d.ts +6 -0
- package/dist/memory/prompt.js +83 -0
- package/dist/memory/stores/entity/in_memory.cjs +32 -0
- package/dist/memory/stores/entity/in_memory.d.ts +10 -0
- package/dist/memory/stores/entity/in_memory.js +28 -0
- package/dist/prompts/index.cjs +6 -1
- package/dist/prompts/index.d.ts +1 -0
- package/dist/prompts/index.js +1 -0
- package/dist/{chains/prompt_selector.d.ts → prompts/selectors/conditional.d.ts} +4 -4
- package/dist/retrievers/document_compressors/chain_extract.cjs +9 -11
- package/dist/retrievers/document_compressors/chain_extract.js +9 -11
- package/dist/schema/index.cjs +13 -1
- package/dist/schema/index.d.ts +19 -0
- package/dist/schema/index.js +11 -0
- package/dist/stores/message/dynamodb.cjs +8 -6
- package/dist/stores/message/dynamodb.js +8 -6
- package/dist/stores/message/redis.cjs +69 -0
- package/dist/stores/message/redis.d.ts +18 -0
- package/dist/stores/message/redis.js +65 -0
- package/dist/stores/message/utils.cjs +30 -15
- package/dist/stores/message/utils.d.ts +4 -2
- package/dist/stores/message/utils.js +28 -14
- package/dist/text_splitter.cjs +3 -23
- package/dist/text_splitter.d.ts +1 -3
- package/dist/text_splitter.js +3 -23
- package/dist/tools/webbrowser.cjs +5 -7
- package/dist/tools/webbrowser.js +3 -5
- package/dist/types/openai-types.d.ts +3 -2
- package/dist/util/async_caller.cjs +16 -0
- package/dist/util/async_caller.d.ts +4 -0
- package/dist/util/async_caller.js +16 -0
- package/dist/util/axios-fetch-adapter.cjs +6 -0
- package/dist/util/axios-fetch-adapter.js +6 -0
- package/dist/util/env.cjs +39 -7
- package/dist/util/env.d.ts +19 -0
- package/dist/util/env.js +32 -6
- package/dist/util/sql_utils.cjs +18 -0
- package/dist/util/sql_utils.js +19 -1
- package/dist/util/tiktoken.cjs +26 -0
- package/dist/util/tiktoken.d.ts +9 -0
- package/dist/util/tiktoken.js +21 -0
- package/dist/vectorstores/redis.cjs +236 -0
- package/dist/vectorstores/redis.d.ts +80 -0
- package/dist/vectorstores/redis.js +232 -0
- package/document_loaders/web/apify_dataset.cjs +1 -0
- package/document_loaders/web/apify_dataset.d.ts +1 -0
- package/document_loaders/web/apify_dataset.js +1 -0
- package/package.json +41 -5
- package/stores/message/redis.cjs +1 -0
- package/stores/message/redis.d.ts +1 -0
- package/stores/message/redis.js +1 -0
- package/vectorstores/redis.cjs +1 -0
- package/vectorstores/redis.d.ts +1 -0
- package/vectorstores/redis.js +1 -0
- package/dist/callbacks/handlers/tracers.cjs +0 -341
- package/dist/callbacks/handlers/tracers.d.ts +0 -100
- package/dist/callbacks/handlers/tracers.js +0 -336
- package/dist/{chains/prompt_selector.cjs → prompts/selectors/conditional.cjs} +0 -0
- package/dist/{chains/prompt_selector.js → prompts/selectors/conditional.js} +0 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { Tiktoken, getEncodingNameForModel, } from "js-tiktoken/lite";
|
|
2
|
+
import { AsyncCaller } from "./async_caller.js";
|
|
3
|
+
// Encoding name -> Promise of the downloaded rank data for that encoding.
// A Map (rather than a plain object probed with `in`) is used so that names
// such as "constructor" or "toString" are never mistaken for cached entries
// through the prototype chain.
const cache = new Map();
const caller = /* #__PURE__ */ new AsyncCaller({});
/**
 * Builds a `Tiktoken` tokenizer for the given encoding name.
 *
 * The rank data is fetched from `https://tiktoken.pages.dev/js/<encoding>.json`
 * through the module-level `caller`, and the in-flight/settled Promise is
 * cached per encoding so the file is requested at most once while it succeeds.
 * A failed request removes its cache entry, so a later call retries.
 *
 * Note that the cached Promise is shared: `options.signal` is only applied to
 * the request started by the first caller for a given encoding, and an abort
 * of that request rejects for every caller awaiting it.
 *
 * @param encoding Name of the encoding; interpolated into the download URL.
 * @param options Optional settings.
 * @param options.signal Abort signal forwarded to the fetch call.
 * @param options.extendedSpecialTokens Passed as the second `Tiktoken` constructor argument.
 * @returns A new `Tiktoken` instance built from the downloaded data.
 * @throws Whatever the fetch or JSON parsing rejects with.
 */
export async function getEncoding(encoding, options) {
    let pending = cache.get(encoding);
    if (pending === undefined) {
        pending = caller
            .fetch(`https://tiktoken.pages.dev/js/${encoding}.json`, {
            signal: options?.signal,
        })
            .then((res) => res.json())
            .catch((e) => {
            // Do not keep a rejected Promise around; allow the next call to retry.
            cache.delete(encoding);
            throw e;
        });
        cache.set(encoding, pending);
    }
    return new Tiktoken(await pending, options?.extendedSpecialTokens);
}
|
|
19
|
+
/**
 * Builds a tokenizer for a model by first mapping the model name to its
 * encoding name (via `getEncodingNameForModel`) and then delegating to
 * `getEncoding`.
 *
 * @param model Model name to look up.
 * @param options Forwarded unchanged to `getEncoding`.
 * @returns The result of `getEncoding` for the resolved encoding name.
 */
export async function encodingForModel(model, options) {
    const encodingName = getEncodingNameForModel(model);
    return getEncoding(encodingName, options);
}
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.RedisVectorStore = void 0;
|
|
4
|
+
const redis_1 = require("redis");
|
|
5
|
+
const base_js_1 = require("./base.cjs");
|
|
6
|
+
const document_js_1 = require("../document.cjs");
|
|
7
|
+
/**
 * Vector store that keeps each document in a Redis hash (vector, content and
 * JSON metadata fields) and queries them through a RediSearch index using a
 * KNN query.
 */
class RedisVectorStore extends base_js_1.VectorStore {
    /**
     * @param embeddings Embeddings implementation handed to the base class.
     * @param _dbConfig Store configuration. `redisClient` and `indexName` are
     *   read as-is; the remaining options fall back to defaults:
     *   `indexOptions` -> HNSW / COSINE, `keyPrefix` -> `doc:<indexName>:`,
     *   `contentKey` -> "content", `metadataKey` -> "metadata",
     *   `vectorKey` -> "content_vector". `filter` is optional.
     */
    constructor(embeddings, _dbConfig) {
        super(embeddings, _dbConfig);
        // Define every instance field up front, in the same order and with the
        // same descriptors as compiled class fields, before assigning values.
        for (const field of [
            "redisClient",
            "indexName",
            "indexOptions",
            "keyPrefix",
            "contentKey",
            "metadataKey",
            "vectorKey",
            "filter",
        ]) {
            Object.defineProperty(this, field, {
                enumerable: true,
                configurable: true,
                writable: true,
                value: void 0
            });
        }
        this.redisClient = _dbConfig.redisClient;
        this.indexName = _dbConfig.indexName;
        this.indexOptions = _dbConfig.indexOptions ?? {
            ALGORITHM: redis_1.VectorAlgorithms.HNSW,
            DISTANCE_METRIC: "COSINE",
        };
        this.keyPrefix = _dbConfig.keyPrefix ?? `doc:${this.indexName}:`;
        this.contentKey = _dbConfig.contentKey ?? "content";
        this.metadataKey = _dbConfig.metadataKey ?? "metadata";
        this.vectorKey = _dbConfig.vectorKey ?? "content_vector";
        this.filter = _dbConfig.filter;
    }
    /**
     * Embeds the page content of each document and stores the results.
     *
     * @param documents Documents to embed and store.
     * @param options Forwarded to `addVectors` (`keys`, `batchSize`).
     */
    async addDocuments(documents, options) {
        const texts = documents.map(({ pageContent }) => pageContent);
        await this.addVectors(await this.embeddings.embedDocuments(texts), documents, options);
    }
    /**
     * Writes vectors and their documents to Redis, one hash per vector.
     *
     * The index is created first if it does not exist, sized from the first
     * vector. Writes are grouped into MULTI batches of at most `batchSize`
     * entries, and every batch is awaited before the next one starts, so a
     * failed write rejects the returned Promise.
     *
     * @param vectors Embedding vectors; `vectors[i]` belongs to `documents[i]`.
     * @param documents Source documents, parallel to `vectors`.
     * @param options.keys Optional explicit hash keys, parallel to `vectors`.
     *   When omitted or empty, keys are `<keyPrefix><index in this call>`.
     * @param options.batchSize Maximum number of hashes per MULTI (default 1000).
     */
    async addVectors(vectors, documents, { keys, batchSize = 1000 } = {}) {
        // Nothing to store; also avoids reading `.length` of a missing first vector.
        if (!vectors.length) {
            return;
        }
        // check if the index exists and create it if it doesn't
        await this.createIndex(vectors[0].length);
        // Guard against sizes below 1 (or NaN) so the batch loop always advances.
        const chunkSize = batchSize >= 1 ? Math.floor(batchSize) : 1000;
        const useCustomKeys = Boolean(keys && keys.length);
        for (let start = 0; start < vectors.length; start += chunkSize) {
            // A fresh MULTI per batch: each one holds only its own commands.
            const multi = this.redisClient.multi();
            const end = Math.min(start + chunkSize, vectors.length);
            for (let idx = start; idx < end; idx += 1) {
                const key = useCustomKeys ? keys[idx] : `${this.keyPrefix}${idx}`;
                const metadata = documents[idx]?.metadata || {};
                multi.hSet(key, {
                    [this.vectorKey]: this.getFloat32Buffer(vectors[idx]),
                    [this.contentKey]: documents[idx].pageContent,
                    [this.metadataKey]: this.escapeSpecialChars(JSON.stringify(metadata)),
                });
            }
            await multi.exec();
        }
    }
    /**
     * Runs a KNN search for `query` and returns the matching documents with
     * the numeric value RediSearch reports under the `vector_score` alias.
     *
     * @param query Query embedding.
     * @param k Number of neighbours requested.
     * @param filter Optional metadata filter; may not be combined with the
     *   instance-level `filter`.
     * @returns Array of `[Document, score]` pairs.
     * @throws Error when both `filter` and `this.filter` are set.
     */
    async similaritySearchVectorWithScore(query, k, filter) {
        if (filter && this.filter) {
            throw new Error("cannot provide both `filter` and `this.filter`");
        }
        const _filter = filter ?? this.filter;
        const results = await this.redisClient.ft.search(this.indexName, ...this.buildQuery(query, k, _filter));
        const result = [];
        if (!results.total) {
            return result;
        }
        for (const res of results.documents) {
            const document = res.value;
            if (!document || !document.vector_score) {
                continue;
            }
            result.push([
                new document_js_1.Document({
                    pageContent: document[this.contentKey],
                    // Read the field the metadata was written to, which is
                    // only named "metadata" when the default key is in use.
                    metadata: JSON.parse(this.unEscapeSpecialChars(document[this.metadataKey])),
                }),
                Number(document.vector_score),
            ]);
        }
        return result;
    }
    /**
     * Builds documents from raw texts and stores them in a new instance.
     *
     * @param texts Page contents.
     * @param metadatas Either one metadata object per text, or a single
     *   object shared by every text.
     * @param embeddings Embeddings implementation.
     * @param dbConfig Store configuration (see the constructor).
     * @returns Promise of the populated store.
     */
    static fromTexts(texts, metadatas, embeddings, dbConfig) {
        const docs = [];
        for (let i = 0; i < texts.length; i += 1) {
            const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas;
            const newDoc = new document_js_1.Document({
                pageContent: texts[i],
                metadata,
            });
            docs.push(newDoc);
        }
        return RedisVectorStore.fromDocuments(docs, embeddings, dbConfig);
    }
    /**
     * Creates an instance and stores the given documents in it.
     *
     * @returns Promise of the populated store.
     */
    static async fromDocuments(docs, embeddings, dbConfig) {
        const instance = new this(embeddings, dbConfig);
        await instance.addDocuments(docs);
        return instance;
    }
    /**
     * Reports whether `FT.INFO` succeeds for the configured index.
     * Any error from the call (not only "unknown index") yields `false`.
     */
    async checkIndexExists() {
        try {
            await this.redisClient.ft.info(this.indexName);
        }
        catch (err) {
            // index doesn't exist
            return false;
        }
        return true;
    }
    /**
     * Creates the search index unless `checkIndexExists` reports it exists.
     *
     * The schema has a FLOAT32 vector field (merged with `indexOptions`) and
     * two TEXT fields for content and metadata, over hashes with `keyPrefix`.
     *
     * @param dimensions Vector dimensionality (default 1536).
     */
    async createIndex(dimensions = 1536) {
        if (await this.checkIndexExists()) {
            return;
        }
        const schema = {
            [this.vectorKey]: {
                type: redis_1.SchemaFieldTypes.VECTOR,
                TYPE: "FLOAT32",
                DIM: dimensions,
                ...this.indexOptions,
            },
            [this.contentKey]: redis_1.SchemaFieldTypes.TEXT,
            [this.metadataKey]: redis_1.SchemaFieldTypes.TEXT,
        };
        await this.redisClient.ft.create(this.indexName, schema, {
            ON: "HASH",
            PREFIX: this.keyPrefix,
        });
    }
    /**
     * Drops the configured index.
     *
     * @returns `true` on success, `false` if the drop call threw.
     */
    async dropIndex() {
        try {
            await this.redisClient.ft.dropIndex(this.indexName);
            return true;
        }
        catch (err) {
            return false;
        }
    }
    /**
     * Builds the `[queryString, options]` pair passed to `ft.search`.
     *
     * @param query Query embedding, sent as the `$vector` parameter.
     * @param k Neighbour count, used for both KNN and LIMIT.
     * @param filter Optional list of metadata terms.
     */
    buildQuery(query, k, filter) {
        const vectorScoreField = "vector_score";
        let hybridFields = "*";
        // if a filter is set, modify the hybrid query
        if (filter && filter.length) {
            // `filter` is a list of strings, then it's applied using the OR operator in the metadata key
            // for example: filter = ['foo', 'bar'] => this will filter all metadata containing either 'foo' OR 'bar'
            hybridFields = `@${this.metadataKey}:(${this.prepareFilter(filter)})`;
        }
        const baseQuery = `${hybridFields} => [KNN ${k} @${this.vectorKey} $vector AS ${vectorScoreField}]`;
        const returnFields = [this.metadataKey, this.contentKey, vectorScoreField];
        const options = {
            PARAMS: {
                vector: this.getFloat32Buffer(query),
            },
            RETURN: returnFields,
            SORTBY: vectorScoreField,
            DIALECT: 2,
            LIMIT: {
                from: 0,
                size: k,
            },
        };
        return [baseQuery, options];
    }
    /**
     * Escapes each filter term and joins them with `|` (OR).
     */
    prepareFilter(filter) {
        // Arrow wrapper keeps `this` bound and passes only the term itself.
        return filter.map((term) => this.escapeSpecialChars(term)).join("|");
    }
    /**
     * Escapes all '-' characters.
     * RediSearch considers '-' as a negative operator, hence we need
     * to escape it
     * @see https://redis.io/docs/stack/search/reference/query_syntax
     *
     * @param str
     * @returns
     */
    escapeSpecialChars(str) {
        return str.replaceAll("-", "\\-");
    }
    /**
     * Unescapes all '-' characters, returning the original string
     *
     * @param str
     * @returns
     */
    unEscapeSpecialChars(str) {
        return str.replaceAll("\\-", "-");
    }
    /**
     * Converts the vector to the buffer Redis needs to
     * correctly store an embedding
     *
     * @param vector
     * @returns Buffer
     */
    getFloat32Buffer(vector) {
        return Buffer.from(new Float32Array(vector).buffer);
    }
}
exports.RedisVectorStore = RedisVectorStore;
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import type { createCluster, createClient } from "redis";
|
|
2
|
+
import { VectorAlgorithms } from "redis";
|
|
3
|
+
import { Embeddings } from "../embeddings/base.js";
|
|
4
|
+
import { VectorStore } from "./base.js";
|
|
5
|
+
import { Document } from "../document.js";
|
|
6
|
+
/**
 * Options common to every vector-field definition, parameterised by the
 * index algorithm `T` and its algorithm-specific extras `A`.
 * The store spreads these options into the vector field of the index schema.
 */
export type CreateSchemaVectorField<T extends VectorAlgorithms, A extends Record<string, unknown>> = A & {
    /** Index algorithm this option set applies to. */
    ALGORITHM: T;
    /** Distance function used by the index. */
    DISTANCE_METRIC: "L2" | "IP" | "COSINE";
    /** Optional INITIAL_CAP setting, passed through unchanged. */
    INITIAL_CAP?: number;
};
|
|
11
|
+
/** Vector-field options for the FLAT algorithm. */
export type CreateSchemaFlatVectorField = CreateSchemaVectorField<
    VectorAlgorithms.FLAT,
    {
        /** Optional BLOCK_SIZE setting, passed through unchanged. */
        BLOCK_SIZE?: number;
    }
>;
|
|
14
|
+
/** Vector-field options for the HNSW algorithm (the store's default). */
export type CreateSchemaHNSWVectorField = CreateSchemaVectorField<
    VectorAlgorithms.HNSW,
    {
        /** Optional M setting, passed through unchanged. */
        M?: number;
        /** Optional EF_CONSTRUCTION setting, passed through unchanged. */
        EF_CONSTRUCTION?: number;
        /** Optional EF_RUNTIME setting, passed through unchanged. */
        EF_RUNTIME?: number;
    }
>;
|
|
19
|
+
/** Constructor configuration for `RedisVectorStore`. */
export interface RedisVectorStoreConfig {
    /** Client (single node or cluster) used for all Redis calls. */
    redisClient: ReturnType<typeof createClient> | ReturnType<typeof createCluster>;
    /** Name of the search index to query and, if missing, create. */
    indexName: string;
    /** Vector index options; defaults to HNSW with the COSINE metric. */
    indexOptions?: CreateSchemaFlatVectorField | CreateSchemaHNSWVectorField;
    /** Prefix for generated hash keys; defaults to `doc:<indexName>:`. */
    keyPrefix?: string;
    /** Hash field holding the page content; defaults to "content". */
    contentKey?: string;
    /** Hash field holding the JSON metadata; defaults to "metadata". */
    metadataKey?: string;
    /** Hash field holding the embedding; defaults to "content_vector". */
    vectorKey?: string;
    /** Instance-level metadata filter applied to searches. */
    filter?: RedisVectorStoreFilterType;
}
|
|
29
|
+
/** Options accepted by `addDocuments` and `addVectors`. */
export interface RedisAddOptions {
    /** Explicit hash keys, parallel to the vectors being added. */
    keys?: string[];
    /** Batch size for writes; the implementation defaults it to 1000. */
    batchSize?: number;
}
|
|
33
|
+
/** List of metadata terms; a search matches when any one of them matches (terms are OR-ed). */
export type RedisVectorStoreFilterType = Array<string>;
|
|
34
|
+
/**
 * Vector store that keeps documents in Redis hashes and searches them
 * through a search index with a KNN query.
 */
export declare class RedisVectorStore extends VectorStore {
    /** Type of the filter accepted by similarity searches. */
    FilterType: RedisVectorStoreFilterType;
    private redisClient;
    /** Name of the search index. */
    indexName: string;
    /** Options merged into the vector field of the index schema. */
    indexOptions: CreateSchemaFlatVectorField | CreateSchemaHNSWVectorField;
    /** Prefix of generated hash keys and of the index's key space. */
    keyPrefix: string;
    /** Hash field holding the page content. */
    contentKey: string;
    /** Hash field holding the JSON-encoded metadata. */
    metadataKey: string;
    /** Hash field holding the embedding bytes. */
    vectorKey: string;
    /** Optional instance-level metadata filter. */
    filter?: RedisVectorStoreFilterType;
    constructor(embeddings: Embeddings, _dbConfig: RedisVectorStoreConfig);
    /** Embeds the documents' page content and stores the results. */
    addDocuments(documents: Document[], options?: RedisAddOptions): Promise<void>;
    /** Stores precomputed vectors together with their documents. */
    addVectors(vectors: number[][], documents: Document[], { keys, batchSize }?: RedisAddOptions): Promise<void>;
    /** Searches for the `k` nearest stored vectors and returns `[document, score]` pairs. */
    similaritySearchVectorWithScore(query: number[], k: number, filter?: RedisVectorStoreFilterType): Promise<[Document, number][]>;
    /** Builds documents from texts (with per-text or shared metadata) and stores them in a new instance. */
    static fromTexts(texts: string[], metadatas: object[] | object, embeddings: Embeddings, dbConfig: RedisVectorStoreConfig): Promise<RedisVectorStore>;
    /** Creates an instance and stores the given documents in it. */
    static fromDocuments(docs: Document[], embeddings: Embeddings, dbConfig: RedisVectorStoreConfig): Promise<RedisVectorStore>;
    /** Resolves to whether the index info call succeeds. */
    checkIndexExists(): Promise<boolean>;
    /** Creates the index when it does not already exist. */
    createIndex(dimensions?: number): Promise<void>;
    /** Drops the index; resolves to `false` if the drop call failed. */
    dropIndex(): Promise<boolean>;
    private buildQuery;
    private prepareFilter;
    /**
     * Escapes all '-' characters.
     * RediSearch considers '-' as a negative operator, hence we need
     * to escape it
     * @see https://redis.io/docs/stack/search/reference/query_syntax
     *
     * @param str
     * @returns
     */
    private escapeSpecialChars;
    /**
     * Unescapes all '-' characters, returning the original string
     *
     * @param str
     * @returns
     */
    private unEscapeSpecialChars;
    /**
     * Converts the vector to the buffer Redis needs to
     * correctly store an embedding
     *
     * @param vector
     * @returns Buffer
     */
    private getFloat32Buffer;
}
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
import { SchemaFieldTypes, VectorAlgorithms } from "redis";
|
|
2
|
+
import { VectorStore } from "./base.js";
|
|
3
|
+
import { Document } from "../document.js";
|
|
4
|
+
/**
 * Vector store that keeps each document in a Redis hash (vector, content and
 * JSON metadata fields) and queries them through a RediSearch index using a
 * KNN query.
 */
export class RedisVectorStore extends VectorStore {
    /**
     * @param embeddings Embeddings implementation handed to the base class.
     * @param _dbConfig Store configuration. `redisClient` and `indexName` are
     *   read as-is; the remaining options fall back to defaults:
     *   `indexOptions` -> HNSW / COSINE, `keyPrefix` -> `doc:<indexName>:`,
     *   `contentKey` -> "content", `metadataKey` -> "metadata",
     *   `vectorKey` -> "content_vector". `filter` is optional.
     */
    constructor(embeddings, _dbConfig) {
        super(embeddings, _dbConfig);
        // Define every instance field up front, in the same order and with the
        // same descriptors as compiled class fields, before assigning values.
        for (const field of [
            "redisClient",
            "indexName",
            "indexOptions",
            "keyPrefix",
            "contentKey",
            "metadataKey",
            "vectorKey",
            "filter",
        ]) {
            Object.defineProperty(this, field, {
                enumerable: true,
                configurable: true,
                writable: true,
                value: void 0
            });
        }
        this.redisClient = _dbConfig.redisClient;
        this.indexName = _dbConfig.indexName;
        this.indexOptions = _dbConfig.indexOptions ?? {
            ALGORITHM: VectorAlgorithms.HNSW,
            DISTANCE_METRIC: "COSINE",
        };
        this.keyPrefix = _dbConfig.keyPrefix ?? `doc:${this.indexName}:`;
        this.contentKey = _dbConfig.contentKey ?? "content";
        this.metadataKey = _dbConfig.metadataKey ?? "metadata";
        this.vectorKey = _dbConfig.vectorKey ?? "content_vector";
        this.filter = _dbConfig.filter;
    }
    /**
     * Embeds the page content of each document and stores the results.
     *
     * @param documents Documents to embed and store.
     * @param options Forwarded to `addVectors` (`keys`, `batchSize`).
     */
    async addDocuments(documents, options) {
        const texts = documents.map(({ pageContent }) => pageContent);
        await this.addVectors(await this.embeddings.embedDocuments(texts), documents, options);
    }
    /**
     * Writes vectors and their documents to Redis, one hash per vector.
     *
     * The index is created first if it does not exist, sized from the first
     * vector. Writes are grouped into MULTI batches of at most `batchSize`
     * entries, and every batch is awaited before the next one starts, so a
     * failed write rejects the returned Promise.
     *
     * @param vectors Embedding vectors; `vectors[i]` belongs to `documents[i]`.
     * @param documents Source documents, parallel to `vectors`.
     * @param options.keys Optional explicit hash keys, parallel to `vectors`.
     *   When omitted or empty, keys are `<keyPrefix><index in this call>`.
     * @param options.batchSize Maximum number of hashes per MULTI (default 1000).
     */
    async addVectors(vectors, documents, { keys, batchSize = 1000 } = {}) {
        // Nothing to store; also avoids reading `.length` of a missing first vector.
        if (!vectors.length) {
            return;
        }
        // check if the index exists and create it if it doesn't
        await this.createIndex(vectors[0].length);
        // Guard against sizes below 1 (or NaN) so the batch loop always advances.
        const chunkSize = batchSize >= 1 ? Math.floor(batchSize) : 1000;
        const useCustomKeys = Boolean(keys && keys.length);
        for (let start = 0; start < vectors.length; start += chunkSize) {
            // A fresh MULTI per batch: each one holds only its own commands.
            const multi = this.redisClient.multi();
            const end = Math.min(start + chunkSize, vectors.length);
            for (let idx = start; idx < end; idx += 1) {
                const key = useCustomKeys ? keys[idx] : `${this.keyPrefix}${idx}`;
                const metadata = documents[idx]?.metadata || {};
                multi.hSet(key, {
                    [this.vectorKey]: this.getFloat32Buffer(vectors[idx]),
                    [this.contentKey]: documents[idx].pageContent,
                    [this.metadataKey]: this.escapeSpecialChars(JSON.stringify(metadata)),
                });
            }
            await multi.exec();
        }
    }
    /**
     * Runs a KNN search for `query` and returns the matching documents with
     * the numeric value RediSearch reports under the `vector_score` alias.
     *
     * @param query Query embedding.
     * @param k Number of neighbours requested.
     * @param filter Optional metadata filter; may not be combined with the
     *   instance-level `filter`.
     * @returns Array of `[Document, score]` pairs.
     * @throws Error when both `filter` and `this.filter` are set.
     */
    async similaritySearchVectorWithScore(query, k, filter) {
        if (filter && this.filter) {
            throw new Error("cannot provide both `filter` and `this.filter`");
        }
        const _filter = filter ?? this.filter;
        const results = await this.redisClient.ft.search(this.indexName, ...this.buildQuery(query, k, _filter));
        const result = [];
        if (!results.total) {
            return result;
        }
        for (const res of results.documents) {
            const document = res.value;
            if (!document || !document.vector_score) {
                continue;
            }
            result.push([
                new Document({
                    pageContent: document[this.contentKey],
                    // Read the field the metadata was written to, which is
                    // only named "metadata" when the default key is in use.
                    metadata: JSON.parse(this.unEscapeSpecialChars(document[this.metadataKey])),
                }),
                Number(document.vector_score),
            ]);
        }
        return result;
    }
    /**
     * Builds documents from raw texts and stores them in a new instance.
     *
     * @param texts Page contents.
     * @param metadatas Either one metadata object per text, or a single
     *   object shared by every text.
     * @param embeddings Embeddings implementation.
     * @param dbConfig Store configuration (see the constructor).
     * @returns Promise of the populated store.
     */
    static fromTexts(texts, metadatas, embeddings, dbConfig) {
        const docs = [];
        for (let i = 0; i < texts.length; i += 1) {
            const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas;
            const newDoc = new Document({
                pageContent: texts[i],
                metadata,
            });
            docs.push(newDoc);
        }
        return RedisVectorStore.fromDocuments(docs, embeddings, dbConfig);
    }
    /**
     * Creates an instance and stores the given documents in it.
     *
     * @returns Promise of the populated store.
     */
    static async fromDocuments(docs, embeddings, dbConfig) {
        const instance = new this(embeddings, dbConfig);
        await instance.addDocuments(docs);
        return instance;
    }
    /**
     * Reports whether `FT.INFO` succeeds for the configured index.
     * Any error from the call (not only "unknown index") yields `false`.
     */
    async checkIndexExists() {
        try {
            await this.redisClient.ft.info(this.indexName);
        }
        catch (err) {
            // index doesn't exist
            return false;
        }
        return true;
    }
    /**
     * Creates the search index unless `checkIndexExists` reports it exists.
     *
     * The schema has a FLOAT32 vector field (merged with `indexOptions`) and
     * two TEXT fields for content and metadata, over hashes with `keyPrefix`.
     *
     * @param dimensions Vector dimensionality (default 1536).
     */
    async createIndex(dimensions = 1536) {
        if (await this.checkIndexExists()) {
            return;
        }
        const schema = {
            [this.vectorKey]: {
                type: SchemaFieldTypes.VECTOR,
                TYPE: "FLOAT32",
                DIM: dimensions,
                ...this.indexOptions,
            },
            [this.contentKey]: SchemaFieldTypes.TEXT,
            [this.metadataKey]: SchemaFieldTypes.TEXT,
        };
        await this.redisClient.ft.create(this.indexName, schema, {
            ON: "HASH",
            PREFIX: this.keyPrefix,
        });
    }
    /**
     * Drops the configured index.
     *
     * @returns `true` on success, `false` if the drop call threw.
     */
    async dropIndex() {
        try {
            await this.redisClient.ft.dropIndex(this.indexName);
            return true;
        }
        catch (err) {
            return false;
        }
    }
    /**
     * Builds the `[queryString, options]` pair passed to `ft.search`.
     *
     * @param query Query embedding, sent as the `$vector` parameter.
     * @param k Neighbour count, used for both KNN and LIMIT.
     * @param filter Optional list of metadata terms.
     */
    buildQuery(query, k, filter) {
        const vectorScoreField = "vector_score";
        let hybridFields = "*";
        // if a filter is set, modify the hybrid query
        if (filter && filter.length) {
            // `filter` is a list of strings, then it's applied using the OR operator in the metadata key
            // for example: filter = ['foo', 'bar'] => this will filter all metadata containing either 'foo' OR 'bar'
            hybridFields = `@${this.metadataKey}:(${this.prepareFilter(filter)})`;
        }
        const baseQuery = `${hybridFields} => [KNN ${k} @${this.vectorKey} $vector AS ${vectorScoreField}]`;
        const returnFields = [this.metadataKey, this.contentKey, vectorScoreField];
        const options = {
            PARAMS: {
                vector: this.getFloat32Buffer(query),
            },
            RETURN: returnFields,
            SORTBY: vectorScoreField,
            DIALECT: 2,
            LIMIT: {
                from: 0,
                size: k,
            },
        };
        return [baseQuery, options];
    }
    /**
     * Escapes each filter term and joins them with `|` (OR).
     */
    prepareFilter(filter) {
        // Arrow wrapper keeps `this` bound and passes only the term itself.
        return filter.map((term) => this.escapeSpecialChars(term)).join("|");
    }
    /**
     * Escapes all '-' characters.
     * RediSearch considers '-' as a negative operator, hence we need
     * to escape it
     * @see https://redis.io/docs/stack/search/reference/query_syntax
     *
     * @param str
     * @returns
     */
    escapeSpecialChars(str) {
        return str.replaceAll("-", "\\-");
    }
    /**
     * Unescapes all '-' characters, returning the original string
     *
     * @param str
     * @returns
     */
    unEscapeSpecialChars(str) {
        return str.replaceAll("\\-", "-");
    }
    /**
     * Converts the vector to the buffer Redis needs to
     * correctly store an embedding
     *
     * @param vector
     * @returns Buffer
     */
    getFloat32Buffer(vector) {
        return Buffer.from(new Float32Array(vector).buffer);
    }
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
module.exports = require('../../dist/document_loaders/web/apify_dataset.cjs');
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from '../../dist/document_loaders/web/apify_dataset.js'
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from '../../dist/document_loaders/web/apify_dataset.js'
|