langchain 0.0.75 → 0.0.76
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/base_language/count_tokens.cjs +5 -21
- package/dist/base_language/count_tokens.d.ts +1 -6
- package/dist/base_language/count_tokens.js +4 -19
- package/dist/base_language/index.cjs +10 -24
- package/dist/base_language/index.d.ts +3 -2
- package/dist/base_language/index.js +11 -25
- package/dist/cache/redis.d.ts +3 -1
- package/dist/callbacks/base.d.ts +4 -4
- package/dist/callbacks/handlers/initialize.cjs +2 -9
- package/dist/callbacks/handlers/initialize.d.ts +1 -1
- package/dist/callbacks/handlers/initialize.js +2 -9
- package/dist/callbacks/handlers/tracer.cjs +11 -22
- package/dist/callbacks/handlers/tracer.d.ts +2 -2
- package/dist/callbacks/handlers/tracer.js +11 -22
- package/dist/callbacks/handlers/tracer_langchain.cjs +52 -91
- package/dist/callbacks/handlers/tracer_langchain.d.ts +12 -9
- package/dist/callbacks/handlers/tracer_langchain.js +52 -68
- package/dist/callbacks/handlers/tracer_langchain_v1.cjs +5 -1
- package/dist/callbacks/handlers/tracer_langchain_v1.js +5 -1
- package/dist/callbacks/manager.cjs +10 -10
- package/dist/callbacks/manager.d.ts +2 -2
- package/dist/callbacks/manager.js +10 -10
- package/dist/chains/question_answering/map_reduce_prompts.cjs +5 -5
- package/dist/chains/question_answering/map_reduce_prompts.d.ts +1 -1
- package/dist/chains/question_answering/map_reduce_prompts.js +1 -1
- package/dist/chains/question_answering/refine_prompts.cjs +5 -5
- package/dist/chains/question_answering/refine_prompts.d.ts +1 -1
- package/dist/chains/question_answering/refine_prompts.js +1 -1
- package/dist/chains/question_answering/stuff_prompts.cjs +2 -2
- package/dist/chains/question_answering/stuff_prompts.d.ts +1 -1
- package/dist/chains/question_answering/stuff_prompts.js +1 -1
- package/dist/chains/sequential_chain.cjs +2 -2
- package/dist/chains/sequential_chain.d.ts +2 -2
- package/dist/chains/sequential_chain.js +2 -2
- package/dist/chains/sql_db/sql_db_prompt.cjs +20 -1
- package/dist/chains/sql_db/sql_db_prompt.d.ts +1 -0
- package/dist/chains/sql_db/sql_db_prompt.js +19 -0
- package/dist/chat_models/base.cjs +17 -1
- package/dist/chat_models/base.d.ts +6 -0
- package/dist/chat_models/base.js +18 -2
- package/dist/chat_models/openai.cjs +2 -2
- package/dist/chat_models/openai.js +2 -2
- package/dist/client/langchainplus.cjs +90 -29
- package/dist/client/langchainplus.d.ts +7 -5
- package/dist/client/langchainplus.js +90 -29
- package/dist/document_loaders/fs/unstructured.cjs +8 -0
- package/dist/document_loaders/fs/unstructured.d.ts +3 -0
- package/dist/document_loaders/fs/unstructured.js +8 -0
- package/dist/document_loaders/web/apify_dataset.cjs +64 -0
- package/dist/document_loaders/web/apify_dataset.d.ts +28 -0
- package/dist/document_loaders/web/apify_dataset.js +60 -0
- package/dist/embeddings/openai.cjs +2 -2
- package/dist/embeddings/openai.js +2 -2
- package/dist/llms/base.cjs +18 -1
- package/dist/llms/base.d.ts +7 -1
- package/dist/llms/base.js +19 -2
- package/dist/llms/openai-chat.cjs +2 -2
- package/dist/llms/openai-chat.js +2 -2
- package/dist/llms/openai.cjs +2 -2
- package/dist/llms/openai.js +2 -2
- package/dist/memory/base.cjs +9 -1
- package/dist/memory/base.d.ts +1 -0
- package/dist/memory/base.js +7 -0
- package/dist/memory/entity_memory.cjs +151 -0
- package/dist/memory/entity_memory.d.ts +35 -0
- package/dist/memory/entity_memory.js +147 -0
- package/dist/memory/index.cjs +5 -1
- package/dist/memory/index.d.ts +2 -0
- package/dist/memory/index.js +2 -0
- package/dist/memory/prompt.cjs +84 -1
- package/dist/memory/prompt.d.ts +6 -0
- package/dist/memory/prompt.js +83 -0
- package/dist/memory/stores/entity/in_memory.cjs +32 -0
- package/dist/memory/stores/entity/in_memory.d.ts +10 -0
- package/dist/memory/stores/entity/in_memory.js +28 -0
- package/dist/prompts/index.cjs +6 -1
- package/dist/prompts/index.d.ts +1 -0
- package/dist/prompts/index.js +1 -0
- package/dist/{chains/prompt_selector.d.ts → prompts/selectors/conditional.d.ts} +4 -4
- package/dist/schema/index.cjs +13 -1
- package/dist/schema/index.d.ts +17 -0
- package/dist/schema/index.js +11 -0
- package/dist/stores/message/dynamodb.cjs +8 -6
- package/dist/stores/message/dynamodb.js +8 -6
- package/dist/stores/message/redis.cjs +5 -3
- package/dist/stores/message/redis.js +5 -3
- package/dist/stores/message/utils.cjs +30 -15
- package/dist/stores/message/utils.d.ts +4 -2
- package/dist/stores/message/utils.js +28 -14
- package/dist/text_splitter.cjs +3 -23
- package/dist/text_splitter.d.ts +1 -3
- package/dist/text_splitter.js +3 -23
- package/dist/tools/webbrowser.cjs +5 -7
- package/dist/tools/webbrowser.js +3 -5
- package/dist/util/env.cjs +39 -7
- package/dist/util/env.d.ts +19 -0
- package/dist/util/env.js +32 -6
- package/dist/util/sql_utils.cjs +18 -0
- package/dist/util/sql_utils.js +19 -1
- package/dist/util/tiktoken.cjs +26 -0
- package/dist/util/tiktoken.d.ts +9 -0
- package/dist/util/tiktoken.js +21 -0
- package/dist/vectorstores/redis.cjs +236 -0
- package/dist/vectorstores/redis.d.ts +80 -0
- package/dist/vectorstores/redis.js +232 -0
- package/document_loaders/web/apify_dataset.cjs +1 -0
- package/document_loaders/web/apify_dataset.d.ts +1 -0
- package/document_loaders/web/apify_dataset.js +1 -0
- package/package.json +25 -5
- package/vectorstores/redis.cjs +1 -0
- package/vectorstores/redis.d.ts +1 -0
- package/vectorstores/redis.js +1 -0
- /package/dist/{chains/prompt_selector.cjs → prompts/selectors/conditional.cjs} +0 -0
- /package/dist/{chains/prompt_selector.js → prompts/selectors/conditional.js} +0 -0
package/dist/util/env.d.ts
CHANGED
|
@@ -1 +1,20 @@
|
|
|
1
|
+
declare global {
|
|
2
|
+
const Deno: {
|
|
3
|
+
version: {
|
|
4
|
+
deno: string;
|
|
5
|
+
};
|
|
6
|
+
} | undefined;
|
|
7
|
+
}
|
|
8
|
+
export declare const isBrowser: () => boolean;
|
|
9
|
+
export declare const isWebWorker: () => boolean;
|
|
10
|
+
export declare const isJsDom: () => boolean;
|
|
11
|
+
export declare const isDeno: () => boolean;
|
|
12
|
+
export declare const isNode: () => boolean;
|
|
1
13
|
export declare const getEnv: () => string;
|
|
14
|
+
export type RuntimeEnvironment = {
|
|
15
|
+
library: string;
|
|
16
|
+
libraryVersion?: string;
|
|
17
|
+
runtime: string;
|
|
18
|
+
runtimeVersion?: string;
|
|
19
|
+
};
|
|
20
|
+
export declare function getRuntimeEnvironment(): Promise<RuntimeEnvironment>;
|
package/dist/util/env.js
CHANGED
|
@@ -1,19 +1,34 @@
|
|
|
1
|
-
|
|
1
|
+
export const isBrowser = () => typeof window !== "undefined" && typeof window.document !== "undefined";
|
|
2
|
+
/**
 * Reports whether the global scope looks like a dedicated web worker,
 * judged by the name of the global object's constructor.
 *
 * @returns A truthy value only when the global constructor is named
 *   "DedicatedWorkerGlobalScope"; a falsy value otherwise.
 */
export const isWebWorker = () => {
    if (typeof globalThis !== "object") {
        return false;
    }
    const globalCtor = globalThis.constructor;
    // Keep the short-circuit result: a missing constructor is passed through as-is.
    return globalCtor && globalCtor.name === "DedicatedWorkerGlobalScope";
};
|
|
5
|
+
export const isJsDom = () => (typeof window !== "undefined" && window.name === "nodejs") ||
|
|
6
|
+
(typeof navigator !== "undefined" &&
|
|
7
|
+
(navigator.userAgent.includes("Node.js") ||
|
|
8
|
+
navigator.userAgent.includes("jsdom")));
|
|
9
|
+
// Supabase Edge Function provides a `Deno` global object
|
|
10
|
+
// without `version` property
|
|
11
|
+
export const isDeno = () => typeof Deno !== "undefined";
|
|
12
|
+
/**
 * Reports whether the code is running under Node.js.
 *
 * Requires `process.versions.node` to be defined and additionally requires
 * that no `Deno` global exists, so that a Supabase Edge Function is marked
 * as not-Node.
 *
 * @returns `true` only for a Node.js process without a `Deno` global.
 */
export const isNode = () => {
    if (typeof process === "undefined") {
        return false;
    }
    if (typeof process.versions === "undefined") {
        return false;
    }
    if (typeof process.versions.node === "undefined") {
        return false;
    }
    return !isDeno();
};
|
|
2
17
|
export const getEnv = () => {
|
|
3
18
|
let env;
|
|
4
|
-
if (isBrowser) {
|
|
19
|
+
if (isBrowser()) {
|
|
5
20
|
env = "browser";
|
|
6
21
|
}
|
|
7
|
-
else if (isNode) {
|
|
22
|
+
else if (isNode()) {
|
|
8
23
|
env = "node";
|
|
9
24
|
}
|
|
10
|
-
else if (isWebWorker) {
|
|
25
|
+
else if (isWebWorker()) {
|
|
11
26
|
env = "webworker";
|
|
12
27
|
}
|
|
13
|
-
else if (isJsDom) {
|
|
28
|
+
else if (isJsDom()) {
|
|
14
29
|
env = "jsdom";
|
|
15
30
|
}
|
|
16
|
-
else if (isDeno) {
|
|
31
|
+
else if (isDeno()) {
|
|
17
32
|
env = "deno";
|
|
18
33
|
}
|
|
19
34
|
else {
|
|
@@ -21,3 +36,14 @@ export const getEnv = () => {
|
|
|
21
36
|
}
|
|
22
37
|
return env;
|
|
23
38
|
};
|
|
39
|
+
// Module-level memo: filled on the first call and reused afterwards.
let runtimeEnvironment;
/**
 * Describes the library and the runtime it is executing in.
 *
 * The descriptor is built once (using `getEnv()` for the runtime name) and
 * the same object is returned on every later call.
 *
 * @returns A promise resolving to `{ library: "langchain-js", runtime }`.
 */
export async function getRuntimeEnvironment() {
    if (runtimeEnvironment !== undefined) {
        return runtimeEnvironment;
    }
    const runtime = getEnv();
    runtimeEnvironment = { library: "langchain-js", runtime };
    return runtimeEnvironment;
}
|
package/dist/util/sql_utils.cjs
CHANGED
|
@@ -95,6 +95,18 @@ const getTableAndColumnsName = async (appDataSource) => {
|
|
|
95
95
|
const rep = await appDataSource.query(sql);
|
|
96
96
|
return formatToSqlTable(rep);
|
|
97
97
|
}
|
|
98
|
+
if (appDataSource.options.type === "mssql") {
|
|
99
|
+
sql =
|
|
100
|
+
"SELECT " +
|
|
101
|
+
"TABLE_NAME AS table_name, " +
|
|
102
|
+
"COLUMN_NAME AS column_name, " +
|
|
103
|
+
"DATA_TYPE AS data_type, " +
|
|
104
|
+
"IS_NULLABLE AS is_nullable " +
|
|
105
|
+
"FROM INFORMATION_SCHEMA.COLUMNS " +
|
|
106
|
+
"ORDER BY TABLE_NAME, ORDINAL_POSITION;";
|
|
107
|
+
const rep = await appDataSource.query(sql);
|
|
108
|
+
return formatToSqlTable(rep);
|
|
109
|
+
}
|
|
98
110
|
throw new Error("Database type not implemented yet");
|
|
99
111
|
};
|
|
100
112
|
exports.getTableAndColumnsName = getTableAndColumnsName;
|
|
@@ -137,6 +149,9 @@ const generateTableInfoFromTables = async (tables, appDataSource, nbSampleRow) =
|
|
|
137
149
|
const schema = appDataSource.options?.schema ?? "public";
|
|
138
150
|
sqlSelectInfoQuery = `SELECT * FROM "${schema}"."${currentTable.tableName}" LIMIT ${nbSampleRow};\n`;
|
|
139
151
|
}
|
|
152
|
+
else if (appDataSource.options.type === "mssql") {
|
|
153
|
+
sqlSelectInfoQuery = `SELECT TOP ${nbSampleRow} * FROM [${currentTable.tableName}];\n`;
|
|
154
|
+
}
|
|
140
155
|
else {
|
|
141
156
|
sqlSelectInfoQuery = `SELECT * FROM "${currentTable.tableName}" LIMIT ${nbSampleRow};\n`;
|
|
142
157
|
}
|
|
@@ -170,6 +185,9 @@ const getPromptTemplateFromDataSource = (appDataSource) => {
|
|
|
170
185
|
if (appDataSource.options.type === "mysql") {
|
|
171
186
|
return sql_db_prompt_js_1.SQL_MYSQL_PROMPT;
|
|
172
187
|
}
|
|
188
|
+
if (appDataSource.options.type === "mssql") {
|
|
189
|
+
return sql_db_prompt_js_1.SQL_MSSQL_PROMPT;
|
|
190
|
+
}
|
|
173
191
|
return sql_db_prompt_js_1.DEFAULT_SQL_DATABASE_PROMPT;
|
|
174
192
|
};
|
|
175
193
|
exports.getPromptTemplateFromDataSource = getPromptTemplateFromDataSource;
|
package/dist/util/sql_utils.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { DEFAULT_SQL_DATABASE_PROMPT, SQL_MYSQL_PROMPT, SQL_POSTGRES_PROMPT, SQL_SQLITE_PROMPT, } from "../chains/sql_db/sql_db_prompt.js";
|
|
1
|
+
import { DEFAULT_SQL_DATABASE_PROMPT, SQL_MSSQL_PROMPT, SQL_MYSQL_PROMPT, SQL_POSTGRES_PROMPT, SQL_SQLITE_PROMPT, } from "../chains/sql_db/sql_db_prompt.js";
|
|
2
2
|
export const verifyListTablesExistInDatabase = (tablesFromDatabase, listTables, errorPrefixMsg) => {
|
|
3
3
|
const onlyTableNames = tablesFromDatabase.map((table) => table.tableName);
|
|
4
4
|
if (listTables.length > 0) {
|
|
@@ -89,6 +89,18 @@ export const getTableAndColumnsName = async (appDataSource) => {
|
|
|
89
89
|
const rep = await appDataSource.query(sql);
|
|
90
90
|
return formatToSqlTable(rep);
|
|
91
91
|
}
|
|
92
|
+
if (appDataSource.options.type === "mssql") {
|
|
93
|
+
sql =
|
|
94
|
+
"SELECT " +
|
|
95
|
+
"TABLE_NAME AS table_name, " +
|
|
96
|
+
"COLUMN_NAME AS column_name, " +
|
|
97
|
+
"DATA_TYPE AS data_type, " +
|
|
98
|
+
"IS_NULLABLE AS is_nullable " +
|
|
99
|
+
"FROM INFORMATION_SCHEMA.COLUMNS " +
|
|
100
|
+
"ORDER BY TABLE_NAME, ORDINAL_POSITION;";
|
|
101
|
+
const rep = await appDataSource.query(sql);
|
|
102
|
+
return formatToSqlTable(rep);
|
|
103
|
+
}
|
|
92
104
|
throw new Error("Database type not implemented yet");
|
|
93
105
|
};
|
|
94
106
|
const formatSqlResponseToSimpleTableString = (rawResult) => {
|
|
@@ -130,6 +142,9 @@ export const generateTableInfoFromTables = async (tables, appDataSource, nbSampl
|
|
|
130
142
|
const schema = appDataSource.options?.schema ?? "public";
|
|
131
143
|
sqlSelectInfoQuery = `SELECT * FROM "${schema}"."${currentTable.tableName}" LIMIT ${nbSampleRow};\n`;
|
|
132
144
|
}
|
|
145
|
+
else if (appDataSource.options.type === "mssql") {
|
|
146
|
+
sqlSelectInfoQuery = `SELECT TOP ${nbSampleRow} * FROM [${currentTable.tableName}];\n`;
|
|
147
|
+
}
|
|
133
148
|
else {
|
|
134
149
|
sqlSelectInfoQuery = `SELECT * FROM "${currentTable.tableName}" LIMIT ${nbSampleRow};\n`;
|
|
135
150
|
}
|
|
@@ -162,5 +177,8 @@ export const getPromptTemplateFromDataSource = (appDataSource) => {
|
|
|
162
177
|
if (appDataSource.options.type === "mysql") {
|
|
163
178
|
return SQL_MYSQL_PROMPT;
|
|
164
179
|
}
|
|
180
|
+
if (appDataSource.options.type === "mssql") {
|
|
181
|
+
return SQL_MSSQL_PROMPT;
|
|
182
|
+
}
|
|
165
183
|
return DEFAULT_SQL_DATABASE_PROMPT;
|
|
166
184
|
};
|
|
package/dist/util/tiktoken.cjs
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.encodingForModel = exports.getEncoding = void 0;
|
|
4
|
+
const lite_1 = require("js-tiktoken/lite");
|
|
5
|
+
const async_caller_js_1 = require("./async_caller.cjs");
|
|
6
|
+
const cache = {};
|
|
7
|
+
const caller = /* #__PURE__ */ new async_caller_js_1.AsyncCaller({});
|
|
8
|
+
/**
 * Builds a `Tiktoken` encoder for the named encoding.
 *
 * The encoding definition is downloaded from tiktoken.pages.dev through the
 * shared `caller`, and the in-flight/completed download is memoised in
 * `cache` so each encoding is requested once. A failed download removes its
 * cache entry before the error is re-thrown, so a later call can retry.
 *
 * @param encoding Name of the encoding to load.
 * @param options Optional `signal` forwarded to the fetch and
 *   `extendedSpecialTokens` forwarded to the `Tiktoken` constructor.
 * @returns A promise resolving to a new `Tiktoken` instance.
 */
async function getEncoding(encoding, options) {
    const alreadyRequested = encoding in cache;
    if (!alreadyRequested) {
        const response = caller.fetch(`https://tiktoken.pages.dev/js/${encoding}.json`, {
            signal: options?.signal,
        });
        const parsed = response.then((res) => res.json());
        cache[encoding] = parsed.catch((e) => {
            // Forget the failed attempt so the next call fetches again.
            delete cache[encoding];
            throw e;
        });
    }
    const ranks = await cache[encoding];
    return new lite_1.Tiktoken(ranks, options?.extendedSpecialTokens);
}
|
|
22
|
+
exports.getEncoding = getEncoding;
|
|
23
|
+
/**
 * Builds a `Tiktoken` encoder for a model by first resolving the model name
 * to its encoding name and then delegating to `getEncoding`.
 *
 * @param model Model name to resolve.
 * @param options Passed through unchanged to `getEncoding`.
 * @returns A promise resolving to a new `Tiktoken` instance.
 */
async function encodingForModel(model, options) {
    const encodingName = (0, lite_1.getEncodingNameForModel)(model);
    return getEncoding(encodingName, options);
}
|
|
26
|
+
exports.encodingForModel = encodingForModel;
|
|
package/dist/util/tiktoken.d.ts
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { Tiktoken, TiktokenEncoding, TiktokenModel } from "js-tiktoken/lite";
|
|
2
|
+
export declare function getEncoding(encoding: TiktokenEncoding, options?: {
|
|
3
|
+
signal?: AbortSignal;
|
|
4
|
+
extendedSpecialTokens?: Record<string, number>;
|
|
5
|
+
}): Promise<Tiktoken>;
|
|
6
|
+
export declare function encodingForModel(model: TiktokenModel, options?: {
|
|
7
|
+
signal?: AbortSignal;
|
|
8
|
+
extendedSpecialTokens?: Record<string, number>;
|
|
9
|
+
}): Promise<Tiktoken>;
|
|
package/dist/util/tiktoken.js
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { Tiktoken, getEncodingNameForModel, } from "js-tiktoken/lite";
|
|
2
|
+
import { AsyncCaller } from "./async_caller.js";
|
|
3
|
+
const cache = {};
|
|
4
|
+
const caller = /* #__PURE__ */ new AsyncCaller({});
|
|
5
|
+
/**
 * Builds a `Tiktoken` encoder for the named encoding.
 *
 * The encoding definition is downloaded from tiktoken.pages.dev through the
 * shared `caller`, and the in-flight/completed download is memoised in
 * `cache` so each encoding is requested once. A failed download removes its
 * cache entry before the error is re-thrown, so a later call can retry.
 *
 * @param encoding Name of the encoding to load.
 * @param options Optional `signal` forwarded to the fetch and
 *   `extendedSpecialTokens` forwarded to the `Tiktoken` constructor.
 * @returns A promise resolving to a new `Tiktoken` instance.
 */
export async function getEncoding(encoding, options) {
    const alreadyRequested = encoding in cache;
    if (!alreadyRequested) {
        const response = caller.fetch(`https://tiktoken.pages.dev/js/${encoding}.json`, {
            signal: options?.signal,
        });
        const parsed = response.then((res) => res.json());
        cache[encoding] = parsed.catch((e) => {
            // Forget the failed attempt so the next call fetches again.
            delete cache[encoding];
            throw e;
        });
    }
    const ranks = await cache[encoding];
    return new Tiktoken(ranks, options?.extendedSpecialTokens);
}
|
|
19
|
+
/**
 * Builds a `Tiktoken` encoder for a model by first resolving the model name
 * to its encoding name and then delegating to `getEncoding`.
 *
 * @param model Model name to resolve.
 * @param options Passed through unchanged to `getEncoding`.
 * @returns A promise resolving to a new `Tiktoken` instance.
 */
export async function encodingForModel(model, options) {
    const encodingName = getEncodingNameForModel(model);
    return getEncoding(encodingName, options);
}
|
|
package/dist/vectorstores/redis.cjs
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.RedisVectorStore = void 0;
|
|
4
|
+
const redis_1 = require("redis");
|
|
5
|
+
const base_js_1 = require("./base.cjs");
|
|
6
|
+
const document_js_1 = require("../document.cjs");
|
|
7
|
+
/**
 * Vector store that keeps each document in a Redis hash and queries them
 * through a RediSearch index using a KNN vector search.
 *
 * Each hash holds three fields, whose names are configurable: the embedding
 * as a float32 buffer (`vectorKey`), the page content (`contentKey`) and the
 * JSON-serialised metadata (`metadataKey`).
 */
class RedisVectorStore extends base_js_1.VectorStore {
    /**
     * @param embeddings Embeddings implementation used to embed documents.
     * @param _dbConfig Store configuration. `redisClient` and `indexName` are
     *   read as-is; `indexOptions` defaults to HNSW with the COSINE metric,
     *   `keyPrefix` to `doc:<indexName>:`, `contentKey` to "content",
     *   `metadataKey` to "metadata" and `vectorKey` to "content_vector".
     *   `filter` is an optional default filter for searches.
     */
    constructor(embeddings, _dbConfig) {
        super(embeddings, _dbConfig);
        // Define every instance field up front as an enumerable, writable,
        // configurable own property, in declaration order.
        const fieldNames = [
            "redisClient",
            "indexName",
            "indexOptions",
            "keyPrefix",
            "contentKey",
            "metadataKey",
            "vectorKey",
            "filter",
        ];
        for (const fieldName of fieldNames) {
            Object.defineProperty(this, fieldName, {
                enumerable: true,
                configurable: true,
                writable: true,
                value: void 0
            });
        }
        this.redisClient = _dbConfig.redisClient;
        this.indexName = _dbConfig.indexName;
        this.indexOptions = _dbConfig.indexOptions ?? {
            ALGORITHM: redis_1.VectorAlgorithms.HNSW,
            DISTANCE_METRIC: "COSINE",
        };
        this.keyPrefix = _dbConfig.keyPrefix ?? `doc:${this.indexName}:`;
        this.contentKey = _dbConfig.contentKey ?? "content";
        this.metadataKey = _dbConfig.metadataKey ?? "metadata";
        this.vectorKey = _dbConfig.vectorKey ?? "content_vector";
        this.filter = _dbConfig.filter;
    }
    /**
     * Embeds the documents' page contents and stores them.
     *
     * @param documents Documents to embed and store.
     * @param options Forwarded to `addVectors` (`keys`, `batchSize`).
     */
    async addDocuments(documents, options) {
        const texts = documents.map(({ pageContent }) => pageContent);
        await this.addVectors(await this.embeddings.embedDocuments(texts), documents, options);
    }
    /**
     * Stores pre-computed vectors with their documents, creating the index
     * first if it does not exist yet.
     *
     * Writes are grouped into MULTI transactions of at most `batchSize`
     * entries. Each transaction is awaited before the next one is built, so
     * a failed write rejects the returned promise.
     *
     * When no `keys` are given, keys are `keyPrefix` followed by the
     * position of the vector in this call, so a later call without explicit
     * keys writes to the same keys again.
     *
     * @param vectors Embeddings, one per document.
     * @param documents Documents aligned with `vectors` by index.
     * @param options `keys`: explicit hash keys aligned with `vectors`;
     *   `batchSize`: maximum entries per transaction (default 1000). A value
     *   that is not a finite number >= 1 results in a single transaction.
     * @throws Error when fewer documents than vectors are supplied.
     */
    async addVectors(vectors, documents, { keys, batchSize = 1000 } = {}) {
        // Nothing to write; also avoids reading `vectors[0]` of an empty list.
        if (vectors.length === 0) {
            return;
        }
        if (documents.length < vectors.length) {
            throw new Error("`documents` must contain one entry per vector");
        }
        // check if the index exists and create it if it doesn't
        await this.createIndex(vectors[0].length);
        const useExplicitKeys = Boolean(keys && keys.length);
        const perBatch = Number.isFinite(batchSize) && batchSize >= 1
            ? Math.floor(batchSize)
            : vectors.length;
        for (let start = 0; start < vectors.length; start += perBatch) {
            // A fresh transaction per batch: nothing is queued twice.
            const multi = this.redisClient.multi();
            const end = Math.min(start + perBatch, vectors.length);
            for (let idx = start; idx < end; idx += 1) {
                const doc = documents[idx];
                const key = useExplicitKeys ? keys[idx] : `${this.keyPrefix}${idx}`;
                const metadata = doc.metadata ? doc.metadata : {};
                multi.hSet(key, {
                    [this.vectorKey]: this.getFloat32Buffer(vectors[idx]),
                    [this.contentKey]: doc.pageContent,
                    [this.metadataKey]: this.escapeSpecialChars(JSON.stringify(metadata)),
                });
            }
            await multi.exec();
        }
    }
    /**
     * Runs a KNN search for the query vector and returns matching documents
     * with their scores.
     *
     * @param query Query embedding.
     * @param k Number of neighbours to request.
     * @param filter Optional list of terms matched against the metadata
     *   field; mutually exclusive with the store-level `filter`.
     * @returns `[Document, score]` pairs for every hit that carries a
     *   `vector_score`.
     * @throws Error when both `filter` and `this.filter` are set.
     */
    async similaritySearchVectorWithScore(query, k, filter) {
        if (filter && this.filter) {
            throw new Error("cannot provide both `filter` and `this.filter`");
        }
        const _filter = filter ?? this.filter;
        const results = await this.redisClient.ft.search(this.indexName, ...this.buildQuery(query, k, _filter));
        const result = [];
        if (!results.total) {
            return result;
        }
        for (const res of results.documents) {
            const document = res.value;
            if (!document || !document.vector_score) {
                continue;
            }
            result.push([
                new document_js_1.Document({
                    pageContent: document[this.contentKey],
                    // Read the metadata from the configured field name; the
                    // query's RETURN list uses `metadataKey`, not "metadata".
                    metadata: JSON.parse(this.unEscapeSpecialChars(document[this.metadataKey])),
                }),
                Number(document.vector_score),
            ]);
        }
        return result;
    }
    /**
     * Creates a store from raw texts.
     *
     * @param texts Page contents, one per document.
     * @param metadatas Either one metadata object per text, or a single
     *   object shared by all texts.
     * @param embeddings Embeddings implementation.
     * @param dbConfig Store configuration.
     * @returns Promise resolving to the populated store.
     */
    static fromTexts(texts, metadatas, embeddings, dbConfig) {
        const docs = [];
        for (let i = 0; i < texts.length; i += 1) {
            const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas;
            const newDoc = new document_js_1.Document({
                pageContent: texts[i],
                metadata,
            });
            docs.push(newDoc);
        }
        return RedisVectorStore.fromDocuments(docs, embeddings, dbConfig);
    }
    /**
     * Creates a store and adds the given documents to it.
     *
     * @param docs Documents to embed and store.
     * @param embeddings Embeddings implementation.
     * @param dbConfig Store configuration.
     * @returns Promise resolving to the populated store.
     */
    static async fromDocuments(docs, embeddings, dbConfig) {
        const instance = new this(embeddings, dbConfig);
        await instance.addDocuments(docs);
        return instance;
    }
    /**
     * Checks whether the index exists by requesting its info.
     *
     * @returns `false` when the info request throws for any reason,
     *   `true` otherwise.
     */
    async checkIndexExists() {
        try {
            await this.redisClient.ft.info(this.indexName);
        }
        catch (err) {
            // index doesn't exist
            return false;
        }
        return true;
    }
    /**
     * Creates the search index over hashes under `keyPrefix`, unless it
     * already exists.
     *
     * @param dimensions Dimensionality of the stored vectors (default 1536).
     */
    async createIndex(dimensions = 1536) {
        if (await this.checkIndexExists()) {
            return;
        }
        const schema = {
            [this.vectorKey]: {
                type: redis_1.SchemaFieldTypes.VECTOR,
                TYPE: "FLOAT32",
                DIM: dimensions,
                ...this.indexOptions,
            },
            [this.contentKey]: redis_1.SchemaFieldTypes.TEXT,
            [this.metadataKey]: redis_1.SchemaFieldTypes.TEXT,
        };
        await this.redisClient.ft.create(this.indexName, schema, {
            ON: "HASH",
            PREFIX: this.keyPrefix,
        });
    }
    /**
     * Drops the search index.
     *
     * @returns `true` when the drop succeeded, `false` when it threw.
     */
    async dropIndex() {
        try {
            await this.redisClient.ft.dropIndex(this.indexName);
            return true;
        }
        catch (err) {
            return false;
        }
    }
    /**
     * Builds the `[query, options]` pair passed to `ft.search`.
     *
     * @param query Query embedding.
     * @param k Number of neighbours to request.
     * @param filter Optional list of metadata terms.
     * @returns Tuple of the query string and the search options.
     */
    buildQuery(query, k, filter) {
        const vectorScoreField = "vector_score";
        let hybridFields = "*";
        // if a filter is set, modify the hybrid query
        if (filter && filter.length) {
            // `filter` is a list of strings, then it's applied using the OR operator in the metadata key
            // for example: filter = ['foo', 'bar'] => this will filter all metadata containing either 'foo' OR 'bar'
            hybridFields = `@${this.metadataKey}:(${this.prepareFilter(filter)})`;
        }
        const baseQuery = `${hybridFields} => [KNN ${k} @${this.vectorKey} $vector AS ${vectorScoreField}]`;
        const returnFields = [this.metadataKey, this.contentKey, vectorScoreField];
        const options = {
            PARAMS: {
                vector: this.getFloat32Buffer(query),
            },
            RETURN: returnFields,
            SORTBY: vectorScoreField,
            DIALECT: 2,
            LIMIT: {
                from: 0,
                size: k,
            },
        };
        return [baseQuery, options];
    }
    /**
     * Escapes every filter term and joins the terms with `|`.
     *
     * @param filter List of filter terms.
     * @returns The combined filter expression.
     */
    prepareFilter(filter) {
        // Call through `this` so the method keeps its receiver.
        return filter.map((term) => this.escapeSpecialChars(term)).join("|");
    }
    /**
     * Escapes all '-' characters.
     * RediSearch considers '-' as a negative operator, hence we need
     * to escape it
     * @see https://redis.io/docs/stack/search/reference/query_syntax
     *
     * @param str
     * @returns
     */
    escapeSpecialChars(str) {
        return str.replaceAll("-", "\\-");
    }
    /**
     * Unescapes all '-' characters, returning the original string
     *
     * @param str
     * @returns
     */
    unEscapeSpecialChars(str) {
        return str.replaceAll("\\-", "-");
    }
    /**
     * Converts the vector to the buffer Redis needs to
     * correctly store an embedding
     *
     * @param vector
     * @returns Buffer
     */
    getFloat32Buffer(vector) {
        return Buffer.from(new Float32Array(vector).buffer);
    }
}
|
|
236
|
+
exports.RedisVectorStore = RedisVectorStore;
|
|
package/dist/vectorstores/redis.d.ts
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import type { createCluster, createClient } from "redis";
|
|
2
|
+
import { VectorAlgorithms } from "redis";
|
|
3
|
+
import { Embeddings } from "../embeddings/base.js";
|
|
4
|
+
import { VectorStore } from "./base.js";
|
|
5
|
+
import { Document } from "../document.js";
|
|
6
|
+
export type CreateSchemaVectorField<T extends VectorAlgorithms, A extends Record<string, unknown>> = {
|
|
7
|
+
ALGORITHM: T;
|
|
8
|
+
DISTANCE_METRIC: "L2" | "IP" | "COSINE";
|
|
9
|
+
INITIAL_CAP?: number;
|
|
10
|
+
} & A;
|
|
11
|
+
export type CreateSchemaFlatVectorField = CreateSchemaVectorField<VectorAlgorithms.FLAT, {
|
|
12
|
+
BLOCK_SIZE?: number;
|
|
13
|
+
}>;
|
|
14
|
+
export type CreateSchemaHNSWVectorField = CreateSchemaVectorField<VectorAlgorithms.HNSW, {
|
|
15
|
+
M?: number;
|
|
16
|
+
EF_CONSTRUCTION?: number;
|
|
17
|
+
EF_RUNTIME?: number;
|
|
18
|
+
}>;
|
|
19
|
+
export interface RedisVectorStoreConfig {
|
|
20
|
+
redisClient: ReturnType<typeof createClient> | ReturnType<typeof createCluster>;
|
|
21
|
+
indexName: string;
|
|
22
|
+
indexOptions?: CreateSchemaFlatVectorField | CreateSchemaHNSWVectorField;
|
|
23
|
+
keyPrefix?: string;
|
|
24
|
+
contentKey?: string;
|
|
25
|
+
metadataKey?: string;
|
|
26
|
+
vectorKey?: string;
|
|
27
|
+
filter?: RedisVectorStoreFilterType;
|
|
28
|
+
}
|
|
29
|
+
export interface RedisAddOptions {
|
|
30
|
+
keys?: string[];
|
|
31
|
+
batchSize?: number;
|
|
32
|
+
}
|
|
33
|
+
export type RedisVectorStoreFilterType = string[];
|
|
34
|
+
export declare class RedisVectorStore extends VectorStore {
|
|
35
|
+
FilterType: RedisVectorStoreFilterType;
|
|
36
|
+
private redisClient;
|
|
37
|
+
indexName: string;
|
|
38
|
+
indexOptions: CreateSchemaFlatVectorField | CreateSchemaHNSWVectorField;
|
|
39
|
+
keyPrefix: string;
|
|
40
|
+
contentKey: string;
|
|
41
|
+
metadataKey: string;
|
|
42
|
+
vectorKey: string;
|
|
43
|
+
filter?: RedisVectorStoreFilterType;
|
|
44
|
+
constructor(embeddings: Embeddings, _dbConfig: RedisVectorStoreConfig);
|
|
45
|
+
addDocuments(documents: Document[], options?: RedisAddOptions): Promise<void>;
|
|
46
|
+
addVectors(vectors: number[][], documents: Document[], { keys, batchSize }?: RedisAddOptions): Promise<void>;
|
|
47
|
+
similaritySearchVectorWithScore(query: number[], k: number, filter?: RedisVectorStoreFilterType): Promise<[Document, number][]>;
|
|
48
|
+
static fromTexts(texts: string[], metadatas: object[] | object, embeddings: Embeddings, dbConfig: RedisVectorStoreConfig): Promise<RedisVectorStore>;
|
|
49
|
+
static fromDocuments(docs: Document[], embeddings: Embeddings, dbConfig: RedisVectorStoreConfig): Promise<RedisVectorStore>;
|
|
50
|
+
checkIndexExists(): Promise<boolean>;
|
|
51
|
+
createIndex(dimensions?: number): Promise<void>;
|
|
52
|
+
dropIndex(): Promise<boolean>;
|
|
53
|
+
private buildQuery;
|
|
54
|
+
private prepareFilter;
|
|
55
|
+
/**
|
|
56
|
+
* Escapes all '-' characters.
|
|
57
|
+
* RediSearch considers '-' as a negative operator, hence we need
|
|
58
|
+
* to escape it
|
|
59
|
+
* @see https://redis.io/docs/stack/search/reference/query_syntax
|
|
60
|
+
*
|
|
61
|
+
* @param str
|
|
62
|
+
* @returns
|
|
63
|
+
*/
|
|
64
|
+
private escapeSpecialChars;
|
|
65
|
+
/**
|
|
66
|
+
* Unescapes all '-' characters, returning the original string
|
|
67
|
+
*
|
|
68
|
+
* @param str
|
|
69
|
+
* @returns
|
|
70
|
+
*/
|
|
71
|
+
private unEscapeSpecialChars;
|
|
72
|
+
/**
|
|
73
|
+
* Converts the vector to the buffer Redis needs to
|
|
74
|
+
* correctly store an embedding
|
|
75
|
+
*
|
|
76
|
+
* @param vector
|
|
77
|
+
* @returns Buffer
|
|
78
|
+
*/
|
|
79
|
+
private getFloat32Buffer;
|
|
80
|
+
}
|