langchain 0.0.74 → 0.0.76

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163)
  1. package/client.cjs +1 -0
  2. package/client.d.ts +1 -0
  3. package/client.js +1 -0
  4. package/dist/base_language/count_tokens.cjs +5 -21
  5. package/dist/base_language/count_tokens.d.ts +1 -6
  6. package/dist/base_language/count_tokens.js +4 -19
  7. package/dist/base_language/index.cjs +16 -24
  8. package/dist/base_language/index.d.ts +22 -3
  9. package/dist/base_language/index.js +17 -25
  10. package/dist/cache/redis.d.ts +3 -1
  11. package/dist/callbacks/base.d.ts +17 -3
  12. package/dist/callbacks/handlers/console.cjs +15 -28
  13. package/dist/callbacks/handlers/console.d.ts +11 -20
  14. package/dist/callbacks/handlers/console.js +14 -27
  15. package/dist/callbacks/handlers/initialize.cjs +8 -3
  16. package/dist/callbacks/handlers/initialize.d.ts +4 -2
  17. package/dist/callbacks/handlers/initialize.js +6 -2
  18. package/dist/callbacks/handlers/tracer.cjs +193 -0
  19. package/dist/callbacks/handlers/tracer.d.ts +65 -0
  20. package/dist/callbacks/handlers/tracer.js +189 -0
  21. package/dist/callbacks/handlers/tracer_langchain.cjs +163 -0
  22. package/dist/callbacks/handlers/tracer_langchain.d.ts +39 -0
  23. package/dist/callbacks/handlers/tracer_langchain.js +159 -0
  24. package/dist/callbacks/handlers/tracer_langchain_v1.cjs +202 -0
  25. package/dist/callbacks/handlers/tracer_langchain_v1.d.ts +57 -0
  26. package/dist/callbacks/handlers/tracer_langchain_v1.js +198 -0
  27. package/dist/callbacks/index.cjs +10 -5
  28. package/dist/callbacks/index.d.ts +5 -3
  29. package/dist/callbacks/index.js +5 -3
  30. package/dist/callbacks/manager.cjs +39 -9
  31. package/dist/callbacks/manager.d.ts +5 -2
  32. package/dist/callbacks/manager.js +40 -10
  33. package/dist/chains/llm_chain.cjs +17 -5
  34. package/dist/chains/llm_chain.d.ts +9 -3
  35. package/dist/chains/llm_chain.js +17 -5
  36. package/dist/chains/question_answering/map_reduce_prompts.cjs +5 -5
  37. package/dist/chains/question_answering/map_reduce_prompts.d.ts +1 -1
  38. package/dist/chains/question_answering/map_reduce_prompts.js +1 -1
  39. package/dist/chains/question_answering/refine_prompts.cjs +5 -5
  40. package/dist/chains/question_answering/refine_prompts.d.ts +1 -1
  41. package/dist/chains/question_answering/refine_prompts.js +1 -1
  42. package/dist/chains/question_answering/stuff_prompts.cjs +2 -2
  43. package/dist/chains/question_answering/stuff_prompts.d.ts +1 -1
  44. package/dist/chains/question_answering/stuff_prompts.js +1 -1
  45. package/dist/chains/sequential_chain.cjs +2 -2
  46. package/dist/chains/sequential_chain.d.ts +2 -2
  47. package/dist/chains/sequential_chain.js +2 -2
  48. package/dist/chains/sql_db/sql_db_prompt.cjs +20 -1
  49. package/dist/chains/sql_db/sql_db_prompt.d.ts +1 -0
  50. package/dist/chains/sql_db/sql_db_prompt.js +19 -0
  51. package/dist/chains/summarization/load.cjs +14 -5
  52. package/dist/chains/summarization/load.d.ts +7 -2
  53. package/dist/chains/summarization/load.js +14 -5
  54. package/dist/chat_models/anthropic.cjs +36 -9
  55. package/dist/chat_models/anthropic.d.ts +6 -3
  56. package/dist/chat_models/anthropic.js +36 -9
  57. package/dist/chat_models/base.cjs +40 -13
  58. package/dist/chat_models/base.d.ts +14 -7
  59. package/dist/chat_models/base.js +41 -14
  60. package/dist/chat_models/openai.cjs +20 -14
  61. package/dist/chat_models/openai.d.ts +2 -1
  62. package/dist/chat_models/openai.js +20 -14
  63. package/dist/client/index.cjs +5 -0
  64. package/dist/client/index.d.ts +1 -0
  65. package/dist/client/index.js +1 -0
  66. package/dist/client/langchainplus.cjs +405 -0
  67. package/dist/client/langchainplus.d.ts +65 -0
  68. package/dist/client/langchainplus.js +398 -0
  69. package/dist/document_loaders/fs/unstructured.cjs +8 -0
  70. package/dist/document_loaders/fs/unstructured.d.ts +3 -0
  71. package/dist/document_loaders/fs/unstructured.js +8 -0
  72. package/dist/document_loaders/web/apify_dataset.cjs +64 -0
  73. package/dist/document_loaders/web/apify_dataset.d.ts +28 -0
  74. package/dist/document_loaders/web/apify_dataset.js +60 -0
  75. package/dist/embeddings/openai.cjs +2 -2
  76. package/dist/embeddings/openai.js +2 -2
  77. package/dist/llms/base.cjs +43 -17
  78. package/dist/llms/base.d.ts +16 -9
  79. package/dist/llms/base.js +44 -18
  80. package/dist/llms/cohere.cjs +3 -2
  81. package/dist/llms/cohere.d.ts +1 -1
  82. package/dist/llms/cohere.js +3 -2
  83. package/dist/llms/hf.cjs +2 -2
  84. package/dist/llms/hf.d.ts +1 -1
  85. package/dist/llms/hf.js +2 -2
  86. package/dist/llms/openai-chat.cjs +13 -14
  87. package/dist/llms/openai-chat.d.ts +2 -1
  88. package/dist/llms/openai-chat.js +13 -14
  89. package/dist/llms/openai.cjs +15 -13
  90. package/dist/llms/openai.d.ts +4 -3
  91. package/dist/llms/openai.js +15 -13
  92. package/dist/llms/replicate.cjs +2 -2
  93. package/dist/llms/replicate.d.ts +1 -1
  94. package/dist/llms/replicate.js +2 -2
  95. package/dist/memory/base.cjs +9 -1
  96. package/dist/memory/base.d.ts +1 -0
  97. package/dist/memory/base.js +7 -0
  98. package/dist/memory/entity_memory.cjs +151 -0
  99. package/dist/memory/entity_memory.d.ts +35 -0
  100. package/dist/memory/entity_memory.js +147 -0
  101. package/dist/memory/index.cjs +5 -1
  102. package/dist/memory/index.d.ts +2 -0
  103. package/dist/memory/index.js +2 -0
  104. package/dist/memory/prompt.cjs +84 -1
  105. package/dist/memory/prompt.d.ts +6 -0
  106. package/dist/memory/prompt.js +83 -0
  107. package/dist/memory/stores/entity/in_memory.cjs +32 -0
  108. package/dist/memory/stores/entity/in_memory.d.ts +10 -0
  109. package/dist/memory/stores/entity/in_memory.js +28 -0
  110. package/dist/prompts/index.cjs +6 -1
  111. package/dist/prompts/index.d.ts +1 -0
  112. package/dist/prompts/index.js +1 -0
  113. package/dist/{chains/prompt_selector.d.ts → prompts/selectors/conditional.d.ts} +4 -4
  114. package/dist/retrievers/document_compressors/chain_extract.cjs +9 -11
  115. package/dist/retrievers/document_compressors/chain_extract.js +9 -11
  116. package/dist/schema/index.cjs +13 -1
  117. package/dist/schema/index.d.ts +19 -0
  118. package/dist/schema/index.js +11 -0
  119. package/dist/stores/message/dynamodb.cjs +8 -6
  120. package/dist/stores/message/dynamodb.js +8 -6
  121. package/dist/stores/message/redis.cjs +69 -0
  122. package/dist/stores/message/redis.d.ts +18 -0
  123. package/dist/stores/message/redis.js +65 -0
  124. package/dist/stores/message/utils.cjs +30 -15
  125. package/dist/stores/message/utils.d.ts +4 -2
  126. package/dist/stores/message/utils.js +28 -14
  127. package/dist/text_splitter.cjs +3 -23
  128. package/dist/text_splitter.d.ts +1 -3
  129. package/dist/text_splitter.js +3 -23
  130. package/dist/tools/webbrowser.cjs +5 -7
  131. package/dist/tools/webbrowser.js +3 -5
  132. package/dist/types/openai-types.d.ts +3 -2
  133. package/dist/util/async_caller.cjs +16 -0
  134. package/dist/util/async_caller.d.ts +4 -0
  135. package/dist/util/async_caller.js +16 -0
  136. package/dist/util/axios-fetch-adapter.cjs +6 -0
  137. package/dist/util/axios-fetch-adapter.js +6 -0
  138. package/dist/util/env.cjs +39 -7
  139. package/dist/util/env.d.ts +19 -0
  140. package/dist/util/env.js +32 -6
  141. package/dist/util/sql_utils.cjs +18 -0
  142. package/dist/util/sql_utils.js +19 -1
  143. package/dist/util/tiktoken.cjs +26 -0
  144. package/dist/util/tiktoken.d.ts +9 -0
  145. package/dist/util/tiktoken.js +21 -0
  146. package/dist/vectorstores/redis.cjs +236 -0
  147. package/dist/vectorstores/redis.d.ts +80 -0
  148. package/dist/vectorstores/redis.js +232 -0
  149. package/document_loaders/web/apify_dataset.cjs +1 -0
  150. package/document_loaders/web/apify_dataset.d.ts +1 -0
  151. package/document_loaders/web/apify_dataset.js +1 -0
  152. package/package.json +41 -5
  153. package/stores/message/redis.cjs +1 -0
  154. package/stores/message/redis.d.ts +1 -0
  155. package/stores/message/redis.js +1 -0
  156. package/vectorstores/redis.cjs +1 -0
  157. package/vectorstores/redis.d.ts +1 -0
  158. package/vectorstores/redis.js +1 -0
  159. package/dist/callbacks/handlers/tracers.cjs +0 -341
  160. package/dist/callbacks/handlers/tracers.d.ts +0 -100
  161. package/dist/callbacks/handlers/tracers.js +0 -336
  162. package/dist/{chains/prompt_selector.cjs → prompts/selectors/conditional.cjs} +0 -0
  163. package/dist/{chains/prompt_selector.js → prompts/selectors/conditional.js} +0 -0
@@ -0,0 +1,21 @@
1
+ import { Tiktoken, getEncodingNameForModel, } from "js-tiktoken/lite";
2
+ import { AsyncCaller } from "./async_caller.js";
3
// Maps an encoding name to the promise of its downloaded JSON definition, so
// each encoding is requested at most once while a request is pending or done.
const cache = {};
const caller = /* #__PURE__ */ new AsyncCaller({});
/**
 * Builds a Tiktoken instance for the named encoding.
 *
 * The encoding definition is fetched as JSON from tiktoken.pages.dev on first
 * use and the resulting promise is cached per encoding name. If the download
 * or JSON parsing fails, the cache entry is removed so a later call can retry,
 * and the error is rethrown to the caller.
 *
 * @param encoding Name of the encoding; used as the file name in the URL
 * @param options Optional `signal` forwarded to the fetch call and optional
 * `extendedSpecialTokens` forwarded to the Tiktoken constructor
 * @returns A new Tiktoken instance built from the fetched definition
 */
export async function getEncoding(encoding, options) {
    const alreadyRequested = encoding in cache;
    if (!alreadyRequested) {
        const url = `https://tiktoken.pages.dev/js/${encoding}.json`;
        const download = caller.fetch(url, {
            signal: options?.signal,
        });
        cache[encoding] = download
            .then((response) => response.json())
            .catch((error) => {
                // Forget the failed attempt so the next call fetches again.
                delete cache[encoding];
                throw error;
            });
    }
    const definition = await cache[encoding];
    return new Tiktoken(definition, options?.extendedSpecialTokens);
}
19
/**
 * Looks up the encoding name associated with `model` and loads that encoding.
 *
 * @param model Model name passed to `getEncodingNameForModel`
 * @param options Forwarded unchanged to `getEncoding`
 * @returns The result of `getEncoding` for the resolved encoding name
 */
export async function encodingForModel(model, options) {
    const encodingName = getEncodingNameForModel(model);
    return getEncoding(encodingName, options);
}
@@ -0,0 +1,236 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.RedisVectorStore = void 0;
4
+ const redis_1 = require("redis");
5
+ const base_js_1 = require("./base.cjs");
6
+ const document_js_1 = require("../document.cjs");
7
/**
 * Vector store that writes documents and their embeddings to Redis hashes and
 * retrieves them with a KNN query against a RediSearch index.
 */
class RedisVectorStore extends base_js_1.VectorStore {
    /**
     * @param embeddings Embeddings instance, forwarded to the base class
     * @param _dbConfig Client, index name and optional settings; optional
     * entries fall back to the defaults assigned in this constructor
     */
    constructor(embeddings, _dbConfig) {
        super(embeddings, _dbConfig);
        // Define every instance field up front as an own, enumerable,
        // configurable, writable property before any value is assigned.
        for (const field of [
            "redisClient",
            "indexName",
            "indexOptions",
            "keyPrefix",
            "contentKey",
            "metadataKey",
            "vectorKey",
            "filter",
        ]) {
            Object.defineProperty(this, field, {
                enumerable: true,
                configurable: true,
                writable: true,
                value: void 0,
            });
        }
        this.redisClient = _dbConfig.redisClient;
        this.indexName = _dbConfig.indexName;
        this.indexOptions = _dbConfig.indexOptions ?? {
            ALGORITHM: redis_1.VectorAlgorithms.HNSW,
            DISTANCE_METRIC: "COSINE",
        };
        this.keyPrefix = _dbConfig.keyPrefix ?? `doc:${this.indexName}:`;
        this.contentKey = _dbConfig.contentKey ?? "content";
        this.metadataKey = _dbConfig.metadataKey ?? "metadata";
        this.vectorKey = _dbConfig.vectorKey ?? "content_vector";
        this.filter = _dbConfig.filter;
    }
    /**
     * Embeds the page content of each document and stores the results.
     *
     * @param documents Documents to embed and store
     * @param options Forwarded to `addVectors`
     */
    async addDocuments(documents, options) {
        const texts = documents.map(({ pageContent }) => pageContent);
        await this.addVectors(await this.embeddings.embedDocuments(texts), documents, options);
    }
    /**
     * Stores one hash per vector, holding the vector buffer, the page content
     * and the serialized metadata of the document at the same position.
     *
     * The index is created first if it does not exist, using the length of
     * the first vector as its dimension. Writes are sent in MULTI/EXEC
     * batches of at most `batchSize` commands, each awaited before the next
     * batch starts, so any failure rejects the returned promise.
     *
     * When `keys` is omitted or empty, keys are `keyPrefix` followed by the
     * position in `vectors`; a later call without explicit keys therefore
     * writes to the same keys again.
     *
     * @param vectors Embedding vectors; an empty array is a no-op
     * @param documents Documents aligned by position with `vectors`
     * @param options Optional explicit `keys` and `batchSize` (default 1000)
     */
    async addVectors(vectors, documents, { keys, batchSize = 1000 } = {}) {
        // Nothing to write, and no first vector to size the index from.
        if (vectors.length === 0) {
            return;
        }
        // check if the index exists and create it if it doesn't
        await this.createIndex(vectors[0].length);
        let multi = this.redisClient.multi();
        let queued = 0;
        for (let idx = 0; idx < vectors.length; idx += 1) {
            const key = keys && keys.length ? keys[idx] : `${this.keyPrefix}${idx}`;
            const metadata = documents[idx] && documents[idx].metadata
                ? documents[idx].metadata
                : {};
            multi.hSet(key, {
                [this.vectorKey]: this.getFloat32Buffer(vectors[idx]),
                [this.contentKey]: documents[idx].pageContent,
                [this.metadataKey]: this.escapeSpecialChars(JSON.stringify(metadata)),
            });
            queued += 1;
            // write batch, then start a fresh transaction so already-sent
            // commands are never executed a second time
            if (queued >= batchSize) {
                await multi.exec();
                multi = this.redisClient.multi();
                queued = 0;
            }
        }
        // insert final (partial) batch
        if (queued > 0) {
            await multi.exec();
        }
    }
    /**
     * Runs a KNN search for `query` and returns the matching documents with
     * their scores.
     *
     * @param query Query embedding
     * @param k Number of neighbours to request
     * @param filter Optional metadata filter; may not be combined with a
     * filter configured on the instance
     * @returns `[Document, score]` pairs for every hit that carries a score
     * @throws Error when both `filter` and `this.filter` are set
     */
    async similaritySearchVectorWithScore(query, k, filter) {
        if (filter && this.filter) {
            throw new Error("cannot provide both `filter` and `this.filter`");
        }
        const _filter = filter ?? this.filter;
        const results = await this.redisClient.ft.search(this.indexName, ...this.buildQuery(query, k, _filter));
        const result = [];
        if (!results.total) {
            return result;
        }
        for (const res of results.documents) {
            const fields = res.value;
            if (!fields || !fields.vector_score) {
                continue;
            }
            // Metadata is stored and returned under the configured
            // `metadataKey`, so read it from there; a hit without that field
            // gets empty metadata.
            const rawMetadata = fields[this.metadataKey] ?? "{}";
            result.push([
                new document_js_1.Document({
                    pageContent: fields[this.contentKey],
                    metadata: JSON.parse(this.unEscapeSpecialChars(rawMetadata)),
                }),
                Number(fields.vector_score),
            ]);
        }
        return result;
    }
    /**
     * Wraps each text in a Document and delegates to `fromDocuments`.
     *
     * @param texts Page contents
     * @param metadatas Either one metadata object per text, or a single
     * object shared by all texts
     * @param embeddings Embeddings instance
     * @param dbConfig Store configuration
     * @returns Promise of the populated store
     */
    static fromTexts(texts, metadatas, embeddings, dbConfig) {
        const docs = [];
        for (let i = 0; i < texts.length; i += 1) {
            const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas;
            const newDoc = new document_js_1.Document({
                pageContent: texts[i],
                metadata,
            });
            docs.push(newDoc);
        }
        return RedisVectorStore.fromDocuments(docs, embeddings, dbConfig);
    }
    /**
     * Creates a store and adds the given documents to it.
     *
     * @param docs Documents to add
     * @param embeddings Embeddings instance
     * @param dbConfig Store configuration
     * @returns The populated store
     */
    static async fromDocuments(docs, embeddings, dbConfig) {
        const instance = new this(embeddings, dbConfig);
        await instance.addDocuments(docs);
        return instance;
    }
    /**
     * Reports whether index info can be retrieved for `indexName`.
     *
     * @returns `true` when the info call succeeds, `false` when it throws
     */
    async checkIndexExists() {
        try {
            await this.redisClient.ft.info(this.indexName);
        }
        catch (err) {
            // Any failure of the info call is treated as "index doesn't
            // exist"; the error itself is intentionally not inspected.
            return false;
        }
        return true;
    }
    /**
     * Creates the index over hashes under `keyPrefix` unless it already
     * exists.
     *
     * @param dimensions Vector dimension for the index (default 1536)
     */
    async createIndex(dimensions = 1536) {
        if (await this.checkIndexExists()) {
            return;
        }
        const schema = {
            [this.vectorKey]: {
                type: redis_1.SchemaFieldTypes.VECTOR,
                TYPE: "FLOAT32",
                DIM: dimensions,
                // Spread last so configured options are applied on top.
                ...this.indexOptions,
            },
            [this.contentKey]: redis_1.SchemaFieldTypes.TEXT,
            [this.metadataKey]: redis_1.SchemaFieldTypes.TEXT,
        };
        await this.redisClient.ft.create(this.indexName, schema, {
            ON: "HASH",
            PREFIX: this.keyPrefix,
        });
    }
    /**
     * Drops the index.
     *
     * @returns `true` when the drop call succeeds, `false` when it throws
     */
    async dropIndex() {
        try {
            await this.redisClient.ft.dropIndex(this.indexName);
            return true;
        }
        catch (err) {
            return false;
        }
    }
    /**
     * Builds the query string and options passed to `ft.search`.
     *
     * @param query Query embedding
     * @param k Number of neighbours, used for both KNN and LIMIT
     * @param filter Optional list of metadata terms
     * @returns `[queryString, searchOptions]`
     */
    buildQuery(query, k, filter) {
        const vectorScoreField = "vector_score";
        let hybridFields = "*";
        // if a filter is set, modify the hybrid query
        if (filter && filter.length) {
            // `filter` is a list of strings, then it's applied using the OR operator in the metadata key
            // for example: filter = ['foo', 'bar'] => this will filter all metadata containing either 'foo' OR 'bar'
            hybridFields = `@${this.metadataKey}:(${this.prepareFilter(filter)})`;
        }
        const baseQuery = `${hybridFields} => [KNN ${k} @${this.vectorKey} $vector AS ${vectorScoreField}]`;
        const returnFields = [this.metadataKey, this.contentKey, vectorScoreField];
        const options = {
            PARAMS: {
                vector: this.getFloat32Buffer(query),
            },
            RETURN: returnFields,
            SORTBY: vectorScoreField,
            DIALECT: 2,
            LIMIT: {
                from: 0,
                size: k,
            },
        };
        return [baseQuery, options];
    }
    /**
     * Escapes each filter term and joins the terms with `|`.
     *
     * @param filter List of filter terms
     * @returns The joined filter expression
     */
    prepareFilter(filter) {
        // Call through an arrow so the method keeps its receiver and is not
        // handed the extra (index, array) arguments that `map` supplies.
        return filter.map((term) => this.escapeSpecialChars(term)).join("|");
    }
    /**
     * Escapes all '-' characters.
     * RediSearch considers '-' as a negative operator, hence we need
     * to escape it
     * @see https://redis.io/docs/stack/search/reference/query_syntax
     *
     * @param str
     * @returns
     */
    escapeSpecialChars(str) {
        return str.replaceAll("-", "\\-");
    }
    /**
     * Unescapes all '-' characters, returning the original string
     *
     * @param str
     * @returns
     */
    unEscapeSpecialChars(str) {
        return str.replaceAll("\\-", "-");
    }
    /**
     * Converts the vector to the buffer Redis needs to
     * correctly store an embedding
     *
     * @param vector
     * @returns Buffer
     */
    getFloat32Buffer(vector) {
        return Buffer.from(new Float32Array(vector).buffer);
    }
}
236
+ exports.RedisVectorStore = RedisVectorStore;
@@ -0,0 +1,80 @@
1
+ import type { createCluster, createClient } from "redis";
2
+ import { VectorAlgorithms } from "redis";
3
+ import { Embeddings } from "../embeddings/base.js";
4
+ import { VectorStore } from "./base.js";
5
+ import { Document } from "../document.js";
6
/**
 * Common shape of the vector-field options of an index, parameterised by the
 * algorithm `T` and by the algorithm-specific extra options `A`.
 */
export type CreateSchemaVectorField<
    T extends VectorAlgorithms,
    A extends Record<string, unknown>
> = {
    /** Algorithm selected for the vector field. */
    ALGORITHM: T;
    /** Distance metric; one of the three listed literals. */
    DISTANCE_METRIC: "L2" | "IP" | "COSINE";
    /** Optional numeric option passed through with the other index options. */
    INITIAL_CAP?: number;
} & A;
11
/** Vector-field options for the FLAT algorithm, adding an optional `BLOCK_SIZE`. */
export type CreateSchemaFlatVectorField = CreateSchemaVectorField<
    VectorAlgorithms.FLAT,
    { BLOCK_SIZE?: number }
>;
14
/**
 * Vector-field options for the HNSW algorithm, adding the optional `M`,
 * `EF_CONSTRUCTION` and `EF_RUNTIME` numeric settings.
 */
export type CreateSchemaHNSWVectorField = CreateSchemaVectorField<
    VectorAlgorithms.HNSW,
    {
        M?: number;
        EF_CONSTRUCTION?: number;
        EF_RUNTIME?: number;
    }
>;
19
/** Configuration object accepted by the `RedisVectorStore` constructor. */
export interface RedisVectorStoreConfig {
    /** Redis client or cluster client used for all commands. */
    redisClient:
        | ReturnType<typeof createClient>
        | ReturnType<typeof createCluster>;
    /** Name of the search index. */
    indexName: string;
    /** Vector-field options for the index (FLAT or HNSW variant). */
    indexOptions?: CreateSchemaFlatVectorField | CreateSchemaHNSWVectorField;
    /** Prefix of the hash keys covered by the index. */
    keyPrefix?: string;
    /** Hash field holding the page content. */
    contentKey?: string;
    /** Hash field holding the serialized metadata. */
    metadataKey?: string;
    /** Hash field holding the embedding vector. */
    vectorKey?: string;
    /** Filter stored on the instance. */
    filter?: RedisVectorStoreFilterType;
}
29
/** Options accepted by `addDocuments` and `addVectors`. */
export interface RedisAddOptions {
    /** Explicit hash keys, aligned by position with the stored items. */
    keys?: string[];
    /** Batch size used when writing. */
    batchSize?: number;
}
33
/** A filter is expressed as a list of strings. */
export type RedisVectorStoreFilterType = Array<string>;
34
/** Type declaration of the Redis-backed vector store. */
export declare class RedisVectorStore extends VectorStore {
    FilterType: RedisVectorStoreFilterType;
    private redisClient;
    /** Name of the search index. */
    indexName: string;
    /** Vector-field options of the index. */
    indexOptions: CreateSchemaFlatVectorField | CreateSchemaHNSWVectorField;
    /** Prefix of the hash keys. */
    keyPrefix: string;
    /** Hash field holding the page content. */
    contentKey: string;
    /** Hash field holding the serialized metadata. */
    metadataKey: string;
    /** Hash field holding the embedding vector. */
    vectorKey: string;
    /** Optional filter stored on the instance. */
    filter?: RedisVectorStoreFilterType;
    constructor(embeddings: Embeddings, _dbConfig: RedisVectorStoreConfig);
    addDocuments(
        documents: Document[],
        options?: RedisAddOptions
    ): Promise<void>;
    addVectors(
        vectors: number[][],
        documents: Document[],
        { keys, batchSize }?: RedisAddOptions
    ): Promise<void>;
    similaritySearchVectorWithScore(
        query: number[],
        k: number,
        filter?: RedisVectorStoreFilterType
    ): Promise<[Document, number][]>;
    static fromTexts(
        texts: string[],
        metadatas: object[] | object,
        embeddings: Embeddings,
        dbConfig: RedisVectorStoreConfig
    ): Promise<RedisVectorStore>;
    static fromDocuments(
        docs: Document[],
        embeddings: Embeddings,
        dbConfig: RedisVectorStoreConfig
    ): Promise<RedisVectorStore>;
    checkIndexExists(): Promise<boolean>;
    createIndex(dimensions?: number): Promise<void>;
    dropIndex(): Promise<boolean>;
    private buildQuery;
    private prepareFilter;
    /**
     * Escapes every '-' character.
     * RediSearch treats '-' as a negation operator, so it has to be
     * escaped.
     * @see https://redis.io/docs/stack/search/reference/query_syntax
     *
     * @param str
     * @returns
     */
    private escapeSpecialChars;
    /**
     * Reverses the '-' escaping, giving back the original string
     *
     * @param str
     * @returns
     */
    private unEscapeSpecialChars;
    /**
     * Turns the vector into the buffer Redis needs in order to
     * store an embedding correctly
     *
     * @param vector
     * @returns Buffer
     */
    private getFloat32Buffer;
}
@@ -0,0 +1,232 @@
1
+ import { SchemaFieldTypes, VectorAlgorithms } from "redis";
2
+ import { VectorStore } from "./base.js";
3
+ import { Document } from "../document.js";
4
/**
 * Vector store that writes documents and their embeddings to Redis hashes and
 * retrieves them with a KNN query against a RediSearch index.
 */
export class RedisVectorStore extends VectorStore {
    /**
     * @param embeddings Embeddings instance, forwarded to the base class
     * @param _dbConfig Client, index name and optional settings; optional
     * entries fall back to the defaults assigned in this constructor
     */
    constructor(embeddings, _dbConfig) {
        super(embeddings, _dbConfig);
        // Define every instance field up front as an own, enumerable,
        // configurable, writable property before any value is assigned.
        for (const field of [
            "redisClient",
            "indexName",
            "indexOptions",
            "keyPrefix",
            "contentKey",
            "metadataKey",
            "vectorKey",
            "filter",
        ]) {
            Object.defineProperty(this, field, {
                enumerable: true,
                configurable: true,
                writable: true,
                value: void 0,
            });
        }
        this.redisClient = _dbConfig.redisClient;
        this.indexName = _dbConfig.indexName;
        this.indexOptions = _dbConfig.indexOptions ?? {
            ALGORITHM: VectorAlgorithms.HNSW,
            DISTANCE_METRIC: "COSINE",
        };
        this.keyPrefix = _dbConfig.keyPrefix ?? `doc:${this.indexName}:`;
        this.contentKey = _dbConfig.contentKey ?? "content";
        this.metadataKey = _dbConfig.metadataKey ?? "metadata";
        this.vectorKey = _dbConfig.vectorKey ?? "content_vector";
        this.filter = _dbConfig.filter;
    }
    /**
     * Embeds the page content of each document and stores the results.
     *
     * @param documents Documents to embed and store
     * @param options Forwarded to `addVectors`
     */
    async addDocuments(documents, options) {
        const texts = documents.map(({ pageContent }) => pageContent);
        await this.addVectors(await this.embeddings.embedDocuments(texts), documents, options);
    }
    /**
     * Stores one hash per vector, holding the vector buffer, the page content
     * and the serialized metadata of the document at the same position.
     *
     * The index is created first if it does not exist, using the length of
     * the first vector as its dimension. Writes are sent in MULTI/EXEC
     * batches of at most `batchSize` commands, each awaited before the next
     * batch starts, so any failure rejects the returned promise.
     *
     * When `keys` is omitted or empty, keys are `keyPrefix` followed by the
     * position in `vectors`; a later call without explicit keys therefore
     * writes to the same keys again.
     *
     * @param vectors Embedding vectors; an empty array is a no-op
     * @param documents Documents aligned by position with `vectors`
     * @param options Optional explicit `keys` and `batchSize` (default 1000)
     */
    async addVectors(vectors, documents, { keys, batchSize = 1000 } = {}) {
        // Nothing to write, and no first vector to size the index from.
        if (vectors.length === 0) {
            return;
        }
        // check if the index exists and create it if it doesn't
        await this.createIndex(vectors[0].length);
        let multi = this.redisClient.multi();
        let queued = 0;
        for (let idx = 0; idx < vectors.length; idx += 1) {
            const key = keys && keys.length ? keys[idx] : `${this.keyPrefix}${idx}`;
            const metadata = documents[idx] && documents[idx].metadata
                ? documents[idx].metadata
                : {};
            multi.hSet(key, {
                [this.vectorKey]: this.getFloat32Buffer(vectors[idx]),
                [this.contentKey]: documents[idx].pageContent,
                [this.metadataKey]: this.escapeSpecialChars(JSON.stringify(metadata)),
            });
            queued += 1;
            // write batch, then start a fresh transaction so already-sent
            // commands are never executed a second time
            if (queued >= batchSize) {
                await multi.exec();
                multi = this.redisClient.multi();
                queued = 0;
            }
        }
        // insert final (partial) batch
        if (queued > 0) {
            await multi.exec();
        }
    }
    /**
     * Runs a KNN search for `query` and returns the matching documents with
     * their scores.
     *
     * @param query Query embedding
     * @param k Number of neighbours to request
     * @param filter Optional metadata filter; may not be combined with a
     * filter configured on the instance
     * @returns `[Document, score]` pairs for every hit that carries a score
     * @throws Error when both `filter` and `this.filter` are set
     */
    async similaritySearchVectorWithScore(query, k, filter) {
        if (filter && this.filter) {
            throw new Error("cannot provide both `filter` and `this.filter`");
        }
        const _filter = filter ?? this.filter;
        const results = await this.redisClient.ft.search(this.indexName, ...this.buildQuery(query, k, _filter));
        const result = [];
        if (!results.total) {
            return result;
        }
        for (const res of results.documents) {
            const fields = res.value;
            if (!fields || !fields.vector_score) {
                continue;
            }
            // Metadata is stored and returned under the configured
            // `metadataKey`, so read it from there; a hit without that field
            // gets empty metadata.
            const rawMetadata = fields[this.metadataKey] ?? "{}";
            result.push([
                new Document({
                    pageContent: fields[this.contentKey],
                    metadata: JSON.parse(this.unEscapeSpecialChars(rawMetadata)),
                }),
                Number(fields.vector_score),
            ]);
        }
        return result;
    }
    /**
     * Wraps each text in a Document and delegates to `fromDocuments`.
     *
     * @param texts Page contents
     * @param metadatas Either one metadata object per text, or a single
     * object shared by all texts
     * @param embeddings Embeddings instance
     * @param dbConfig Store configuration
     * @returns Promise of the populated store
     */
    static fromTexts(texts, metadatas, embeddings, dbConfig) {
        const docs = [];
        for (let i = 0; i < texts.length; i += 1) {
            const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas;
            const newDoc = new Document({
                pageContent: texts[i],
                metadata,
            });
            docs.push(newDoc);
        }
        return RedisVectorStore.fromDocuments(docs, embeddings, dbConfig);
    }
    /**
     * Creates a store and adds the given documents to it.
     *
     * @param docs Documents to add
     * @param embeddings Embeddings instance
     * @param dbConfig Store configuration
     * @returns The populated store
     */
    static async fromDocuments(docs, embeddings, dbConfig) {
        const instance = new this(embeddings, dbConfig);
        await instance.addDocuments(docs);
        return instance;
    }
    /**
     * Reports whether index info can be retrieved for `indexName`.
     *
     * @returns `true` when the info call succeeds, `false` when it throws
     */
    async checkIndexExists() {
        try {
            await this.redisClient.ft.info(this.indexName);
        }
        catch (err) {
            // Any failure of the info call is treated as "index doesn't
            // exist"; the error itself is intentionally not inspected.
            return false;
        }
        return true;
    }
    /**
     * Creates the index over hashes under `keyPrefix` unless it already
     * exists.
     *
     * @param dimensions Vector dimension for the index (default 1536)
     */
    async createIndex(dimensions = 1536) {
        if (await this.checkIndexExists()) {
            return;
        }
        const schema = {
            [this.vectorKey]: {
                type: SchemaFieldTypes.VECTOR,
                TYPE: "FLOAT32",
                DIM: dimensions,
                // Spread last so configured options are applied on top.
                ...this.indexOptions,
            },
            [this.contentKey]: SchemaFieldTypes.TEXT,
            [this.metadataKey]: SchemaFieldTypes.TEXT,
        };
        await this.redisClient.ft.create(this.indexName, schema, {
            ON: "HASH",
            PREFIX: this.keyPrefix,
        });
    }
    /**
     * Drops the index.
     *
     * @returns `true` when the drop call succeeds, `false` when it throws
     */
    async dropIndex() {
        try {
            await this.redisClient.ft.dropIndex(this.indexName);
            return true;
        }
        catch (err) {
            return false;
        }
    }
    /**
     * Builds the query string and options passed to `ft.search`.
     *
     * @param query Query embedding
     * @param k Number of neighbours, used for both KNN and LIMIT
     * @param filter Optional list of metadata terms
     * @returns `[queryString, searchOptions]`
     */
    buildQuery(query, k, filter) {
        const vectorScoreField = "vector_score";
        let hybridFields = "*";
        // if a filter is set, modify the hybrid query
        if (filter && filter.length) {
            // `filter` is a list of strings, then it's applied using the OR operator in the metadata key
            // for example: filter = ['foo', 'bar'] => this will filter all metadata containing either 'foo' OR 'bar'
            hybridFields = `@${this.metadataKey}:(${this.prepareFilter(filter)})`;
        }
        const baseQuery = `${hybridFields} => [KNN ${k} @${this.vectorKey} $vector AS ${vectorScoreField}]`;
        const returnFields = [this.metadataKey, this.contentKey, vectorScoreField];
        const options = {
            PARAMS: {
                vector: this.getFloat32Buffer(query),
            },
            RETURN: returnFields,
            SORTBY: vectorScoreField,
            DIALECT: 2,
            LIMIT: {
                from: 0,
                size: k,
            },
        };
        return [baseQuery, options];
    }
    /**
     * Escapes each filter term and joins the terms with `|`.
     *
     * @param filter List of filter terms
     * @returns The joined filter expression
     */
    prepareFilter(filter) {
        // Call through an arrow so the method keeps its receiver and is not
        // handed the extra (index, array) arguments that `map` supplies.
        return filter.map((term) => this.escapeSpecialChars(term)).join("|");
    }
    /**
     * Escapes all '-' characters.
     * RediSearch considers '-' as a negative operator, hence we need
     * to escape it
     * @see https://redis.io/docs/stack/search/reference/query_syntax
     *
     * @param str
     * @returns
     */
    escapeSpecialChars(str) {
        return str.replaceAll("-", "\\-");
    }
    /**
     * Unescapes all '-' characters, returning the original string
     *
     * @param str
     * @returns
     */
    unEscapeSpecialChars(str) {
        return str.replaceAll("\\-", "-");
    }
    /**
     * Converts the vector to the buffer Redis needs to
     * correctly store an embedding
     *
     * @param vector
     * @returns Buffer
     */
    getFloat32Buffer(vector) {
        return Buffer.from(new Float32Array(vector).buffer);
    }
}
@@ -0,0 +1 @@
1
+ module.exports = require('../../dist/document_loaders/web/apify_dataset.cjs');
@@ -0,0 +1 @@
1
+ export * from '../../dist/document_loaders/web/apify_dataset.js'
@@ -0,0 +1 @@
1
+ export * from '../../dist/document_loaders/web/apify_dataset.js'