langchain 0.0.171 → 0.0.173
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents/format_scratchpad/log.cjs +1 -0
- package/agents/format_scratchpad/log.d.ts +1 -0
- package/agents/format_scratchpad/log.js +1 -0
- package/agents/format_scratchpad/log_to_message.cjs +1 -0
- package/agents/format_scratchpad/log_to_message.d.ts +1 -0
- package/agents/format_scratchpad/log_to_message.js +1 -0
- package/agents/format_scratchpad/xml.cjs +1 -0
- package/agents/format_scratchpad/xml.d.ts +1 -0
- package/agents/format_scratchpad/xml.js +1 -0
- package/agents/format_scratchpad.cjs +1 -1
- package/agents/format_scratchpad.d.ts +1 -1
- package/agents/format_scratchpad.js +1 -1
- package/agents/openai/output_parser.cjs +1 -0
- package/agents/openai/output_parser.d.ts +1 -0
- package/agents/openai/output_parser.js +1 -0
- package/agents/react/output_parser.cjs +1 -0
- package/agents/react/output_parser.d.ts +1 -0
- package/agents/react/output_parser.js +1 -0
- package/agents/xml/output_parser.cjs +1 -0
- package/agents/xml/output_parser.d.ts +1 -0
- package/agents/xml/output_parser.js +1 -0
- package/dist/agents/format_scratchpad/log.cjs +16 -0
- package/dist/agents/format_scratchpad/log.d.ts +9 -0
- package/dist/agents/format_scratchpad/log.js +12 -0
- package/dist/agents/format_scratchpad/log_to_message.cjs +22 -0
- package/dist/agents/format_scratchpad/log_to_message.d.ts +2 -0
- package/dist/agents/format_scratchpad/log_to_message.js +18 -0
- package/dist/agents/{format_scratchpad.cjs → format_scratchpad/openai_functions.cjs} +3 -3
- package/dist/agents/{format_scratchpad.d.ts → format_scratchpad/openai_functions.d.ts} +1 -1
- package/dist/agents/{format_scratchpad.js → format_scratchpad/openai_functions.js} +3 -3
- package/dist/agents/format_scratchpad/xml.cjs +12 -0
- package/dist/agents/format_scratchpad/xml.d.ts +2 -0
- package/dist/agents/format_scratchpad/xml.js +8 -0
- package/dist/agents/index.cjs +3 -1
- package/dist/agents/index.d.ts +1 -0
- package/dist/agents/index.js +1 -0
- package/dist/agents/openai/index.cjs +8 -31
- package/dist/agents/openai/index.d.ts +2 -0
- package/dist/agents/openai/index.js +8 -31
- package/dist/agents/openai/output_parser.cjs +65 -0
- package/dist/agents/openai/output_parser.d.ts +22 -0
- package/dist/agents/openai/output_parser.js +61 -0
- package/dist/agents/react/output_parser.cjs +96 -0
- package/dist/agents/react/output_parser.d.ts +47 -0
- package/dist/agents/react/output_parser.js +92 -0
- package/dist/agents/react/prompt.cjs +13 -0
- package/dist/agents/react/prompt.d.ts +1 -0
- package/dist/agents/react/prompt.js +10 -0
- package/dist/agents/toolkits/conversational_retrieval/tool.cjs +2 -1
- package/dist/agents/toolkits/conversational_retrieval/tool.js +2 -1
- package/dist/agents/xml/index.cjs +9 -25
- package/dist/agents/xml/index.d.ts +2 -7
- package/dist/agents/xml/index.js +8 -23
- package/dist/agents/xml/output_parser.cjs +44 -0
- package/dist/agents/xml/output_parser.d.ts +14 -0
- package/dist/agents/xml/output_parser.js +40 -0
- package/dist/document_loaders/fs/pdf.cjs +2 -1
- package/dist/document_loaders/fs/pdf.js +2 -1
- package/dist/document_loaders/web/pdf.cjs +2 -1
- package/dist/document_loaders/web/pdf.js +2 -1
- package/dist/embeddings/openai.cjs +11 -0
- package/dist/embeddings/openai.d.ts +2 -0
- package/dist/embeddings/openai.js +11 -0
- package/dist/load/import_constants.cjs +1 -0
- package/dist/load/import_constants.js +1 -0
- package/dist/load/import_map.cjs +11 -3
- package/dist/load/import_map.d.ts +9 -1
- package/dist/load/import_map.js +9 -1
- package/dist/memory/vector_store.cjs +2 -1
- package/dist/memory/vector_store.js +2 -1
- package/dist/storage/file_system.cjs +167 -0
- package/dist/storage/file_system.d.ts +60 -0
- package/dist/storage/file_system.js +140 -0
- package/dist/tools/index.cjs +3 -1
- package/dist/tools/index.d.ts +1 -0
- package/dist/tools/index.js +1 -0
- package/dist/tools/render.cjs +36 -0
- package/dist/tools/render.d.ts +25 -0
- package/dist/tools/render.js +31 -0
- package/dist/tools/serpapi.d.ts +2 -2
- package/dist/tools/webbrowser.cjs +2 -1
- package/dist/tools/webbrowser.js +2 -1
- package/dist/util/document.cjs +12 -0
- package/dist/util/document.d.ts +9 -0
- package/dist/util/document.js +8 -0
- package/dist/vectorstores/analyticdb.cjs +7 -3
- package/dist/vectorstores/analyticdb.d.ts +1 -1
- package/dist/vectorstores/analyticdb.js +7 -3
- package/dist/vectorstores/cassandra.cjs +130 -35
- package/dist/vectorstores/cassandra.d.ts +21 -10
- package/dist/vectorstores/cassandra.js +130 -35
- package/dist/vectorstores/pgvector.cjs +13 -7
- package/dist/vectorstores/pgvector.d.ts +7 -0
- package/dist/vectorstores/pgvector.js +13 -7
- package/dist/vectorstores/qdrant.cjs +19 -11
- package/dist/vectorstores/qdrant.d.ts +1 -1
- package/dist/vectorstores/qdrant.js +19 -11
- package/dist/vectorstores/redis.cjs +4 -1
- package/dist/vectorstores/redis.d.ts +1 -1
- package/dist/vectorstores/redis.js +4 -1
- package/package.json +75 -3
- package/storage/file_system.cjs +1 -0
- package/storage/file_system.d.ts +1 -0
- package/storage/file_system.js +1 -0
- package/tools/render.cjs +1 -0
- package/tools/render.d.ts +1 -0
- package/tools/render.js +1 -0
- package/util/document.cjs +1 -0
- package/util/document.d.ts +1 -0
- package/util/document.js +1 -0
package/dist/tools/webbrowser.js
CHANGED
|
@@ -6,6 +6,7 @@ import { MemoryVectorStore } from "../vectorstores/memory.js";
|
|
|
6
6
|
import { Document } from "../document.js";
|
|
7
7
|
import { Tool } from "./base.js";
|
|
8
8
|
import fetchAdapter from "../util/axios-fetch-adapter.js";
|
|
9
|
+
import { formatDocumentsAsString } from "../util/document.js";
|
|
9
10
|
export const parseInputs = (inputs) => {
|
|
10
11
|
const [baseUrl, task] = inputs.split(",").map((input) => {
|
|
11
12
|
let t = input.trim();
|
|
@@ -203,7 +204,7 @@ export class WebBrowser extends Tool {
|
|
|
203
204
|
}));
|
|
204
205
|
const vectorStore = await MemoryVectorStore.fromDocuments(docs, this.embeddings);
|
|
205
206
|
const results = await vectorStore.similaritySearch(task, 4, undefined, runManager?.getChild("vectorstore"));
|
|
206
|
-
context = results
|
|
207
|
+
context = formatDocumentsAsString(results, "\n");
|
|
207
208
|
}
|
|
208
209
|
const input = `Text:${context}\n\nI need ${doSummary ? "a summary" : task} from the above text, also provide up to 5 markdown links from within that would be of interest (always including URL and text). Links should be provided, if present, in markdown syntax as a list under the heading "Relevant Links:".`;
|
|
209
210
|
return this.model.predict(input, undefined, runManager?.getChild());
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.formatDocumentsAsString = void 0;
|
|
4
|
+
/**
|
|
5
|
+
* Given a list of documents, this util formats their contents
|
|
6
|
+
* into a single string, joined by `separator` (a blank line, "\n\n", by default).
|
|
7
|
+
*
|
|
8
|
+
* @param documents
|
|
9
|
+
* @returns A string of the documents' page content, joined by `separator`.
|
|
10
|
+
*/
|
|
11
|
+
const formatDocumentsAsString = (documents, separator = "\n\n") => {
    // Collect each document's text first, then glue the pieces together.
    // Array#join is kept on purpose: it renders missing (undefined/null)
    // page content as an empty string rather than the word "undefined".
    const pageContents = documents.map(({ pageContent }) => pageContent);
    return pageContents.join(separator);
};
|
|
12
|
+
exports.formatDocumentsAsString = formatDocumentsAsString;
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { Document } from "../document.js";
|
|
2
|
+
/**
|
|
3
|
+
* Given a list of documents, this util formats their contents
|
|
4
|
+
* into a single string, joined by `separator` (a blank line, "\n\n", by default).
|
|
5
|
+
*
|
|
6
|
+
* @param documents
|
|
7
|
+
* @returns A string of the documents' page content, joined by `separator`.
|
|
8
|
+
*/
|
|
9
|
+
export declare const formatDocumentsAsString: (documents: Document[], separator?: string) => string;
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Given a list of documents, this util formats their contents
|
|
3
|
+
* into a single string, joined by `separator` (a blank line, "\n\n", by default).
|
|
4
|
+
*
|
|
5
|
+
* @param documents
|
|
6
|
+
* @returns A string of the documents' page content, joined by `separator`.
|
|
7
|
+
*/
|
|
8
|
+
export const formatDocumentsAsString = (documents, separator = "\n\n") => {
    // Collect each document's text first, then glue the pieces together.
    // Array#join is kept on purpose: it renders missing (undefined/null)
    // page content as an empty string rather than the word "undefined".
    const pageContents = documents.map(({ pageContent }) => pageContent);
    return pageContents.join(separator);
};
|
|
@@ -34,7 +34,6 @@ const promises_1 = require("node:stream/promises");
|
|
|
34
34
|
const node_stream_1 = require("node:stream");
|
|
35
35
|
const base_js_1 = require("./base.cjs");
|
|
36
36
|
const document_js_1 = require("../document.cjs");
|
|
37
|
-
const _LANGCHAIN_DEFAULT_EMBEDDING_DIM = 1536;
|
|
38
37
|
const _LANGCHAIN_DEFAULT_COLLECTION_NAME = "langchain_document";
|
|
39
38
|
/**
|
|
40
39
|
* Class that provides methods for creating and managing a collection of
|
|
@@ -85,8 +84,7 @@ class AnalyticDBVectorStore extends base_js_1.VectorStore {
|
|
|
85
84
|
user: args.connectionOptions.user,
|
|
86
85
|
password: args.connectionOptions.password,
|
|
87
86
|
});
|
|
88
|
-
this.embeddingDimension =
|
|
89
|
-
args.embeddingDimension || _LANGCHAIN_DEFAULT_EMBEDDING_DIM;
|
|
87
|
+
this.embeddingDimension = args.embeddingDimension;
|
|
90
88
|
this.collectionName =
|
|
91
89
|
args.collectionName || _LANGCHAIN_DEFAULT_COLLECTION_NAME;
|
|
92
90
|
this.preDeleteCollection = args.preDeleteCollection || false;
|
|
@@ -106,6 +104,9 @@ class AnalyticDBVectorStore extends base_js_1.VectorStore {
|
|
|
106
104
|
* @returns Promise that resolves when the table and index are created.
|
|
107
105
|
*/
|
|
108
106
|
async createTableIfNotExists() {
|
|
107
|
+
if (!this.embeddingDimension) {
|
|
108
|
+
this.embeddingDimension = (await this.embeddings.embedQuery("test")).length;
|
|
109
|
+
}
|
|
109
110
|
const client = await this.pool.connect();
|
|
110
111
|
try {
|
|
111
112
|
await client.query("BEGIN");
|
|
@@ -194,6 +195,9 @@ class AnalyticDBVectorStore extends base_js_1.VectorStore {
|
|
|
194
195
|
if (vectors.length !== documents.length) {
|
|
195
196
|
throw new Error(`Vectors and documents must have the same length`);
|
|
196
197
|
}
|
|
198
|
+
if (!this.embeddingDimension) {
|
|
199
|
+
this.embeddingDimension = (await this.embeddings.embedQuery("test")).length;
|
|
200
|
+
}
|
|
197
201
|
if (vectors[0].length !== this.embeddingDimension) {
|
|
198
202
|
throw new Error(`Vectors must have the same length as the number of dimensions (${this.embeddingDimension})`);
|
|
199
203
|
}
|
|
@@ -21,7 +21,7 @@ export interface AnalyticDBArgs {
|
|
|
21
21
|
export declare class AnalyticDBVectorStore extends VectorStore {
|
|
22
22
|
FilterType: Record<string, any>;
|
|
23
23
|
private pool;
|
|
24
|
-
private embeddingDimension
|
|
24
|
+
private embeddingDimension?;
|
|
25
25
|
private collectionName;
|
|
26
26
|
private preDeleteCollection;
|
|
27
27
|
private isCreateCollection;
|
|
@@ -5,7 +5,6 @@ import { pipeline } from "node:stream/promises";
|
|
|
5
5
|
import { Readable } from "node:stream";
|
|
6
6
|
import { VectorStore } from "./base.js";
|
|
7
7
|
import { Document } from "../document.js";
|
|
8
|
-
const _LANGCHAIN_DEFAULT_EMBEDDING_DIM = 1536;
|
|
9
8
|
const _LANGCHAIN_DEFAULT_COLLECTION_NAME = "langchain_document";
|
|
10
9
|
/**
|
|
11
10
|
* Class that provides methods for creating and managing a collection of
|
|
@@ -56,8 +55,7 @@ export class AnalyticDBVectorStore extends VectorStore {
|
|
|
56
55
|
user: args.connectionOptions.user,
|
|
57
56
|
password: args.connectionOptions.password,
|
|
58
57
|
});
|
|
59
|
-
this.embeddingDimension =
|
|
60
|
-
args.embeddingDimension || _LANGCHAIN_DEFAULT_EMBEDDING_DIM;
|
|
58
|
+
this.embeddingDimension = args.embeddingDimension;
|
|
61
59
|
this.collectionName =
|
|
62
60
|
args.collectionName || _LANGCHAIN_DEFAULT_COLLECTION_NAME;
|
|
63
61
|
this.preDeleteCollection = args.preDeleteCollection || false;
|
|
@@ -77,6 +75,9 @@ export class AnalyticDBVectorStore extends VectorStore {
|
|
|
77
75
|
* @returns Promise that resolves when the table and index are created.
|
|
78
76
|
*/
|
|
79
77
|
async createTableIfNotExists() {
|
|
78
|
+
if (!this.embeddingDimension) {
|
|
79
|
+
this.embeddingDimension = (await this.embeddings.embedQuery("test")).length;
|
|
80
|
+
}
|
|
80
81
|
const client = await this.pool.connect();
|
|
81
82
|
try {
|
|
82
83
|
await client.query("BEGIN");
|
|
@@ -165,6 +166,9 @@ export class AnalyticDBVectorStore extends VectorStore {
|
|
|
165
166
|
if (vectors.length !== documents.length) {
|
|
166
167
|
throw new Error(`Vectors and documents must have the same length`);
|
|
167
168
|
}
|
|
169
|
+
if (!this.embeddingDimension) {
|
|
170
|
+
this.embeddingDimension = (await this.embeddings.embedQuery("test")).length;
|
|
171
|
+
}
|
|
168
172
|
if (vectors[0].length !== this.embeddingDimension) {
|
|
169
173
|
throw new Error(`Vectors must have the same length as the number of dimensions (${this.embeddingDimension})`);
|
|
170
174
|
}
|
|
@@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.CassandraStore = void 0;
|
|
4
4
|
/* eslint-disable prefer-template */
|
|
5
5
|
const cassandra_driver_1 = require("cassandra-driver");
|
|
6
|
+
const async_caller_js_1 = require("../util/async_caller.cjs");
|
|
6
7
|
const base_js_1 = require("./base.cjs");
|
|
7
8
|
const document_js_1 = require("../document.cjs");
|
|
8
9
|
/**
|
|
@@ -16,7 +17,13 @@ class CassandraStore extends base_js_1.VectorStore {
|
|
|
16
17
|
return "cassandra";
|
|
17
18
|
}
|
|
18
19
|
constructor(embeddings, args) {
|
|
19
|
-
|
|
20
|
+
const argsWithDefaults = {
|
|
21
|
+
indices: [],
|
|
22
|
+
maxConcurrency: 25,
|
|
23
|
+
batchSize: 1,
|
|
24
|
+
...args,
|
|
25
|
+
};
|
|
26
|
+
super(embeddings, argsWithDefaults);
|
|
20
27
|
Object.defineProperty(this, "client", {
|
|
21
28
|
enumerable: true,
|
|
22
29
|
configurable: true,
|
|
@@ -65,13 +72,31 @@ class CassandraStore extends base_js_1.VectorStore {
|
|
|
65
72
|
writable: true,
|
|
66
73
|
value: false
|
|
67
74
|
});
|
|
68
|
-
this
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
this
|
|
75
|
+
Object.defineProperty(this, "asyncCaller", {
|
|
76
|
+
enumerable: true,
|
|
77
|
+
configurable: true,
|
|
78
|
+
writable: true,
|
|
79
|
+
value: void 0
|
|
80
|
+
});
|
|
81
|
+
Object.defineProperty(this, "batchSize", {
|
|
82
|
+
enumerable: true,
|
|
83
|
+
configurable: true,
|
|
84
|
+
writable: true,
|
|
85
|
+
value: void 0
|
|
86
|
+
});
|
|
87
|
+
this.asyncCaller = new async_caller_js_1.AsyncCaller(argsWithDefaults ?? {});
|
|
88
|
+
this.client = new cassandra_driver_1.Client(argsWithDefaults);
|
|
89
|
+
this.dimensions = argsWithDefaults.dimensions;
|
|
90
|
+
this.keyspace = argsWithDefaults.keyspace;
|
|
91
|
+
this.table = argsWithDefaults.table;
|
|
92
|
+
this.primaryKey = argsWithDefaults.primaryKey;
|
|
93
|
+
this.metadataColumns = argsWithDefaults.metadataColumns;
|
|
94
|
+
this.indices = argsWithDefaults.indices;
|
|
95
|
+
this.batchSize = argsWithDefaults.batchSize;
|
|
96
|
+
if (this.batchSize < 1) {
|
|
97
|
+
console.warn("batchSize must be greater than or equal to 1, defaulting to 1");
|
|
98
|
+
this.batchSize = 1;
|
|
99
|
+
}
|
|
75
100
|
}
|
|
76
101
|
/**
|
|
77
102
|
* Method to save vectors to the Cassandra database.
|
|
@@ -86,8 +111,7 @@ class CassandraStore extends base_js_1.VectorStore {
|
|
|
86
111
|
if (!this.isInitialized) {
|
|
87
112
|
await this.initialize();
|
|
88
113
|
}
|
|
89
|
-
|
|
90
|
-
await this.client.batch(queries);
|
|
114
|
+
await this.insertAll(vectors, documents);
|
|
91
115
|
}
|
|
92
116
|
/**
|
|
93
117
|
* Method to add documents to the Cassandra database.
|
|
@@ -188,31 +212,6 @@ class CassandraStore extends base_js_1.VectorStore {
|
|
|
188
212
|
}
|
|
189
213
|
this.isInitialized = true;
|
|
190
214
|
}
|
|
191
|
-
/**
|
|
192
|
-
* Method to build a CQL query for inserting vectors and documents into
|
|
193
|
-
* the Cassandra database.
|
|
194
|
-
* @param vectors The vectors to insert.
|
|
195
|
-
* @param documents The documents to insert.
|
|
196
|
-
* @returns The CQL query string.
|
|
197
|
-
*/
|
|
198
|
-
buildInsertQuery(vectors, documents) {
|
|
199
|
-
const queries = [];
|
|
200
|
-
for (let index = 0; index < vectors.length; index += 1) {
|
|
201
|
-
const vector = vectors[index];
|
|
202
|
-
const document = documents[index];
|
|
203
|
-
const metadataColNames = Object.keys(document.metadata);
|
|
204
|
-
const metadataVals = Object.values(document.metadata);
|
|
205
|
-
const metadataInsert = metadataColNames.length > 0 ? ", " + metadataColNames.join(", ") : "";
|
|
206
|
-
const query = `INSERT INTO ${this.keyspace}.${this.table} (vector, text${metadataInsert}) VALUES ([${vector}], '${document.pageContent}'${metadataVals.length > 0
|
|
207
|
-
? ", " +
|
|
208
|
-
metadataVals
|
|
209
|
-
.map((val) => (typeof val === "number" ? val : `'${val}'`))
|
|
210
|
-
.join(", ")
|
|
211
|
-
: ""});`;
|
|
212
|
-
queries.push(query);
|
|
213
|
-
}
|
|
214
|
-
return queries;
|
|
215
|
-
}
|
|
216
215
|
buildWhereClause(filter) {
|
|
217
216
|
const whereClause = Object.entries(filter)
|
|
218
217
|
.map(([key, value]) => `${key} = '${value}'`)
|
|
@@ -231,5 +230,101 @@ class CassandraStore extends base_js_1.VectorStore {
|
|
|
231
230
|
const whereClause = filter ? this.buildWhereClause(filter) : "";
|
|
232
231
|
return `SELECT * FROM ${this.keyspace}.${this.table} ${whereClause} ORDER BY vector ANN OF [${query}] LIMIT ${k}`;
|
|
233
232
|
}
|
|
233
|
+
/**
|
|
234
|
+
* Method for inserting vectors and documents into the Cassandra database in a batch.
|
|
235
|
+
* @param batchVectors The list of vectors to insert.
|
|
236
|
+
* @param batchDocuments The list of documents to insert.
|
|
237
|
+
* @returns Promise that resolves when the batch has been inserted.
|
|
238
|
+
*/
|
|
239
|
+
async executeInsert(batchVectors, batchDocuments) {
|
|
240
|
+
// Input validation: Check if the lengths of batchVectors and batchDocuments are the same
|
|
241
|
+
if (batchVectors.length !== batchDocuments.length) {
|
|
242
|
+
throw new Error(`The lengths of vectors (${batchVectors.length}) and documents (${batchDocuments.length}) must be the same.`);
|
|
243
|
+
}
|
|
244
|
+
// Initialize an array to hold query objects
|
|
245
|
+
const queries = [];
|
|
246
|
+
// Loop through each vector and document in the batch
|
|
247
|
+
for (let i = 0; i < batchVectors.length; i += 1) {
|
|
248
|
+
// Convert the list of numbers to a Float32Array, the driver's expected format of a vector
|
|
249
|
+
const preparedVector = new Float32Array(batchVectors[i]);
|
|
250
|
+
// Retrieve the corresponding document
|
|
251
|
+
const document = batchDocuments[i];
|
|
252
|
+
// Extract metadata column names and values from the document
|
|
253
|
+
const metadataColNames = Object.keys(document.metadata);
|
|
254
|
+
const metadataVals = Object.values(document.metadata);
|
|
255
|
+
// Prepare the metadata columns string for the query, if metadata exists
|
|
256
|
+
const metadataInsert = metadataColNames.length > 0 ? ", " + metadataColNames.join(", ") : "";
|
|
257
|
+
// Construct the query string and parameters
|
|
258
|
+
const query = {
|
|
259
|
+
query: `INSERT INTO ${this.keyspace}.${this.table} (vector, text${metadataInsert})
|
|
260
|
+
VALUES (?, ?${", ?".repeat(metadataColNames.length)})`,
|
|
261
|
+
params: [preparedVector, document.pageContent, ...metadataVals],
|
|
262
|
+
};
|
|
263
|
+
// Add the query to the list
|
|
264
|
+
queries.push(query);
|
|
265
|
+
}
|
|
266
|
+
// Execute the queries: use a batch if multiple, otherwise execute a single query
|
|
267
|
+
if (queries.length === 1) {
|
|
268
|
+
await this.client.execute(queries[0].query, queries[0].params, {
|
|
269
|
+
prepare: true,
|
|
270
|
+
});
|
|
271
|
+
}
|
|
272
|
+
else {
|
|
273
|
+
await this.client.batch(queries, { prepare: true, logged: false });
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
/**
|
|
277
|
+
* Method for inserting vectors and documents into the Cassandra database in
|
|
278
|
+
* parallel, keeping within maxConcurrency number of active insert statements.
|
|
279
|
+
* @param vectors The vectors to insert.
|
|
280
|
+
* @param documents The documents to insert.
|
|
281
|
+
* @returns Promise that resolves when the documents have been added.
|
|
282
|
+
*/
|
|
283
|
+
async insertAll(vectors, documents) {
|
|
284
|
+
// Input validation: Check if the lengths of vectors and documents are the same
|
|
285
|
+
if (vectors.length !== documents.length) {
|
|
286
|
+
throw new Error(`The lengths of vectors (${vectors.length}) and documents (${documents.length}) must be the same.`);
|
|
287
|
+
}
|
|
288
|
+
// Early exit: If there are no vectors or documents to insert, return immediately
|
|
289
|
+
if (vectors.length === 0) {
|
|
290
|
+
return;
|
|
291
|
+
}
|
|
292
|
+
// Ensure the store is initialized before proceeding
|
|
293
|
+
if (!this.isInitialized) {
|
|
294
|
+
await this.initialize();
|
|
295
|
+
}
|
|
296
|
+
// Initialize an array to hold promises for each batch insert
|
|
297
|
+
const insertPromises = [];
|
|
298
|
+
// Buffers to hold the current batch of vectors and documents
|
|
299
|
+
let currentBatchVectors = [];
|
|
300
|
+
let currentBatchDocuments = [];
|
|
301
|
+
// Loop through each vector/document pair to insert; we use
|
|
302
|
+
// <= vectors.length to ensure the last batch is inserted
|
|
303
|
+
for (let i = 0; i <= vectors.length; i += 1) {
|
|
304
|
+
// Check if we're still within the array boundaries
|
|
305
|
+
if (i < vectors.length) {
|
|
306
|
+
// Add the current vector and document to the batch
|
|
307
|
+
currentBatchVectors.push(vectors[i]);
|
|
308
|
+
currentBatchDocuments.push(documents[i]);
|
|
309
|
+
}
|
|
310
|
+
// Check if we've reached the batch size or end of the array
|
|
311
|
+
if (currentBatchVectors.length >= this.batchSize ||
|
|
312
|
+
i === vectors.length) {
|
|
313
|
+
// Only proceed if there are items in the current batch
|
|
314
|
+
if (currentBatchVectors.length > 0) {
|
|
315
|
+
// Create copies of the current batch arrays to use in the async insert operation
|
|
316
|
+
const batchVectors = [...currentBatchVectors];
|
|
317
|
+
const batchDocuments = [...currentBatchDocuments];
|
|
318
|
+
// Execute the insert using the AsyncCaller - it will handle concurrency and queueing.
|
|
319
|
+
insertPromises.push(this.asyncCaller.call(() => this.executeInsert(batchVectors, batchDocuments)));
|
|
320
|
+
// Clear the current buffers for the next iteration
|
|
321
|
+
currentBatchVectors = [];
|
|
322
|
+
currentBatchDocuments = [];
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
}
|
|
326
|
+
// Wait for all insert operations to complete.
|
|
327
|
+
await Promise.all(insertPromises);
|
|
328
|
+
}
|
|
234
329
|
}
|
|
235
330
|
exports.CassandraStore = CassandraStore;
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { DseClientOptions } from "cassandra-driver";
|
|
2
|
+
import { AsyncCaller, AsyncCallerParams } from "../util/async_caller.js";
|
|
2
3
|
import { Embeddings } from "../embeddings/base.js";
|
|
3
4
|
import { VectorStore } from "./base.js";
|
|
4
5
|
import { Document } from "../document.js";
|
|
@@ -10,13 +11,14 @@ export interface Index {
|
|
|
10
11
|
name: string;
|
|
11
12
|
value: string;
|
|
12
13
|
}
|
|
13
|
-
export interface CassandraLibArgs extends DseClientOptions {
|
|
14
|
+
export interface CassandraLibArgs extends DseClientOptions, AsyncCallerParams {
|
|
14
15
|
table: string;
|
|
15
16
|
keyspace: string;
|
|
16
17
|
dimensions: number;
|
|
17
18
|
primaryKey: Column;
|
|
18
19
|
metadataColumns: Column[];
|
|
19
|
-
indices
|
|
20
|
+
indices?: Index[];
|
|
21
|
+
batchSize?: number;
|
|
20
22
|
}
|
|
21
23
|
/**
|
|
22
24
|
* Class for interacting with the Cassandra database. It extends the
|
|
@@ -34,6 +36,8 @@ export declare class CassandraStore extends VectorStore {
|
|
|
34
36
|
private readonly table;
|
|
35
37
|
private indices;
|
|
36
38
|
private isInitialized;
|
|
39
|
+
asyncCaller: AsyncCaller;
|
|
40
|
+
private readonly batchSize;
|
|
37
41
|
_vectorstoreType(): string;
|
|
38
42
|
constructor(embeddings: Embeddings, args: CassandraLibArgs);
|
|
39
43
|
/**
|
|
@@ -87,14 +91,6 @@ export declare class CassandraStore extends VectorStore {
|
|
|
87
91
|
* @returns Promise that resolves when the database has been initialized.
|
|
88
92
|
*/
|
|
89
93
|
private initialize;
|
|
90
|
-
/**
|
|
91
|
-
* Method to build a CQL query for inserting vectors and documents into
|
|
92
|
-
* the Cassandra database.
|
|
93
|
-
* @param vectors The vectors to insert.
|
|
94
|
-
* @param documents The documents to insert.
|
|
95
|
-
* @returns The CQL query string.
|
|
96
|
-
*/
|
|
97
|
-
private buildInsertQuery;
|
|
98
94
|
private buildWhereClause;
|
|
99
95
|
/**
|
|
100
96
|
* Method to build a CQL query for searching for similar vectors in the
|
|
@@ -105,4 +101,19 @@ export declare class CassandraStore extends VectorStore {
|
|
|
105
101
|
* @returns The CQL query string.
|
|
106
102
|
*/
|
|
107
103
|
private buildSearchQuery;
|
|
104
|
+
/**
|
|
105
|
+
* Method for inserting vectors and documents into the Cassandra database in a batch.
|
|
106
|
+
* @param batchVectors The list of vectors to insert.
|
|
107
|
+
* @param batchDocuments The list of documents to insert.
|
|
108
|
+
* @returns Promise that resolves when the batch has been inserted.
|
|
109
|
+
*/
|
|
110
|
+
private executeInsert;
|
|
111
|
+
/**
|
|
112
|
+
* Method for inserting vectors and documents into the Cassandra database in
|
|
113
|
+
* parallel, keeping within maxConcurrency number of active insert statements.
|
|
114
|
+
* @param vectors The vectors to insert.
|
|
115
|
+
* @param documents The documents to insert.
|
|
116
|
+
* @returns Promise that resolves when the documents have been added.
|
|
117
|
+
*/
|
|
118
|
+
private insertAll;
|
|
108
119
|
}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
/* eslint-disable prefer-template */
|
|
2
2
|
import { Client as CassandraClient } from "cassandra-driver";
|
|
3
|
+
import { AsyncCaller } from "../util/async_caller.js";
|
|
3
4
|
import { VectorStore } from "./base.js";
|
|
4
5
|
import { Document } from "../document.js";
|
|
5
6
|
/**
|
|
@@ -13,7 +14,13 @@ export class CassandraStore extends VectorStore {
|
|
|
13
14
|
return "cassandra";
|
|
14
15
|
}
|
|
15
16
|
constructor(embeddings, args) {
|
|
16
|
-
|
|
17
|
+
const argsWithDefaults = {
|
|
18
|
+
indices: [],
|
|
19
|
+
maxConcurrency: 25,
|
|
20
|
+
batchSize: 1,
|
|
21
|
+
...args,
|
|
22
|
+
};
|
|
23
|
+
super(embeddings, argsWithDefaults);
|
|
17
24
|
Object.defineProperty(this, "client", {
|
|
18
25
|
enumerable: true,
|
|
19
26
|
configurable: true,
|
|
@@ -62,13 +69,31 @@ export class CassandraStore extends VectorStore {
|
|
|
62
69
|
writable: true,
|
|
63
70
|
value: false
|
|
64
71
|
});
|
|
65
|
-
this
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
this
|
|
72
|
+
Object.defineProperty(this, "asyncCaller", {
|
|
73
|
+
enumerable: true,
|
|
74
|
+
configurable: true,
|
|
75
|
+
writable: true,
|
|
76
|
+
value: void 0
|
|
77
|
+
});
|
|
78
|
+
Object.defineProperty(this, "batchSize", {
|
|
79
|
+
enumerable: true,
|
|
80
|
+
configurable: true,
|
|
81
|
+
writable: true,
|
|
82
|
+
value: void 0
|
|
83
|
+
});
|
|
84
|
+
this.asyncCaller = new AsyncCaller(argsWithDefaults ?? {});
|
|
85
|
+
this.client = new CassandraClient(argsWithDefaults);
|
|
86
|
+
this.dimensions = argsWithDefaults.dimensions;
|
|
87
|
+
this.keyspace = argsWithDefaults.keyspace;
|
|
88
|
+
this.table = argsWithDefaults.table;
|
|
89
|
+
this.primaryKey = argsWithDefaults.primaryKey;
|
|
90
|
+
this.metadataColumns = argsWithDefaults.metadataColumns;
|
|
91
|
+
this.indices = argsWithDefaults.indices;
|
|
92
|
+
this.batchSize = argsWithDefaults.batchSize;
|
|
93
|
+
if (this.batchSize < 1) {
|
|
94
|
+
console.warn("batchSize must be greater than or equal to 1, defaulting to 1");
|
|
95
|
+
this.batchSize = 1;
|
|
96
|
+
}
|
|
72
97
|
}
|
|
73
98
|
/**
|
|
74
99
|
* Method to save vectors to the Cassandra database.
|
|
@@ -83,8 +108,7 @@ export class CassandraStore extends VectorStore {
|
|
|
83
108
|
if (!this.isInitialized) {
|
|
84
109
|
await this.initialize();
|
|
85
110
|
}
|
|
86
|
-
|
|
87
|
-
await this.client.batch(queries);
|
|
111
|
+
await this.insertAll(vectors, documents);
|
|
88
112
|
}
|
|
89
113
|
/**
|
|
90
114
|
* Method to add documents to the Cassandra database.
|
|
@@ -185,31 +209,6 @@ export class CassandraStore extends VectorStore {
|
|
|
185
209
|
}
|
|
186
210
|
this.isInitialized = true;
|
|
187
211
|
}
|
|
188
|
-
/**
|
|
189
|
-
* Method to build a CQL query for inserting vectors and documents into
|
|
190
|
-
* the Cassandra database.
|
|
191
|
-
* @param vectors The vectors to insert.
|
|
192
|
-
* @param documents The documents to insert.
|
|
193
|
-
* @returns The CQL query string.
|
|
194
|
-
*/
|
|
195
|
-
buildInsertQuery(vectors, documents) {
|
|
196
|
-
const queries = [];
|
|
197
|
-
for (let index = 0; index < vectors.length; index += 1) {
|
|
198
|
-
const vector = vectors[index];
|
|
199
|
-
const document = documents[index];
|
|
200
|
-
const metadataColNames = Object.keys(document.metadata);
|
|
201
|
-
const metadataVals = Object.values(document.metadata);
|
|
202
|
-
const metadataInsert = metadataColNames.length > 0 ? ", " + metadataColNames.join(", ") : "";
|
|
203
|
-
const query = `INSERT INTO ${this.keyspace}.${this.table} (vector, text${metadataInsert}) VALUES ([${vector}], '${document.pageContent}'${metadataVals.length > 0
|
|
204
|
-
? ", " +
|
|
205
|
-
metadataVals
|
|
206
|
-
.map((val) => (typeof val === "number" ? val : `'${val}'`))
|
|
207
|
-
.join(", ")
|
|
208
|
-
: ""});`;
|
|
209
|
-
queries.push(query);
|
|
210
|
-
}
|
|
211
|
-
return queries;
|
|
212
|
-
}
|
|
213
212
|
buildWhereClause(filter) {
|
|
214
213
|
const whereClause = Object.entries(filter)
|
|
215
214
|
.map(([key, value]) => `${key} = '${value}'`)
|
|
@@ -228,4 +227,100 @@ export class CassandraStore extends VectorStore {
|
|
|
228
227
|
const whereClause = filter ? this.buildWhereClause(filter) : "";
|
|
229
228
|
return `SELECT * FROM ${this.keyspace}.${this.table} ${whereClause} ORDER BY vector ANN OF [${query}] LIMIT ${k}`;
|
|
230
229
|
}
|
|
230
|
+
/**
|
|
231
|
+
* Method for inserting vectors and documents into the Cassandra database in a batch.
|
|
232
|
+
* @param batchVectors The list of vectors to insert.
|
|
233
|
+
* @param batchDocuments The list of documents to insert.
|
|
234
|
+
* @returns Promise that resolves when the batch has been inserted.
|
|
235
|
+
*/
|
|
236
|
+
async executeInsert(batchVectors, batchDocuments) {
|
|
237
|
+
// Input validation: Check if the lengths of batchVectors and batchDocuments are the same
|
|
238
|
+
if (batchVectors.length !== batchDocuments.length) {
|
|
239
|
+
throw new Error(`The lengths of vectors (${batchVectors.length}) and documents (${batchDocuments.length}) must be the same.`);
|
|
240
|
+
}
|
|
241
|
+
// Initialize an array to hold query objects
|
|
242
|
+
const queries = [];
|
|
243
|
+
// Loop through each vector and document in the batch
|
|
244
|
+
for (let i = 0; i < batchVectors.length; i += 1) {
|
|
245
|
+
// Convert the list of numbers to a Float32Array, the driver's expected format of a vector
|
|
246
|
+
const preparedVector = new Float32Array(batchVectors[i]);
|
|
247
|
+
// Retrieve the corresponding document
|
|
248
|
+
const document = batchDocuments[i];
|
|
249
|
+
// Extract metadata column names and values from the document
|
|
250
|
+
const metadataColNames = Object.keys(document.metadata);
|
|
251
|
+
const metadataVals = Object.values(document.metadata);
|
|
252
|
+
// Prepare the metadata columns string for the query, if metadata exists
|
|
253
|
+
const metadataInsert = metadataColNames.length > 0 ? ", " + metadataColNames.join(", ") : "";
|
|
254
|
+
// Construct the query string and parameters
|
|
255
|
+
const query = {
|
|
256
|
+
query: `INSERT INTO ${this.keyspace}.${this.table} (vector, text${metadataInsert})
|
|
257
|
+
VALUES (?, ?${", ?".repeat(metadataColNames.length)})`,
|
|
258
|
+
params: [preparedVector, document.pageContent, ...metadataVals],
|
|
259
|
+
};
|
|
260
|
+
// Add the query to the list
|
|
261
|
+
queries.push(query);
|
|
262
|
+
}
|
|
263
|
+
// Execute the queries: use a batch if multiple, otherwise execute a single query
|
|
264
|
+
if (queries.length === 1) {
|
|
265
|
+
await this.client.execute(queries[0].query, queries[0].params, {
|
|
266
|
+
prepare: true,
|
|
267
|
+
});
|
|
268
|
+
}
|
|
269
|
+
else {
|
|
270
|
+
await this.client.batch(queries, { prepare: true, logged: false });
|
|
271
|
+
}
|
|
272
|
+
}
|
|
273
|
+
/**
|
|
274
|
+
* Method for inserting vectors and documents into the Cassandra database in
|
|
275
|
+
* parallel, keeping within maxConcurrency number of active insert statements.
|
|
276
|
+
* @param vectors The vectors to insert.
|
|
277
|
+
* @param documents The documents to insert.
|
|
278
|
+
* @returns Promise that resolves when the documents have been added.
|
|
279
|
+
*/
|
|
280
|
+
async insertAll(vectors, documents) {
|
|
281
|
+
// Input validation: Check if the lengths of vectors and documents are the same
|
|
282
|
+
if (vectors.length !== documents.length) {
|
|
283
|
+
throw new Error(`The lengths of vectors (${vectors.length}) and documents (${documents.length}) must be the same.`);
|
|
284
|
+
}
|
|
285
|
+
// Early exit: If there are no vectors or documents to insert, return immediately
|
|
286
|
+
if (vectors.length === 0) {
|
|
287
|
+
return;
|
|
288
|
+
}
|
|
289
|
+
// Ensure the store is initialized before proceeding
|
|
290
|
+
if (!this.isInitialized) {
|
|
291
|
+
await this.initialize();
|
|
292
|
+
}
|
|
293
|
+
// Initialize an array to hold promises for each batch insert
|
|
294
|
+
const insertPromises = [];
|
|
295
|
+
// Buffers to hold the current batch of vectors and documents
|
|
296
|
+
let currentBatchVectors = [];
|
|
297
|
+
let currentBatchDocuments = [];
|
|
298
|
+
// Loop through each vector/document pair to insert; we use
|
|
299
|
+
// <= vectors.length to ensure the last batch is inserted
|
|
300
|
+
for (let i = 0; i <= vectors.length; i += 1) {
|
|
301
|
+
// Check if we're still within the array boundaries
|
|
302
|
+
if (i < vectors.length) {
|
|
303
|
+
// Add the current vector and document to the batch
|
|
304
|
+
currentBatchVectors.push(vectors[i]);
|
|
305
|
+
currentBatchDocuments.push(documents[i]);
|
|
306
|
+
}
|
|
307
|
+
// Check if we've reached the batch size or end of the array
|
|
308
|
+
if (currentBatchVectors.length >= this.batchSize ||
|
|
309
|
+
i === vectors.length) {
|
|
310
|
+
// Only proceed if there are items in the current batch
|
|
311
|
+
if (currentBatchVectors.length > 0) {
|
|
312
|
+
// Create copies of the current batch arrays to use in the async insert operation
|
|
313
|
+
const batchVectors = [...currentBatchVectors];
|
|
314
|
+
const batchDocuments = [...currentBatchDocuments];
|
|
315
|
+
// Execute the insert using the AsyncCaller - it will handle concurrency and queueing.
|
|
316
|
+
insertPromises.push(this.asyncCaller.call(() => this.executeInsert(batchVectors, batchDocuments)));
|
|
317
|
+
// Clear the current buffers for the next iteration
|
|
318
|
+
currentBatchVectors = [];
|
|
319
|
+
currentBatchDocuments = [];
|
|
320
|
+
}
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
// Wait for all insert operations to complete.
|
|
324
|
+
await Promise.all(insertPromises);
|
|
325
|
+
}
|
|
231
326
|
}
|
|
@@ -74,6 +74,12 @@ class PGVectorStore extends base_js_1.VectorStore {
|
|
|
74
74
|
writable: true,
|
|
75
75
|
value: void 0
|
|
76
76
|
});
|
|
77
|
+
Object.defineProperty(this, "chunkSize", {
|
|
78
|
+
enumerable: true,
|
|
79
|
+
configurable: true,
|
|
80
|
+
writable: true,
|
|
81
|
+
value: 500
|
|
82
|
+
});
|
|
77
83
|
this.tableName = config.tableName;
|
|
78
84
|
this.filter = config.filter;
|
|
79
85
|
this.vectorColumnName = config.columns?.vectorColumnName ?? "embedding";
|
|
@@ -82,6 +88,7 @@ class PGVectorStore extends base_js_1.VectorStore {
|
|
|
82
88
|
this.metadataColumnName = config.columns?.metadataColumnName ?? "metadata";
|
|
83
89
|
const pool = new pg_1.default.Pool(config.postgresConnectionOptions);
|
|
84
90
|
this.pool = pool;
|
|
91
|
+
this.chunkSize = config.chunkSize ?? 500;
|
|
85
92
|
this._verbose =
|
|
86
93
|
(0, env_js_1.getEnvironmentVariable)("LANGCHAIN_VERBOSE") === "true" ??
|
|
87
94
|
!!config.verbose;
|
|
@@ -132,9 +139,9 @@ class PGVectorStore extends base_js_1.VectorStore {
|
|
|
132
139
|
* @param chunkIndex - The starting index for generating query placeholders based on chunk positioning.
|
|
133
140
|
* @returns The complete SQL INSERT INTO query string.
|
|
134
141
|
*/
|
|
135
|
-
buildInsertQuery(rows
|
|
142
|
+
buildInsertQuery(rows) {
|
|
136
143
|
const valuesPlaceholders = rows
|
|
137
|
-
.map((_, j) => this.generatePlaceholderForRowAt(
|
|
144
|
+
.map((_, j) => this.generatePlaceholderForRowAt(j))
|
|
138
145
|
.join(", ");
|
|
139
146
|
const text = `
|
|
140
147
|
INSERT INTO ${this.tableName}(
|
|
@@ -163,10 +170,9 @@ class PGVectorStore extends base_js_1.VectorStore {
|
|
|
163
170
|
documents[idx].metadata,
|
|
164
171
|
];
|
|
165
172
|
});
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
const
|
|
169
|
-
const insertQuery = this.buildInsertQuery(chunk, i);
|
|
173
|
+
for (let i = 0; i < rows.length; i += this.chunkSize) {
|
|
174
|
+
const chunk = rows.slice(i, i + this.chunkSize);
|
|
175
|
+
const insertQuery = this.buildInsertQuery(chunk);
|
|
170
176
|
const flatValues = chunk.flat();
|
|
171
177
|
try {
|
|
172
178
|
await this.pool.query(insertQuery, flatValues);
|
|
@@ -270,7 +276,7 @@ class PGVectorStore extends base_js_1.VectorStore {
|
|
|
270
276
|
* @returns Promise that resolves when all clients are closed and the pool is terminated.
|
|
271
277
|
*/
|
|
272
278
|
async end() {
|
|
273
|
-
|
|
279
|
+
this.client?.release();
|
|
274
280
|
return this.pool.end();
|
|
275
281
|
}
|
|
276
282
|
}
|