langchain 0.0.171 → 0.0.173

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. package/agents/format_scratchpad/log.cjs +1 -0
  2. package/agents/format_scratchpad/log.d.ts +1 -0
  3. package/agents/format_scratchpad/log.js +1 -0
  4. package/agents/format_scratchpad/log_to_message.cjs +1 -0
  5. package/agents/format_scratchpad/log_to_message.d.ts +1 -0
  6. package/agents/format_scratchpad/log_to_message.js +1 -0
  7. package/agents/format_scratchpad/xml.cjs +1 -0
  8. package/agents/format_scratchpad/xml.d.ts +1 -0
  9. package/agents/format_scratchpad/xml.js +1 -0
  10. package/agents/format_scratchpad.cjs +1 -1
  11. package/agents/format_scratchpad.d.ts +1 -1
  12. package/agents/format_scratchpad.js +1 -1
  13. package/agents/openai/output_parser.cjs +1 -0
  14. package/agents/openai/output_parser.d.ts +1 -0
  15. package/agents/openai/output_parser.js +1 -0
  16. package/agents/react/output_parser.cjs +1 -0
  17. package/agents/react/output_parser.d.ts +1 -0
  18. package/agents/react/output_parser.js +1 -0
  19. package/agents/xml/output_parser.cjs +1 -0
  20. package/agents/xml/output_parser.d.ts +1 -0
  21. package/agents/xml/output_parser.js +1 -0
  22. package/dist/agents/format_scratchpad/log.cjs +16 -0
  23. package/dist/agents/format_scratchpad/log.d.ts +9 -0
  24. package/dist/agents/format_scratchpad/log.js +12 -0
  25. package/dist/agents/format_scratchpad/log_to_message.cjs +22 -0
  26. package/dist/agents/format_scratchpad/log_to_message.d.ts +2 -0
  27. package/dist/agents/format_scratchpad/log_to_message.js +18 -0
  28. package/dist/agents/{format_scratchpad.cjs → format_scratchpad/openai_functions.cjs} +3 -3
  29. package/dist/agents/{format_scratchpad.d.ts → format_scratchpad/openai_functions.d.ts} +1 -1
  30. package/dist/agents/{format_scratchpad.js → format_scratchpad/openai_functions.js} +3 -3
  31. package/dist/agents/format_scratchpad/xml.cjs +12 -0
  32. package/dist/agents/format_scratchpad/xml.d.ts +2 -0
  33. package/dist/agents/format_scratchpad/xml.js +8 -0
  34. package/dist/agents/index.cjs +3 -1
  35. package/dist/agents/index.d.ts +1 -0
  36. package/dist/agents/index.js +1 -0
  37. package/dist/agents/openai/index.cjs +8 -31
  38. package/dist/agents/openai/index.d.ts +2 -0
  39. package/dist/agents/openai/index.js +8 -31
  40. package/dist/agents/openai/output_parser.cjs +65 -0
  41. package/dist/agents/openai/output_parser.d.ts +22 -0
  42. package/dist/agents/openai/output_parser.js +61 -0
  43. package/dist/agents/react/output_parser.cjs +96 -0
  44. package/dist/agents/react/output_parser.d.ts +47 -0
  45. package/dist/agents/react/output_parser.js +92 -0
  46. package/dist/agents/react/prompt.cjs +13 -0
  47. package/dist/agents/react/prompt.d.ts +1 -0
  48. package/dist/agents/react/prompt.js +10 -0
  49. package/dist/agents/toolkits/conversational_retrieval/tool.cjs +2 -1
  50. package/dist/agents/toolkits/conversational_retrieval/tool.js +2 -1
  51. package/dist/agents/xml/index.cjs +9 -25
  52. package/dist/agents/xml/index.d.ts +2 -7
  53. package/dist/agents/xml/index.js +8 -23
  54. package/dist/agents/xml/output_parser.cjs +44 -0
  55. package/dist/agents/xml/output_parser.d.ts +14 -0
  56. package/dist/agents/xml/output_parser.js +40 -0
  57. package/dist/document_loaders/fs/pdf.cjs +2 -1
  58. package/dist/document_loaders/fs/pdf.js +2 -1
  59. package/dist/document_loaders/web/pdf.cjs +2 -1
  60. package/dist/document_loaders/web/pdf.js +2 -1
  61. package/dist/embeddings/openai.cjs +11 -0
  62. package/dist/embeddings/openai.d.ts +2 -0
  63. package/dist/embeddings/openai.js +11 -0
  64. package/dist/load/import_constants.cjs +1 -0
  65. package/dist/load/import_constants.js +1 -0
  66. package/dist/load/import_map.cjs +11 -3
  67. package/dist/load/import_map.d.ts +9 -1
  68. package/dist/load/import_map.js +9 -1
  69. package/dist/memory/vector_store.cjs +2 -1
  70. package/dist/memory/vector_store.js +2 -1
  71. package/dist/storage/file_system.cjs +167 -0
  72. package/dist/storage/file_system.d.ts +60 -0
  73. package/dist/storage/file_system.js +140 -0
  74. package/dist/tools/index.cjs +3 -1
  75. package/dist/tools/index.d.ts +1 -0
  76. package/dist/tools/index.js +1 -0
  77. package/dist/tools/render.cjs +36 -0
  78. package/dist/tools/render.d.ts +25 -0
  79. package/dist/tools/render.js +31 -0
  80. package/dist/tools/serpapi.d.ts +2 -2
  81. package/dist/tools/webbrowser.cjs +2 -1
  82. package/dist/tools/webbrowser.js +2 -1
  83. package/dist/util/document.cjs +12 -0
  84. package/dist/util/document.d.ts +9 -0
  85. package/dist/util/document.js +8 -0
  86. package/dist/vectorstores/analyticdb.cjs +7 -3
  87. package/dist/vectorstores/analyticdb.d.ts +1 -1
  88. package/dist/vectorstores/analyticdb.js +7 -3
  89. package/dist/vectorstores/cassandra.cjs +130 -35
  90. package/dist/vectorstores/cassandra.d.ts +21 -10
  91. package/dist/vectorstores/cassandra.js +130 -35
  92. package/dist/vectorstores/pgvector.cjs +13 -7
  93. package/dist/vectorstores/pgvector.d.ts +7 -0
  94. package/dist/vectorstores/pgvector.js +13 -7
  95. package/dist/vectorstores/qdrant.cjs +19 -11
  96. package/dist/vectorstores/qdrant.d.ts +1 -1
  97. package/dist/vectorstores/qdrant.js +19 -11
  98. package/dist/vectorstores/redis.cjs +4 -1
  99. package/dist/vectorstores/redis.d.ts +1 -1
  100. package/dist/vectorstores/redis.js +4 -1
  101. package/package.json +75 -3
  102. package/storage/file_system.cjs +1 -0
  103. package/storage/file_system.d.ts +1 -0
  104. package/storage/file_system.js +1 -0
  105. package/tools/render.cjs +1 -0
  106. package/tools/render.d.ts +1 -0
  107. package/tools/render.js +1 -0
  108. package/util/document.cjs +1 -0
  109. package/util/document.d.ts +1 -0
  110. package/util/document.js +1 -0
@@ -6,6 +6,7 @@ import { MemoryVectorStore } from "../vectorstores/memory.js";
6
6
  import { Document } from "../document.js";
7
7
  import { Tool } from "./base.js";
8
8
  import fetchAdapter from "../util/axios-fetch-adapter.js";
9
+ import { formatDocumentsAsString } from "../util/document.js";
9
10
  export const parseInputs = (inputs) => {
10
11
  const [baseUrl, task] = inputs.split(",").map((input) => {
11
12
  let t = input.trim();
@@ -203,7 +204,7 @@ export class WebBrowser extends Tool {
203
204
  }));
204
205
  const vectorStore = await MemoryVectorStore.fromDocuments(docs, this.embeddings);
205
206
  const results = await vectorStore.similaritySearch(task, 4, undefined, runManager?.getChild("vectorstore"));
206
- context = results.map((res) => res.pageContent).join("\n");
207
+ context = formatDocumentsAsString(results, "\n");
207
208
  }
208
209
  const input = `Text:${context}\n\nI need ${doSummary ? "a summary" : task} from the above text, also provide up to 5 markdown links from within that would be of interest (always including URL and text). Links should be provided, if present, in markdown syntax as a list under the heading "Relevant Links:".`;
209
210
  return this.model.predict(input, undefined, runManager?.getChild());
@@ -0,0 +1,12 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.formatDocumentsAsString = void 0;
4
+ /**
5
+ * Given a list of documents, this util formats their contents
6
+ * into a string, separated by newlines.
7
+ *
8
+ * @param documents
9
+ * @returns A string of the documents page content, separated by newlines.
10
+ */
11
+ const formatDocumentsAsString = (documents, separator = "\n\n") => documents.map((doc) => doc.pageContent).join(separator);
12
+ exports.formatDocumentsAsString = formatDocumentsAsString;
@@ -0,0 +1,9 @@
1
+ import { Document } from "../document.js";
2
+ /**
3
+ * Given a list of documents, this util formats their contents
4
+ * into a string, separated by newlines.
5
+ *
6
+ * @param documents
7
+ * @returns A string of the documents page content, separated by newlines.
8
+ */
9
+ export declare const formatDocumentsAsString: (documents: Document[], separator?: string) => string;
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Given a list of documents, this util formats their contents
3
+ * into a string, separated by newlines.
4
+ *
5
+ * @param documents
6
+ * @returns A string of the documents page content, separated by newlines.
7
+ */
8
+ export const formatDocumentsAsString = (documents, separator = "\n\n") => documents.map((doc) => doc.pageContent).join(separator);
@@ -34,7 +34,6 @@ const promises_1 = require("node:stream/promises");
34
34
  const node_stream_1 = require("node:stream");
35
35
  const base_js_1 = require("./base.cjs");
36
36
  const document_js_1 = require("../document.cjs");
37
- const _LANGCHAIN_DEFAULT_EMBEDDING_DIM = 1536;
38
37
  const _LANGCHAIN_DEFAULT_COLLECTION_NAME = "langchain_document";
39
38
  /**
40
39
  * Class that provides methods for creating and managing a collection of
@@ -85,8 +84,7 @@ class AnalyticDBVectorStore extends base_js_1.VectorStore {
85
84
  user: args.connectionOptions.user,
86
85
  password: args.connectionOptions.password,
87
86
  });
88
- this.embeddingDimension =
89
- args.embeddingDimension || _LANGCHAIN_DEFAULT_EMBEDDING_DIM;
87
+ this.embeddingDimension = args.embeddingDimension;
90
88
  this.collectionName =
91
89
  args.collectionName || _LANGCHAIN_DEFAULT_COLLECTION_NAME;
92
90
  this.preDeleteCollection = args.preDeleteCollection || false;
@@ -106,6 +104,9 @@ class AnalyticDBVectorStore extends base_js_1.VectorStore {
106
104
  * @returns Promise that resolves when the table and index are created.
107
105
  */
108
106
  async createTableIfNotExists() {
107
+ if (!this.embeddingDimension) {
108
+ this.embeddingDimension = (await this.embeddings.embedQuery("test")).length;
109
+ }
109
110
  const client = await this.pool.connect();
110
111
  try {
111
112
  await client.query("BEGIN");
@@ -194,6 +195,9 @@ class AnalyticDBVectorStore extends base_js_1.VectorStore {
194
195
  if (vectors.length !== documents.length) {
195
196
  throw new Error(`Vectors and documents must have the same length`);
196
197
  }
198
+ if (!this.embeddingDimension) {
199
+ this.embeddingDimension = (await this.embeddings.embedQuery("test")).length;
200
+ }
197
201
  if (vectors[0].length !== this.embeddingDimension) {
198
202
  throw new Error(`Vectors must have the same length as the number of dimensions (${this.embeddingDimension})`);
199
203
  }
@@ -21,7 +21,7 @@ export interface AnalyticDBArgs {
21
21
  export declare class AnalyticDBVectorStore extends VectorStore {
22
22
  FilterType: Record<string, any>;
23
23
  private pool;
24
- private embeddingDimension;
24
+ private embeddingDimension?;
25
25
  private collectionName;
26
26
  private preDeleteCollection;
27
27
  private isCreateCollection;
@@ -5,7 +5,6 @@ import { pipeline } from "node:stream/promises";
5
5
  import { Readable } from "node:stream";
6
6
  import { VectorStore } from "./base.js";
7
7
  import { Document } from "../document.js";
8
- const _LANGCHAIN_DEFAULT_EMBEDDING_DIM = 1536;
9
8
  const _LANGCHAIN_DEFAULT_COLLECTION_NAME = "langchain_document";
10
9
  /**
11
10
  * Class that provides methods for creating and managing a collection of
@@ -56,8 +55,7 @@ export class AnalyticDBVectorStore extends VectorStore {
56
55
  user: args.connectionOptions.user,
57
56
  password: args.connectionOptions.password,
58
57
  });
59
- this.embeddingDimension =
60
- args.embeddingDimension || _LANGCHAIN_DEFAULT_EMBEDDING_DIM;
58
+ this.embeddingDimension = args.embeddingDimension;
61
59
  this.collectionName =
62
60
  args.collectionName || _LANGCHAIN_DEFAULT_COLLECTION_NAME;
63
61
  this.preDeleteCollection = args.preDeleteCollection || false;
@@ -77,6 +75,9 @@ export class AnalyticDBVectorStore extends VectorStore {
77
75
  * @returns Promise that resolves when the table and index are created.
78
76
  */
79
77
  async createTableIfNotExists() {
78
+ if (!this.embeddingDimension) {
79
+ this.embeddingDimension = (await this.embeddings.embedQuery("test")).length;
80
+ }
80
81
  const client = await this.pool.connect();
81
82
  try {
82
83
  await client.query("BEGIN");
@@ -165,6 +166,9 @@ export class AnalyticDBVectorStore extends VectorStore {
165
166
  if (vectors.length !== documents.length) {
166
167
  throw new Error(`Vectors and documents must have the same length`);
167
168
  }
169
+ if (!this.embeddingDimension) {
170
+ this.embeddingDimension = (await this.embeddings.embedQuery("test")).length;
171
+ }
168
172
  if (vectors[0].length !== this.embeddingDimension) {
169
173
  throw new Error(`Vectors must have the same length as the number of dimensions (${this.embeddingDimension})`);
170
174
  }
@@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.CassandraStore = void 0;
4
4
  /* eslint-disable prefer-template */
5
5
  const cassandra_driver_1 = require("cassandra-driver");
6
+ const async_caller_js_1 = require("../util/async_caller.cjs");
6
7
  const base_js_1 = require("./base.cjs");
7
8
  const document_js_1 = require("../document.cjs");
8
9
  /**
@@ -16,7 +17,13 @@ class CassandraStore extends base_js_1.VectorStore {
16
17
  return "cassandra";
17
18
  }
18
19
  constructor(embeddings, args) {
19
- super(embeddings, args);
20
+ const argsWithDefaults = {
21
+ indices: [],
22
+ maxConcurrency: 25,
23
+ batchSize: 1,
24
+ ...args,
25
+ };
26
+ super(embeddings, argsWithDefaults);
20
27
  Object.defineProperty(this, "client", {
21
28
  enumerable: true,
22
29
  configurable: true,
@@ -65,13 +72,31 @@ class CassandraStore extends base_js_1.VectorStore {
65
72
  writable: true,
66
73
  value: false
67
74
  });
68
- this.client = new cassandra_driver_1.Client(args);
69
- this.dimensions = args.dimensions;
70
- this.keyspace = args.keyspace;
71
- this.table = args.table;
72
- this.primaryKey = args.primaryKey;
73
- this.metadataColumns = args.metadataColumns;
74
- this.indices = args.indices;
75
+ Object.defineProperty(this, "asyncCaller", {
76
+ enumerable: true,
77
+ configurable: true,
78
+ writable: true,
79
+ value: void 0
80
+ });
81
+ Object.defineProperty(this, "batchSize", {
82
+ enumerable: true,
83
+ configurable: true,
84
+ writable: true,
85
+ value: void 0
86
+ });
87
+ this.asyncCaller = new async_caller_js_1.AsyncCaller(argsWithDefaults ?? {});
88
+ this.client = new cassandra_driver_1.Client(argsWithDefaults);
89
+ this.dimensions = argsWithDefaults.dimensions;
90
+ this.keyspace = argsWithDefaults.keyspace;
91
+ this.table = argsWithDefaults.table;
92
+ this.primaryKey = argsWithDefaults.primaryKey;
93
+ this.metadataColumns = argsWithDefaults.metadataColumns;
94
+ this.indices = argsWithDefaults.indices;
95
+ this.batchSize = argsWithDefaults.batchSize;
96
+ if (this.batchSize < 1) {
97
+ console.warn("batchSize must be greater than or equal to 1, defaulting to 1");
98
+ this.batchSize = 1;
99
+ }
75
100
  }
76
101
  /**
77
102
  * Method to save vectors to the Cassandra database.
@@ -86,8 +111,7 @@ class CassandraStore extends base_js_1.VectorStore {
86
111
  if (!this.isInitialized) {
87
112
  await this.initialize();
88
113
  }
89
- const queries = this.buildInsertQuery(vectors, documents);
90
- await this.client.batch(queries);
114
+ await this.insertAll(vectors, documents);
91
115
  }
92
116
  /**
93
117
  * Method to add documents to the Cassandra database.
@@ -188,31 +212,6 @@ class CassandraStore extends base_js_1.VectorStore {
188
212
  }
189
213
  this.isInitialized = true;
190
214
  }
191
- /**
192
- * Method to build an CQL query for inserting vectors and documents into
193
- * the Cassandra database.
194
- * @param vectors The vectors to insert.
195
- * @param documents The documents to insert.
196
- * @returns The CQL query string.
197
- */
198
- buildInsertQuery(vectors, documents) {
199
- const queries = [];
200
- for (let index = 0; index < vectors.length; index += 1) {
201
- const vector = vectors[index];
202
- const document = documents[index];
203
- const metadataColNames = Object.keys(document.metadata);
204
- const metadataVals = Object.values(document.metadata);
205
- const metadataInsert = metadataColNames.length > 0 ? ", " + metadataColNames.join(", ") : "";
206
- const query = `INSERT INTO ${this.keyspace}.${this.table} (vector, text${metadataInsert}) VALUES ([${vector}], '${document.pageContent}'${metadataVals.length > 0
207
- ? ", " +
208
- metadataVals
209
- .map((val) => (typeof val === "number" ? val : `'${val}'`))
210
- .join(", ")
211
- : ""});`;
212
- queries.push(query);
213
- }
214
- return queries;
215
- }
216
215
  buildWhereClause(filter) {
217
216
  const whereClause = Object.entries(filter)
218
217
  .map(([key, value]) => `${key} = '${value}'`)
@@ -231,5 +230,101 @@ class CassandraStore extends base_js_1.VectorStore {
231
230
  const whereClause = filter ? this.buildWhereClause(filter) : "";
232
231
  return `SELECT * FROM ${this.keyspace}.${this.table} ${whereClause} ORDER BY vector ANN OF [${query}] LIMIT ${k}`;
233
232
  }
233
+ /**
234
+ * Method for inserting vectors and documents into the Cassandra database in a batch.
235
+ * @param batchVectors The list of vectors to insert.
236
+ * @param batchDocuments The list of documents to insert.
237
+ * @returns Promise that resolves when the batch has been inserted.
238
+ */
239
+ async executeInsert(batchVectors, batchDocuments) {
240
+ // Input validation: Check if the lengths of batchVectors and batchDocuments are the same
241
+ if (batchVectors.length !== batchDocuments.length) {
242
+ throw new Error(`The lengths of vectors (${batchVectors.length}) and documents (${batchDocuments.length}) must be the same.`);
243
+ }
244
+ // Initialize an array to hold query objects
245
+ const queries = [];
246
+ // Loop through each vector and document in the batch
247
+ for (let i = 0; i < batchVectors.length; i += 1) {
248
+ // Convert the list of numbers to a Float32Array, the driver's expected format of a vector
249
+ const preparedVector = new Float32Array(batchVectors[i]);
250
+ // Retrieve the corresponding document
251
+ const document = batchDocuments[i];
252
+ // Extract metadata column names and values from the document
253
+ const metadataColNames = Object.keys(document.metadata);
254
+ const metadataVals = Object.values(document.metadata);
255
+ // Prepare the metadata columns string for the query, if metadata exists
256
+ const metadataInsert = metadataColNames.length > 0 ? ", " + metadataColNames.join(", ") : "";
257
+ // Construct the query string and parameters
258
+ const query = {
259
+ query: `INSERT INTO ${this.keyspace}.${this.table} (vector, text${metadataInsert})
260
+ VALUES (?, ?${", ?".repeat(metadataColNames.length)})`,
261
+ params: [preparedVector, document.pageContent, ...metadataVals],
262
+ };
263
+ // Add the query to the list
264
+ queries.push(query);
265
+ }
266
+ // Execute the queries: use a batch if multiple, otherwise execute a single query
267
+ if (queries.length === 1) {
268
+ await this.client.execute(queries[0].query, queries[0].params, {
269
+ prepare: true,
270
+ });
271
+ }
272
+ else {
273
+ await this.client.batch(queries, { prepare: true, logged: false });
274
+ }
275
+ }
276
+ /**
277
+ * Method for inserting vectors and documents into the Cassandra database in
278
+ * parallel, keeping within maxConcurrency number of active insert statements.
279
+ * @param vectors The vectors to insert.
280
+ * @param documents The documents to insert.
281
+ * @returns Promise that resolves when the documents have been added.
282
+ */
283
+ async insertAll(vectors, documents) {
284
+ // Input validation: Check if the lengths of vectors and documents are the same
285
+ if (vectors.length !== documents.length) {
286
+ throw new Error(`The lengths of vectors (${vectors.length}) and documents (${documents.length}) must be the same.`);
287
+ }
288
+ // Early exit: If there are no vectors or documents to insert, return immediately
289
+ if (vectors.length === 0) {
290
+ return;
291
+ }
292
+ // Ensure the store is initialized before proceeding
293
+ if (!this.isInitialized) {
294
+ await this.initialize();
295
+ }
296
+ // Initialize an array to hold promises for each batch insert
297
+ const insertPromises = [];
298
+ // Buffers to hold the current batch of vectors and documents
299
+ let currentBatchVectors = [];
300
+ let currentBatchDocuments = [];
301
+ // Loop through each vector/document pair to insert; we use
302
+ // <= vectors.length to ensure the last batch is inserted
303
+ for (let i = 0; i <= vectors.length; i += 1) {
304
+ // Check if we're still within the array boundaries
305
+ if (i < vectors.length) {
306
+ // Add the current vector and document to the batch
307
+ currentBatchVectors.push(vectors[i]);
308
+ currentBatchDocuments.push(documents[i]);
309
+ }
310
+ // Check if we've reached the batch size or end of the array
311
+ if (currentBatchVectors.length >= this.batchSize ||
312
+ i === vectors.length) {
313
+ // Only proceed if there are items in the current batch
314
+ if (currentBatchVectors.length > 0) {
315
+ // Create copies of the current batch arrays to use in the async insert operation
316
+ const batchVectors = [...currentBatchVectors];
317
+ const batchDocuments = [...currentBatchDocuments];
318
+ // Execute the insert using the AsyncCaller - it will handle concurrency and queueing.
319
+ insertPromises.push(this.asyncCaller.call(() => this.executeInsert(batchVectors, batchDocuments)));
320
+ // Clear the current buffers for the next iteration
321
+ currentBatchVectors = [];
322
+ currentBatchDocuments = [];
323
+ }
324
+ }
325
+ }
326
+ // Wait for all insert operations to complete.
327
+ await Promise.all(insertPromises);
328
+ }
234
329
  }
235
330
  exports.CassandraStore = CassandraStore;
@@ -1,4 +1,5 @@
1
1
  import { DseClientOptions } from "cassandra-driver";
2
+ import { AsyncCaller, AsyncCallerParams } from "../util/async_caller.js";
2
3
  import { Embeddings } from "../embeddings/base.js";
3
4
  import { VectorStore } from "./base.js";
4
5
  import { Document } from "../document.js";
@@ -10,13 +11,14 @@ export interface Index {
10
11
  name: string;
11
12
  value: string;
12
13
  }
13
- export interface CassandraLibArgs extends DseClientOptions {
14
+ export interface CassandraLibArgs extends DseClientOptions, AsyncCallerParams {
14
15
  table: string;
15
16
  keyspace: string;
16
17
  dimensions: number;
17
18
  primaryKey: Column;
18
19
  metadataColumns: Column[];
19
- indices: Index[];
20
+ indices?: Index[];
21
+ batchSize?: number;
20
22
  }
21
23
  /**
22
24
  * Class for interacting with the Cassandra database. It extends the
@@ -34,6 +36,8 @@ export declare class CassandraStore extends VectorStore {
34
36
  private readonly table;
35
37
  private indices;
36
38
  private isInitialized;
39
+ asyncCaller: AsyncCaller;
40
+ private readonly batchSize;
37
41
  _vectorstoreType(): string;
38
42
  constructor(embeddings: Embeddings, args: CassandraLibArgs);
39
43
  /**
@@ -87,14 +91,6 @@ export declare class CassandraStore extends VectorStore {
87
91
  * @returns Promise that resolves when the database has been initialized.
88
92
  */
89
93
  private initialize;
90
- /**
91
- * Method to build an CQL query for inserting vectors and documents into
92
- * the Cassandra database.
93
- * @param vectors The vectors to insert.
94
- * @param documents The documents to insert.
95
- * @returns The CQL query string.
96
- */
97
- private buildInsertQuery;
98
94
  private buildWhereClause;
99
95
  /**
100
96
  * Method to build an CQL query for searching for similar vectors in the
@@ -105,4 +101,19 @@ export declare class CassandraStore extends VectorStore {
105
101
  * @returns The CQL query string.
106
102
  */
107
103
  private buildSearchQuery;
104
+ /**
105
+ * Method for inserting vectors and documents into the Cassandra database in a batch.
106
+ * @param batchVectors The list of vectors to insert.
107
+ * @param batchDocuments The list of documents to insert.
108
+ * @returns Promise that resolves when the batch has been inserted.
109
+ */
110
+ private executeInsert;
111
+ /**
112
+ * Method for inserting vectors and documents into the Cassandra database in
113
+ * parallel, keeping within maxConcurrency number of active insert statements.
114
+ * @param vectors The vectors to insert.
115
+ * @param documents The documents to insert.
116
+ * @returns Promise that resolves when the documents have been added.
117
+ */
118
+ private insertAll;
108
119
  }
@@ -1,5 +1,6 @@
1
1
  /* eslint-disable prefer-template */
2
2
  import { Client as CassandraClient } from "cassandra-driver";
3
+ import { AsyncCaller } from "../util/async_caller.js";
3
4
  import { VectorStore } from "./base.js";
4
5
  import { Document } from "../document.js";
5
6
  /**
@@ -13,7 +14,13 @@ export class CassandraStore extends VectorStore {
13
14
  return "cassandra";
14
15
  }
15
16
  constructor(embeddings, args) {
16
- super(embeddings, args);
17
+ const argsWithDefaults = {
18
+ indices: [],
19
+ maxConcurrency: 25,
20
+ batchSize: 1,
21
+ ...args,
22
+ };
23
+ super(embeddings, argsWithDefaults);
17
24
  Object.defineProperty(this, "client", {
18
25
  enumerable: true,
19
26
  configurable: true,
@@ -62,13 +69,31 @@ export class CassandraStore extends VectorStore {
62
69
  writable: true,
63
70
  value: false
64
71
  });
65
- this.client = new CassandraClient(args);
66
- this.dimensions = args.dimensions;
67
- this.keyspace = args.keyspace;
68
- this.table = args.table;
69
- this.primaryKey = args.primaryKey;
70
- this.metadataColumns = args.metadataColumns;
71
- this.indices = args.indices;
72
+ Object.defineProperty(this, "asyncCaller", {
73
+ enumerable: true,
74
+ configurable: true,
75
+ writable: true,
76
+ value: void 0
77
+ });
78
+ Object.defineProperty(this, "batchSize", {
79
+ enumerable: true,
80
+ configurable: true,
81
+ writable: true,
82
+ value: void 0
83
+ });
84
+ this.asyncCaller = new AsyncCaller(argsWithDefaults ?? {});
85
+ this.client = new CassandraClient(argsWithDefaults);
86
+ this.dimensions = argsWithDefaults.dimensions;
87
+ this.keyspace = argsWithDefaults.keyspace;
88
+ this.table = argsWithDefaults.table;
89
+ this.primaryKey = argsWithDefaults.primaryKey;
90
+ this.metadataColumns = argsWithDefaults.metadataColumns;
91
+ this.indices = argsWithDefaults.indices;
92
+ this.batchSize = argsWithDefaults.batchSize;
93
+ if (this.batchSize < 1) {
94
+ console.warn("batchSize must be greater than or equal to 1, defaulting to 1");
95
+ this.batchSize = 1;
96
+ }
72
97
  }
73
98
  /**
74
99
  * Method to save vectors to the Cassandra database.
@@ -83,8 +108,7 @@ export class CassandraStore extends VectorStore {
83
108
  if (!this.isInitialized) {
84
109
  await this.initialize();
85
110
  }
86
- const queries = this.buildInsertQuery(vectors, documents);
87
- await this.client.batch(queries);
111
+ await this.insertAll(vectors, documents);
88
112
  }
89
113
  /**
90
114
  * Method to add documents to the Cassandra database.
@@ -185,31 +209,6 @@ export class CassandraStore extends VectorStore {
185
209
  }
186
210
  this.isInitialized = true;
187
211
  }
188
- /**
189
- * Method to build an CQL query for inserting vectors and documents into
190
- * the Cassandra database.
191
- * @param vectors The vectors to insert.
192
- * @param documents The documents to insert.
193
- * @returns The CQL query string.
194
- */
195
- buildInsertQuery(vectors, documents) {
196
- const queries = [];
197
- for (let index = 0; index < vectors.length; index += 1) {
198
- const vector = vectors[index];
199
- const document = documents[index];
200
- const metadataColNames = Object.keys(document.metadata);
201
- const metadataVals = Object.values(document.metadata);
202
- const metadataInsert = metadataColNames.length > 0 ? ", " + metadataColNames.join(", ") : "";
203
- const query = `INSERT INTO ${this.keyspace}.${this.table} (vector, text${metadataInsert}) VALUES ([${vector}], '${document.pageContent}'${metadataVals.length > 0
204
- ? ", " +
205
- metadataVals
206
- .map((val) => (typeof val === "number" ? val : `'${val}'`))
207
- .join(", ")
208
- : ""});`;
209
- queries.push(query);
210
- }
211
- return queries;
212
- }
213
212
  buildWhereClause(filter) {
214
213
  const whereClause = Object.entries(filter)
215
214
  .map(([key, value]) => `${key} = '${value}'`)
@@ -228,4 +227,100 @@ export class CassandraStore extends VectorStore {
228
227
  const whereClause = filter ? this.buildWhereClause(filter) : "";
229
228
  return `SELECT * FROM ${this.keyspace}.${this.table} ${whereClause} ORDER BY vector ANN OF [${query}] LIMIT ${k}`;
230
229
  }
230
+ /**
231
+ * Method for inserting vectors and documents into the Cassandra database in a batch.
232
+ * @param batchVectors The list of vectors to insert.
233
+ * @param batchDocuments The list of documents to insert.
234
+ * @returns Promise that resolves when the batch has been inserted.
235
+ */
236
+ async executeInsert(batchVectors, batchDocuments) {
237
+ // Input validation: Check if the lengths of batchVectors and batchDocuments are the same
238
+ if (batchVectors.length !== batchDocuments.length) {
239
+ throw new Error(`The lengths of vectors (${batchVectors.length}) and documents (${batchDocuments.length}) must be the same.`);
240
+ }
241
+ // Initialize an array to hold query objects
242
+ const queries = [];
243
+ // Loop through each vector and document in the batch
244
+ for (let i = 0; i < batchVectors.length; i += 1) {
245
+ // Convert the list of numbers to a Float32Array, the driver's expected format of a vector
246
+ const preparedVector = new Float32Array(batchVectors[i]);
247
+ // Retrieve the corresponding document
248
+ const document = batchDocuments[i];
249
+ // Extract metadata column names and values from the document
250
+ const metadataColNames = Object.keys(document.metadata);
251
+ const metadataVals = Object.values(document.metadata);
252
+ // Prepare the metadata columns string for the query, if metadata exists
253
+ const metadataInsert = metadataColNames.length > 0 ? ", " + metadataColNames.join(", ") : "";
254
+ // Construct the query string and parameters
255
+ const query = {
256
+ query: `INSERT INTO ${this.keyspace}.${this.table} (vector, text${metadataInsert})
257
+ VALUES (?, ?${", ?".repeat(metadataColNames.length)})`,
258
+ params: [preparedVector, document.pageContent, ...metadataVals],
259
+ };
260
+ // Add the query to the list
261
+ queries.push(query);
262
+ }
263
+ // Execute the queries: use a batch if multiple, otherwise execute a single query
264
+ if (queries.length === 1) {
265
+ await this.client.execute(queries[0].query, queries[0].params, {
266
+ prepare: true,
267
+ });
268
+ }
269
+ else {
270
+ await this.client.batch(queries, { prepare: true, logged: false });
271
+ }
272
+ }
273
+ /**
274
+ * Method for inserting vectors and documents into the Cassandra database in
275
+ * parallel, keeping within maxConcurrency number of active insert statements.
276
+ * @param vectors The vectors to insert.
277
+ * @param documents The documents to insert.
278
+ * @returns Promise that resolves when the documents have been added.
279
+ */
280
+ async insertAll(vectors, documents) {
281
+ // Input validation: Check if the lengths of vectors and documents are the same
282
+ if (vectors.length !== documents.length) {
283
+ throw new Error(`The lengths of vectors (${vectors.length}) and documents (${documents.length}) must be the same.`);
284
+ }
285
+ // Early exit: If there are no vectors or documents to insert, return immediately
286
+ if (vectors.length === 0) {
287
+ return;
288
+ }
289
+ // Ensure the store is initialized before proceeding
290
+ if (!this.isInitialized) {
291
+ await this.initialize();
292
+ }
293
+ // Initialize an array to hold promises for each batch insert
294
+ const insertPromises = [];
295
+ // Buffers to hold the current batch of vectors and documents
296
+ let currentBatchVectors = [];
297
+ let currentBatchDocuments = [];
298
+ // Loop through each vector/document pair to insert; we use
299
+ // <= vectors.length to ensure the last batch is inserted
300
+ for (let i = 0; i <= vectors.length; i += 1) {
301
+ // Check if we're still within the array boundaries
302
+ if (i < vectors.length) {
303
+ // Add the current vector and document to the batch
304
+ currentBatchVectors.push(vectors[i]);
305
+ currentBatchDocuments.push(documents[i]);
306
+ }
307
+ // Check if we've reached the batch size or end of the array
308
+ if (currentBatchVectors.length >= this.batchSize ||
309
+ i === vectors.length) {
310
+ // Only proceed if there are items in the current batch
311
+ if (currentBatchVectors.length > 0) {
312
+ // Create copies of the current batch arrays to use in the async insert operation
313
+ const batchVectors = [...currentBatchVectors];
314
+ const batchDocuments = [...currentBatchDocuments];
315
+ // Execute the insert using the AsyncCaller - it will handle concurrency and queueing.
316
+ insertPromises.push(this.asyncCaller.call(() => this.executeInsert(batchVectors, batchDocuments)));
317
+ // Clear the current buffers for the next iteration
318
+ currentBatchVectors = [];
319
+ currentBatchDocuments = [];
320
+ }
321
+ }
322
+ }
323
+ // Wait for all insert operations to complete.
324
+ await Promise.all(insertPromises);
325
+ }
231
326
  }
@@ -74,6 +74,12 @@ class PGVectorStore extends base_js_1.VectorStore {
74
74
  writable: true,
75
75
  value: void 0
76
76
  });
77
+ Object.defineProperty(this, "chunkSize", {
78
+ enumerable: true,
79
+ configurable: true,
80
+ writable: true,
81
+ value: 500
82
+ });
77
83
  this.tableName = config.tableName;
78
84
  this.filter = config.filter;
79
85
  this.vectorColumnName = config.columns?.vectorColumnName ?? "embedding";
@@ -82,6 +88,7 @@ class PGVectorStore extends base_js_1.VectorStore {
82
88
  this.metadataColumnName = config.columns?.metadataColumnName ?? "metadata";
83
89
  const pool = new pg_1.default.Pool(config.postgresConnectionOptions);
84
90
  this.pool = pool;
91
+ this.chunkSize = config.chunkSize ?? 500;
85
92
  this._verbose =
86
93
  (0, env_js_1.getEnvironmentVariable)("LANGCHAIN_VERBOSE") === "true" ??
87
94
  !!config.verbose;
@@ -132,9 +139,9 @@ class PGVectorStore extends base_js_1.VectorStore {
132
139
  * @param chunkIndex - The starting index for generating query placeholders based on chunk positioning.
133
140
  * @returns The complete SQL INSERT INTO query string.
134
141
  */
135
- buildInsertQuery(rows, chunkIndex) {
142
+ buildInsertQuery(rows) {
136
143
  const valuesPlaceholders = rows
137
- .map((_, j) => this.generatePlaceholderForRowAt(chunkIndex + j))
144
+ .map((_, j) => this.generatePlaceholderForRowAt(j))
138
145
  .join(", ");
139
146
  const text = `
140
147
  INSERT INTO ${this.tableName}(
@@ -163,10 +170,9 @@ class PGVectorStore extends base_js_1.VectorStore {
163
170
  documents[idx].metadata,
164
171
  ];
165
172
  });
166
- const chunkSize = 500;
167
- for (let i = 0; i < rows.length; i += chunkSize) {
168
- const chunk = rows.slice(i, i + chunkSize);
169
- const insertQuery = this.buildInsertQuery(chunk, i);
173
+ for (let i = 0; i < rows.length; i += this.chunkSize) {
174
+ const chunk = rows.slice(i, i + this.chunkSize);
175
+ const insertQuery = this.buildInsertQuery(chunk);
170
176
  const flatValues = chunk.flat();
171
177
  try {
172
178
  await this.pool.query(insertQuery, flatValues);
@@ -270,7 +276,7 @@ class PGVectorStore extends base_js_1.VectorStore {
270
276
  * @returns Promise that resolves when all clients are closed and the pool is terminated.
271
277
  */
272
278
  async end() {
273
- await this.client?.release();
279
+ this.client?.release();
274
280
  return this.pool.end();
275
281
  }
276
282
  }