langchain 0.0.172 → 0.0.174

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. package/agents/openai/output_parser.cjs +1 -0
  2. package/agents/openai/output_parser.d.ts +1 -0
  3. package/agents/openai/output_parser.js +1 -0
  4. package/agents/xml/output_parser.cjs +1 -0
  5. package/agents/xml/output_parser.d.ts +1 -0
  6. package/agents/xml/output_parser.js +1 -0
  7. package/dist/agents/index.cjs +3 -1
  8. package/dist/agents/index.d.ts +1 -0
  9. package/dist/agents/index.js +1 -0
  10. package/dist/agents/openai/index.cjs +8 -31
  11. package/dist/agents/openai/index.d.ts +2 -0
  12. package/dist/agents/openai/index.js +8 -31
  13. package/dist/agents/openai/output_parser.cjs +65 -0
  14. package/dist/agents/openai/output_parser.d.ts +22 -0
  15. package/dist/agents/openai/output_parser.js +61 -0
  16. package/dist/agents/toolkits/conversational_retrieval/tool.cjs +2 -1
  17. package/dist/agents/toolkits/conversational_retrieval/tool.js +2 -1
  18. package/dist/agents/xml/index.cjs +9 -25
  19. package/dist/agents/xml/index.d.ts +2 -7
  20. package/dist/agents/xml/index.js +8 -23
  21. package/dist/agents/xml/output_parser.cjs +44 -0
  22. package/dist/agents/xml/output_parser.d.ts +14 -0
  23. package/dist/agents/xml/output_parser.js +40 -0
  24. package/dist/callbacks/manager.cjs +2 -1
  25. package/dist/callbacks/manager.js +2 -1
  26. package/dist/document_loaders/fs/pdf.cjs +2 -1
  27. package/dist/document_loaders/fs/pdf.js +2 -1
  28. package/dist/document_loaders/web/pdf.cjs +2 -1
  29. package/dist/document_loaders/web/pdf.js +2 -1
  30. package/dist/load/import_map.cjs +5 -2
  31. package/dist/load/import_map.d.ts +3 -0
  32. package/dist/load/import_map.js +3 -0
  33. package/dist/memory/index.cjs +2 -1
  34. package/dist/memory/index.d.ts +1 -1
  35. package/dist/memory/index.js +1 -1
  36. package/dist/memory/vector_store.cjs +2 -1
  37. package/dist/memory/vector_store.js +2 -1
  38. package/dist/prompts/selectors/SemanticSimilarityExampleSelector.cjs +15 -18
  39. package/dist/prompts/selectors/SemanticSimilarityExampleSelector.d.ts +12 -6
  40. package/dist/prompts/selectors/SemanticSimilarityExampleSelector.js +15 -18
  41. package/dist/storage/file_system.cjs +31 -11
  42. package/dist/storage/file_system.js +9 -9
  43. package/dist/tools/index.cjs +3 -1
  44. package/dist/tools/index.d.ts +1 -0
  45. package/dist/tools/index.js +1 -0
  46. package/dist/tools/render.cjs +19 -1
  47. package/dist/tools/render.d.ts +12 -0
  48. package/dist/tools/render.js +17 -0
  49. package/dist/tools/serpapi.d.ts +2 -2
  50. package/dist/tools/webbrowser.cjs +2 -1
  51. package/dist/tools/webbrowser.js +2 -1
  52. package/dist/util/document.cjs +12 -0
  53. package/dist/util/document.d.ts +9 -0
  54. package/dist/util/document.js +8 -0
  55. package/dist/vectorstores/cassandra.cjs +130 -35
  56. package/dist/vectorstores/cassandra.d.ts +21 -10
  57. package/dist/vectorstores/cassandra.js +130 -35
  58. package/dist/vectorstores/pgvector.cjs +13 -7
  59. package/dist/vectorstores/pgvector.d.ts +7 -0
  60. package/dist/vectorstores/pgvector.js +13 -7
  61. package/dist/vectorstores/pinecone.cjs +46 -9
  62. package/dist/vectorstores/pinecone.d.ts +20 -2
  63. package/dist/vectorstores/pinecone.js +46 -9
  64. package/package.json +27 -3
  65. package/util/document.cjs +1 -0
  66. package/util/document.d.ts +1 -0
  67. package/util/document.js +1 -0
package/dist/tools/serpapi.d.ts
@@ -66,8 +66,8 @@ export interface SerpAPIParameters extends BaseParameters {
66
66
  * Additional Google Place ID
67
67
  * Parameter that you might have to use to force the knowledge graph map view to
68
68
  * show up. You can find the lsig ID by using our [Local Pack
69
- * API](https://serpapi.com/local-pack) or [Places Results
70
- * API](https://serpapi.com/places-results).
69
+ * API](https://serpapi.com/local-pack) or [Local Places Results
70
+ * API](https://serpapi.com/local-results).
71
71
  * lsig ID is also available via a redirect Google uses within [Google My
72
72
  * Business](https://www.google.com/business/).
73
73
  */
package/dist/tools/webbrowser.cjs
@@ -35,6 +35,7 @@ const memory_js_1 = require("../vectorstores/memory.cjs");
35
35
  const document_js_1 = require("../document.cjs");
36
36
  const base_js_1 = require("./base.cjs");
37
37
  const axios_fetch_adapter_js_1 = __importDefault(require("../util/axios-fetch-adapter.cjs"));
38
+ const document_js_2 = require("../util/document.cjs");
38
39
  const parseInputs = (inputs) => {
39
40
  const [baseUrl, task] = inputs.split(",").map((input) => {
40
41
  let t = input.trim();
@@ -234,7 +235,7 @@ class WebBrowser extends base_js_1.Tool {
234
235
  }));
235
236
  const vectorStore = await memory_js_1.MemoryVectorStore.fromDocuments(docs, this.embeddings);
236
237
  const results = await vectorStore.similaritySearch(task, 4, undefined, runManager?.getChild("vectorstore"));
237
- context = results.map((res) => res.pageContent).join("\n");
238
+ context = (0, document_js_2.formatDocumentsAsString)(results, "\n");
238
239
  }
239
240
  const input = `Text:${context}\n\nI need ${doSummary ? "a summary" : task} from the above text, also provide up to 5 markdown links from within that would be of interest (always including URL and text). Links should be provided, if present, in markdown syntax as a list under the heading "Relevant Links:".`;
240
241
  return this.model.predict(input, undefined, runManager?.getChild());
package/dist/tools/webbrowser.js
@@ -6,6 +6,7 @@ import { MemoryVectorStore } from "../vectorstores/memory.js";
6
6
  import { Document } from "../document.js";
7
7
  import { Tool } from "./base.js";
8
8
  import fetchAdapter from "../util/axios-fetch-adapter.js";
9
+ import { formatDocumentsAsString } from "../util/document.js";
9
10
  export const parseInputs = (inputs) => {
10
11
  const [baseUrl, task] = inputs.split(",").map((input) => {
11
12
  let t = input.trim();
@@ -203,7 +204,7 @@ export class WebBrowser extends Tool {
203
204
  }));
204
205
  const vectorStore = await MemoryVectorStore.fromDocuments(docs, this.embeddings);
205
206
  const results = await vectorStore.similaritySearch(task, 4, undefined, runManager?.getChild("vectorstore"));
206
- context = results.map((res) => res.pageContent).join("\n");
207
+ context = formatDocumentsAsString(results, "\n");
207
208
  }
208
209
  const input = `Text:${context}\n\nI need ${doSummary ? "a summary" : task} from the above text, also provide up to 5 markdown links from within that would be of interest (always including URL and text). Links should be provided, if present, in markdown syntax as a list under the heading "Relevant Links:".`;
209
210
  return this.model.predict(input, undefined, runManager?.getChild());
package/dist/util/document.cjs
@@ -0,0 +1,12 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.formatDocumentsAsString = void 0;
4
+ /**
5
+ * Given a list of documents, this util formats their contents
6
+ * into a string, separated by newlines.
7
+ *
8
+ * @param documents
9
+ * @returns A string of the documents page content, separated by newlines.
10
+ */
11
+ const formatDocumentsAsString = (documents, separator = "\n\n") => documents.map((doc) => doc.pageContent).join(separator);
12
+ exports.formatDocumentsAsString = formatDocumentsAsString;
package/dist/util/document.d.ts
@@ -0,0 +1,9 @@
1
+ import { Document } from "../document.js";
2
+ /**
3
+ * Given a list of documents, this util formats their contents
4
+ * into a string, separated by newlines.
5
+ *
6
+ * @param documents
7
+ * @returns A string of the documents page content, separated by newlines.
8
+ */
9
+ export declare const formatDocumentsAsString: (documents: Document[], separator?: string) => string;
package/dist/util/document.js
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Given a list of documents, this util formats their contents
3
+ * into a string, separated by newlines.
4
+ *
5
+ * @param documents
6
+ * @returns A string of the documents page content, separated by newlines.
7
+ */
8
+ export const formatDocumentsAsString = (documents, separator = "\n\n") => documents.map((doc) => doc.pageContent).join(separator);
package/dist/vectorstores/cassandra.cjs
@@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.CassandraStore = void 0;
4
4
  /* eslint-disable prefer-template */
5
5
  const cassandra_driver_1 = require("cassandra-driver");
6
+ const async_caller_js_1 = require("../util/async_caller.cjs");
6
7
  const base_js_1 = require("./base.cjs");
7
8
  const document_js_1 = require("../document.cjs");
8
9
  /**
@@ -16,7 +17,13 @@ class CassandraStore extends base_js_1.VectorStore {
16
17
  return "cassandra";
17
18
  }
18
19
  constructor(embeddings, args) {
19
- super(embeddings, args);
20
+ const argsWithDefaults = {
21
+ indices: [],
22
+ maxConcurrency: 25,
23
+ batchSize: 1,
24
+ ...args,
25
+ };
26
+ super(embeddings, argsWithDefaults);
20
27
  Object.defineProperty(this, "client", {
21
28
  enumerable: true,
22
29
  configurable: true,
@@ -65,13 +72,31 @@ class CassandraStore extends base_js_1.VectorStore {
65
72
  writable: true,
66
73
  value: false
67
74
  });
68
- this.client = new cassandra_driver_1.Client(args);
69
- this.dimensions = args.dimensions;
70
- this.keyspace = args.keyspace;
71
- this.table = args.table;
72
- this.primaryKey = args.primaryKey;
73
- this.metadataColumns = args.metadataColumns;
74
- this.indices = args.indices;
75
+ Object.defineProperty(this, "asyncCaller", {
76
+ enumerable: true,
77
+ configurable: true,
78
+ writable: true,
79
+ value: void 0
80
+ });
81
+ Object.defineProperty(this, "batchSize", {
82
+ enumerable: true,
83
+ configurable: true,
84
+ writable: true,
85
+ value: void 0
86
+ });
87
+ this.asyncCaller = new async_caller_js_1.AsyncCaller(argsWithDefaults ?? {});
88
+ this.client = new cassandra_driver_1.Client(argsWithDefaults);
89
+ this.dimensions = argsWithDefaults.dimensions;
90
+ this.keyspace = argsWithDefaults.keyspace;
91
+ this.table = argsWithDefaults.table;
92
+ this.primaryKey = argsWithDefaults.primaryKey;
93
+ this.metadataColumns = argsWithDefaults.metadataColumns;
94
+ this.indices = argsWithDefaults.indices;
95
+ this.batchSize = argsWithDefaults.batchSize;
96
+ if (this.batchSize < 1) {
97
+ console.warn("batchSize must be greater than or equal to 1, defaulting to 1");
98
+ this.batchSize = 1;
99
+ }
75
100
  }
76
101
  /**
77
102
  * Method to save vectors to the Cassandra database.
@@ -86,8 +111,7 @@ class CassandraStore extends base_js_1.VectorStore {
86
111
  if (!this.isInitialized) {
87
112
  await this.initialize();
88
113
  }
89
- const queries = this.buildInsertQuery(vectors, documents);
90
- await this.client.batch(queries);
114
+ await this.insertAll(vectors, documents);
91
115
  }
92
116
  /**
93
117
  * Method to add documents to the Cassandra database.
@@ -188,31 +212,6 @@ class CassandraStore extends base_js_1.VectorStore {
188
212
  }
189
213
  this.isInitialized = true;
190
214
  }
191
- /**
192
- * Method to build an CQL query for inserting vectors and documents into
193
- * the Cassandra database.
194
- * @param vectors The vectors to insert.
195
- * @param documents The documents to insert.
196
- * @returns The CQL query string.
197
- */
198
- buildInsertQuery(vectors, documents) {
199
- const queries = [];
200
- for (let index = 0; index < vectors.length; index += 1) {
201
- const vector = vectors[index];
202
- const document = documents[index];
203
- const metadataColNames = Object.keys(document.metadata);
204
- const metadataVals = Object.values(document.metadata);
205
- const metadataInsert = metadataColNames.length > 0 ? ", " + metadataColNames.join(", ") : "";
206
- const query = `INSERT INTO ${this.keyspace}.${this.table} (vector, text${metadataInsert}) VALUES ([${vector}], '${document.pageContent}'${metadataVals.length > 0
207
- ? ", " +
208
- metadataVals
209
- .map((val) => (typeof val === "number" ? val : `'${val}'`))
210
- .join(", ")
211
- : ""});`;
212
- queries.push(query);
213
- }
214
- return queries;
215
- }
216
215
  buildWhereClause(filter) {
217
216
  const whereClause = Object.entries(filter)
218
217
  .map(([key, value]) => `${key} = '${value}'`)
@@ -231,5 +230,101 @@ class CassandraStore extends base_js_1.VectorStore {
231
230
  const whereClause = filter ? this.buildWhereClause(filter) : "";
232
231
  return `SELECT * FROM ${this.keyspace}.${this.table} ${whereClause} ORDER BY vector ANN OF [${query}] LIMIT ${k}`;
233
232
  }
233
+ /**
234
+ * Method for inserting vectors and documents into the Cassandra database in a batch.
235
+ * @param batchVectors The list of vectors to insert.
236
+ * @param batchDocuments The list of documents to insert.
237
+ * @returns Promise that resolves when the batch has been inserted.
238
+ */
239
+ async executeInsert(batchVectors, batchDocuments) {
240
+ // Input validation: Check if the lengths of batchVectors and batchDocuments are the same
241
+ if (batchVectors.length !== batchDocuments.length) {
242
+ throw new Error(`The lengths of vectors (${batchVectors.length}) and documents (${batchDocuments.length}) must be the same.`);
243
+ }
244
+ // Initialize an array to hold query objects
245
+ const queries = [];
246
+ // Loop through each vector and document in the batch
247
+ for (let i = 0; i < batchVectors.length; i += 1) {
248
+ // Convert the list of numbers to a Float32Array, the driver's expected format of a vector
249
+ const preparedVector = new Float32Array(batchVectors[i]);
250
+ // Retrieve the corresponding document
251
+ const document = batchDocuments[i];
252
+ // Extract metadata column names and values from the document
253
+ const metadataColNames = Object.keys(document.metadata);
254
+ const metadataVals = Object.values(document.metadata);
255
+ // Prepare the metadata columns string for the query, if metadata exists
256
+ const metadataInsert = metadataColNames.length > 0 ? ", " + metadataColNames.join(", ") : "";
257
+ // Construct the query string and parameters
258
+ const query = {
259
+ query: `INSERT INTO ${this.keyspace}.${this.table} (vector, text${metadataInsert})
260
+ VALUES (?, ?${", ?".repeat(metadataColNames.length)})`,
261
+ params: [preparedVector, document.pageContent, ...metadataVals],
262
+ };
263
+ // Add the query to the list
264
+ queries.push(query);
265
+ }
266
+ // Execute the queries: use a batch if multiple, otherwise execute a single query
267
+ if (queries.length === 1) {
268
+ await this.client.execute(queries[0].query, queries[0].params, {
269
+ prepare: true,
270
+ });
271
+ }
272
+ else {
273
+ await this.client.batch(queries, { prepare: true, logged: false });
274
+ }
275
+ }
276
+ /**
277
+ * Method for inserting vectors and documents into the Cassandra database in
278
+ * parallel, keeping within maxConcurrency number of active insert statements.
279
+ * @param vectors The vectors to insert.
280
+ * @param documents The documents to insert.
281
+ * @returns Promise that resolves when the documents have been added.
282
+ */
283
+ async insertAll(vectors, documents) {
284
+ // Input validation: Check if the lengths of vectors and documents are the same
285
+ if (vectors.length !== documents.length) {
286
+ throw new Error(`The lengths of vectors (${vectors.length}) and documents (${documents.length}) must be the same.`);
287
+ }
288
+ // Early exit: If there are no vectors or documents to insert, return immediately
289
+ if (vectors.length === 0) {
290
+ return;
291
+ }
292
+ // Ensure the store is initialized before proceeding
293
+ if (!this.isInitialized) {
294
+ await this.initialize();
295
+ }
296
+ // Initialize an array to hold promises for each batch insert
297
+ const insertPromises = [];
298
+ // Buffers to hold the current batch of vectors and documents
299
+ let currentBatchVectors = [];
300
+ let currentBatchDocuments = [];
301
+ // Loop through each vector/document pair to insert; we use
302
+ // <= vectors.length to ensure the last batch is inserted
303
+ for (let i = 0; i <= vectors.length; i += 1) {
304
+ // Check if we're still within the array boundaries
305
+ if (i < vectors.length) {
306
+ // Add the current vector and document to the batch
307
+ currentBatchVectors.push(vectors[i]);
308
+ currentBatchDocuments.push(documents[i]);
309
+ }
310
+ // Check if we've reached the batch size or end of the array
311
+ if (currentBatchVectors.length >= this.batchSize ||
312
+ i === vectors.length) {
313
+ // Only proceed if there are items in the current batch
314
+ if (currentBatchVectors.length > 0) {
315
+ // Create copies of the current batch arrays to use in the async insert operation
316
+ const batchVectors = [...currentBatchVectors];
317
+ const batchDocuments = [...currentBatchDocuments];
318
+ // Execute the insert using the AsyncCaller - it will handle concurrency and queueing.
319
+ insertPromises.push(this.asyncCaller.call(() => this.executeInsert(batchVectors, batchDocuments)));
320
+ // Clear the current buffers for the next iteration
321
+ currentBatchVectors = [];
322
+ currentBatchDocuments = [];
323
+ }
324
+ }
325
+ }
326
+ // Wait for all insert operations to complete.
327
+ await Promise.all(insertPromises);
328
+ }
234
329
  }
235
330
  exports.CassandraStore = CassandraStore;
package/dist/vectorstores/cassandra.d.ts
@@ -1,4 +1,5 @@
1
1
  import { DseClientOptions } from "cassandra-driver";
2
+ import { AsyncCaller, AsyncCallerParams } from "../util/async_caller.js";
2
3
  import { Embeddings } from "../embeddings/base.js";
3
4
  import { VectorStore } from "./base.js";
4
5
  import { Document } from "../document.js";
@@ -10,13 +11,14 @@ export interface Index {
10
11
  name: string;
11
12
  value: string;
12
13
  }
13
- export interface CassandraLibArgs extends DseClientOptions {
14
+ export interface CassandraLibArgs extends DseClientOptions, AsyncCallerParams {
14
15
  table: string;
15
16
  keyspace: string;
16
17
  dimensions: number;
17
18
  primaryKey: Column;
18
19
  metadataColumns: Column[];
19
- indices: Index[];
20
+ indices?: Index[];
21
+ batchSize?: number;
20
22
  }
21
23
  /**
22
24
  * Class for interacting with the Cassandra database. It extends the
@@ -34,6 +36,8 @@ export declare class CassandraStore extends VectorStore {
34
36
  private readonly table;
35
37
  private indices;
36
38
  private isInitialized;
39
+ asyncCaller: AsyncCaller;
40
+ private readonly batchSize;
37
41
  _vectorstoreType(): string;
38
42
  constructor(embeddings: Embeddings, args: CassandraLibArgs);
39
43
  /**
@@ -87,14 +91,6 @@ export declare class CassandraStore extends VectorStore {
87
91
  * @returns Promise that resolves when the database has been initialized.
88
92
  */
89
93
  private initialize;
90
- /**
91
- * Method to build an CQL query for inserting vectors and documents into
92
- * the Cassandra database.
93
- * @param vectors The vectors to insert.
94
- * @param documents The documents to insert.
95
- * @returns The CQL query string.
96
- */
97
- private buildInsertQuery;
98
94
  private buildWhereClause;
99
95
  /**
100
96
  * Method to build an CQL query for searching for similar vectors in the
@@ -105,4 +101,19 @@ export declare class CassandraStore extends VectorStore {
105
101
  * @returns The CQL query string.
106
102
  */
107
103
  private buildSearchQuery;
104
+ /**
105
+ * Method for inserting vectors and documents into the Cassandra database in a batch.
106
+ * @param batchVectors The list of vectors to insert.
107
+ * @param batchDocuments The list of documents to insert.
108
+ * @returns Promise that resolves when the batch has been inserted.
109
+ */
110
+ private executeInsert;
111
+ /**
112
+ * Method for inserting vectors and documents into the Cassandra database in
113
+ * parallel, keeping within maxConcurrency number of active insert statements.
114
+ * @param vectors The vectors to insert.
115
+ * @param documents The documents to insert.
116
+ * @returns Promise that resolves when the documents have been added.
117
+ */
118
+ private insertAll;
108
119
  }
package/dist/vectorstores/cassandra.js
@@ -1,5 +1,6 @@
1
1
  /* eslint-disable prefer-template */
2
2
  import { Client as CassandraClient } from "cassandra-driver";
3
+ import { AsyncCaller } from "../util/async_caller.js";
3
4
  import { VectorStore } from "./base.js";
4
5
  import { Document } from "../document.js";
5
6
  /**
@@ -13,7 +14,13 @@ export class CassandraStore extends VectorStore {
13
14
  return "cassandra";
14
15
  }
15
16
  constructor(embeddings, args) {
16
- super(embeddings, args);
17
+ const argsWithDefaults = {
18
+ indices: [],
19
+ maxConcurrency: 25,
20
+ batchSize: 1,
21
+ ...args,
22
+ };
23
+ super(embeddings, argsWithDefaults);
17
24
  Object.defineProperty(this, "client", {
18
25
  enumerable: true,
19
26
  configurable: true,
@@ -62,13 +69,31 @@ export class CassandraStore extends VectorStore {
62
69
  writable: true,
63
70
  value: false
64
71
  });
65
- this.client = new CassandraClient(args);
66
- this.dimensions = args.dimensions;
67
- this.keyspace = args.keyspace;
68
- this.table = args.table;
69
- this.primaryKey = args.primaryKey;
70
- this.metadataColumns = args.metadataColumns;
71
- this.indices = args.indices;
72
+ Object.defineProperty(this, "asyncCaller", {
73
+ enumerable: true,
74
+ configurable: true,
75
+ writable: true,
76
+ value: void 0
77
+ });
78
+ Object.defineProperty(this, "batchSize", {
79
+ enumerable: true,
80
+ configurable: true,
81
+ writable: true,
82
+ value: void 0
83
+ });
84
+ this.asyncCaller = new AsyncCaller(argsWithDefaults ?? {});
85
+ this.client = new CassandraClient(argsWithDefaults);
86
+ this.dimensions = argsWithDefaults.dimensions;
87
+ this.keyspace = argsWithDefaults.keyspace;
88
+ this.table = argsWithDefaults.table;
89
+ this.primaryKey = argsWithDefaults.primaryKey;
90
+ this.metadataColumns = argsWithDefaults.metadataColumns;
91
+ this.indices = argsWithDefaults.indices;
92
+ this.batchSize = argsWithDefaults.batchSize;
93
+ if (this.batchSize < 1) {
94
+ console.warn("batchSize must be greater than or equal to 1, defaulting to 1");
95
+ this.batchSize = 1;
96
+ }
72
97
  }
73
98
  /**
74
99
  * Method to save vectors to the Cassandra database.
@@ -83,8 +108,7 @@ export class CassandraStore extends VectorStore {
83
108
  if (!this.isInitialized) {
84
109
  await this.initialize();
85
110
  }
86
- const queries = this.buildInsertQuery(vectors, documents);
87
- await this.client.batch(queries);
111
+ await this.insertAll(vectors, documents);
88
112
  }
89
113
  /**
90
114
  * Method to add documents to the Cassandra database.
@@ -185,31 +209,6 @@ export class CassandraStore extends VectorStore {
185
209
  }
186
210
  this.isInitialized = true;
187
211
  }
188
- /**
189
- * Method to build an CQL query for inserting vectors and documents into
190
- * the Cassandra database.
191
- * @param vectors The vectors to insert.
192
- * @param documents The documents to insert.
193
- * @returns The CQL query string.
194
- */
195
- buildInsertQuery(vectors, documents) {
196
- const queries = [];
197
- for (let index = 0; index < vectors.length; index += 1) {
198
- const vector = vectors[index];
199
- const document = documents[index];
200
- const metadataColNames = Object.keys(document.metadata);
201
- const metadataVals = Object.values(document.metadata);
202
- const metadataInsert = metadataColNames.length > 0 ? ", " + metadataColNames.join(", ") : "";
203
- const query = `INSERT INTO ${this.keyspace}.${this.table} (vector, text${metadataInsert}) VALUES ([${vector}], '${document.pageContent}'${metadataVals.length > 0
204
- ? ", " +
205
- metadataVals
206
- .map((val) => (typeof val === "number" ? val : `'${val}'`))
207
- .join(", ")
208
- : ""});`;
209
- queries.push(query);
210
- }
211
- return queries;
212
- }
213
212
  buildWhereClause(filter) {
214
213
  const whereClause = Object.entries(filter)
215
214
  .map(([key, value]) => `${key} = '${value}'`)
@@ -228,4 +227,100 @@ export class CassandraStore extends VectorStore {
228
227
  const whereClause = filter ? this.buildWhereClause(filter) : "";
229
228
  return `SELECT * FROM ${this.keyspace}.${this.table} ${whereClause} ORDER BY vector ANN OF [${query}] LIMIT ${k}`;
230
229
  }
230
+ /**
231
+ * Method for inserting vectors and documents into the Cassandra database in a batch.
232
+ * @param batchVectors The list of vectors to insert.
233
+ * @param batchDocuments The list of documents to insert.
234
+ * @returns Promise that resolves when the batch has been inserted.
235
+ */
236
+ async executeInsert(batchVectors, batchDocuments) {
237
+ // Input validation: Check if the lengths of batchVectors and batchDocuments are the same
238
+ if (batchVectors.length !== batchDocuments.length) {
239
+ throw new Error(`The lengths of vectors (${batchVectors.length}) and documents (${batchDocuments.length}) must be the same.`);
240
+ }
241
+ // Initialize an array to hold query objects
242
+ const queries = [];
243
+ // Loop through each vector and document in the batch
244
+ for (let i = 0; i < batchVectors.length; i += 1) {
245
+ // Convert the list of numbers to a Float32Array, the driver's expected format of a vector
246
+ const preparedVector = new Float32Array(batchVectors[i]);
247
+ // Retrieve the corresponding document
248
+ const document = batchDocuments[i];
249
+ // Extract metadata column names and values from the document
250
+ const metadataColNames = Object.keys(document.metadata);
251
+ const metadataVals = Object.values(document.metadata);
252
+ // Prepare the metadata columns string for the query, if metadata exists
253
+ const metadataInsert = metadataColNames.length > 0 ? ", " + metadataColNames.join(", ") : "";
254
+ // Construct the query string and parameters
255
+ const query = {
256
+ query: `INSERT INTO ${this.keyspace}.${this.table} (vector, text${metadataInsert})
257
+ VALUES (?, ?${", ?".repeat(metadataColNames.length)})`,
258
+ params: [preparedVector, document.pageContent, ...metadataVals],
259
+ };
260
+ // Add the query to the list
261
+ queries.push(query);
262
+ }
263
+ // Execute the queries: use a batch if multiple, otherwise execute a single query
264
+ if (queries.length === 1) {
265
+ await this.client.execute(queries[0].query, queries[0].params, {
266
+ prepare: true,
267
+ });
268
+ }
269
+ else {
270
+ await this.client.batch(queries, { prepare: true, logged: false });
271
+ }
272
+ }
273
+ /**
274
+ * Method for inserting vectors and documents into the Cassandra database in
275
+ * parallel, keeping within maxConcurrency number of active insert statements.
276
+ * @param vectors The vectors to insert.
277
+ * @param documents The documents to insert.
278
+ * @returns Promise that resolves when the documents have been added.
279
+ */
280
+ async insertAll(vectors, documents) {
281
+ // Input validation: Check if the lengths of vectors and documents are the same
282
+ if (vectors.length !== documents.length) {
283
+ throw new Error(`The lengths of vectors (${vectors.length}) and documents (${documents.length}) must be the same.`);
284
+ }
285
+ // Early exit: If there are no vectors or documents to insert, return immediately
286
+ if (vectors.length === 0) {
287
+ return;
288
+ }
289
+ // Ensure the store is initialized before proceeding
290
+ if (!this.isInitialized) {
291
+ await this.initialize();
292
+ }
293
+ // Initialize an array to hold promises for each batch insert
294
+ const insertPromises = [];
295
+ // Buffers to hold the current batch of vectors and documents
296
+ let currentBatchVectors = [];
297
+ let currentBatchDocuments = [];
298
+ // Loop through each vector/document pair to insert; we use
299
+ // <= vectors.length to ensure the last batch is inserted
300
+ for (let i = 0; i <= vectors.length; i += 1) {
301
+ // Check if we're still within the array boundaries
302
+ if (i < vectors.length) {
303
+ // Add the current vector and document to the batch
304
+ currentBatchVectors.push(vectors[i]);
305
+ currentBatchDocuments.push(documents[i]);
306
+ }
307
+ // Check if we've reached the batch size or end of the array
308
+ if (currentBatchVectors.length >= this.batchSize ||
309
+ i === vectors.length) {
310
+ // Only proceed if there are items in the current batch
311
+ if (currentBatchVectors.length > 0) {
312
+ // Create copies of the current batch arrays to use in the async insert operation
313
+ const batchVectors = [...currentBatchVectors];
314
+ const batchDocuments = [...currentBatchDocuments];
315
+ // Execute the insert using the AsyncCaller - it will handle concurrency and queueing.
316
+ insertPromises.push(this.asyncCaller.call(() => this.executeInsert(batchVectors, batchDocuments)));
317
+ // Clear the current buffers for the next iteration
318
+ currentBatchVectors = [];
319
+ currentBatchDocuments = [];
320
+ }
321
+ }
322
+ }
323
+ // Wait for all insert operations to complete.
324
+ await Promise.all(insertPromises);
325
+ }
231
326
  }
package/dist/vectorstores/pgvector.cjs
@@ -74,6 +74,12 @@ class PGVectorStore extends base_js_1.VectorStore {
74
74
  writable: true,
75
75
  value: void 0
76
76
  });
77
+ Object.defineProperty(this, "chunkSize", {
78
+ enumerable: true,
79
+ configurable: true,
80
+ writable: true,
81
+ value: 500
82
+ });
77
83
  this.tableName = config.tableName;
78
84
  this.filter = config.filter;
79
85
  this.vectorColumnName = config.columns?.vectorColumnName ?? "embedding";
@@ -82,6 +88,7 @@ class PGVectorStore extends base_js_1.VectorStore {
82
88
  this.metadataColumnName = config.columns?.metadataColumnName ?? "metadata";
83
89
  const pool = new pg_1.default.Pool(config.postgresConnectionOptions);
84
90
  this.pool = pool;
91
+ this.chunkSize = config.chunkSize ?? 500;
85
92
  this._verbose =
86
93
  (0, env_js_1.getEnvironmentVariable)("LANGCHAIN_VERBOSE") === "true" ??
87
94
  !!config.verbose;
@@ -132,9 +139,9 @@ class PGVectorStore extends base_js_1.VectorStore {
132
139
  * @param chunkIndex - The starting index for generating query placeholders based on chunk positioning.
133
140
  * @returns The complete SQL INSERT INTO query string.
134
141
  */
135
- buildInsertQuery(rows, chunkIndex) {
142
+ buildInsertQuery(rows) {
136
143
  const valuesPlaceholders = rows
137
- .map((_, j) => this.generatePlaceholderForRowAt(chunkIndex + j))
144
+ .map((_, j) => this.generatePlaceholderForRowAt(j))
138
145
  .join(", ");
139
146
  const text = `
140
147
  INSERT INTO ${this.tableName}(
@@ -163,10 +170,9 @@ class PGVectorStore extends base_js_1.VectorStore {
163
170
  documents[idx].metadata,
164
171
  ];
165
172
  });
166
- const chunkSize = 500;
167
- for (let i = 0; i < rows.length; i += chunkSize) {
168
- const chunk = rows.slice(i, i + chunkSize);
169
- const insertQuery = this.buildInsertQuery(chunk, i);
173
+ for (let i = 0; i < rows.length; i += this.chunkSize) {
174
+ const chunk = rows.slice(i, i + this.chunkSize);
175
+ const insertQuery = this.buildInsertQuery(chunk);
170
176
  const flatValues = chunk.flat();
171
177
  try {
172
178
  await this.pool.query(insertQuery, flatValues);
@@ -270,7 +276,7 @@ class PGVectorStore extends base_js_1.VectorStore {
270
276
  * @returns Promise that resolves when all clients are closed and the pool is terminated.
271
277
  */
272
278
  async end() {
273
- await this.client?.release();
279
+ this.client?.release();
274
280
  return this.pool.end();
275
281
  }
276
282
  }
package/dist/vectorstores/pgvector.d.ts
@@ -19,6 +19,12 @@ export interface PGVectorStoreArgs {
19
19
  };
20
20
  filter?: Metadata;
21
21
  verbose?: boolean;
22
+ /**
23
+ * The amount of documents to chunk by when
24
+ * adding vectors.
25
+ * @default 500
26
+ */
27
+ chunkSize?: number;
22
28
  }
23
29
  /**
24
30
  * Class that provides an interface to a Postgres vector database. It
@@ -37,6 +43,7 @@ export declare class PGVectorStore extends VectorStore {
37
43
  _verbose?: boolean;
38
44
  pool: Pool;
39
45
  client?: PoolClient;
46
+ chunkSize: number;
40
47
  _vectorstoreType(): string;
41
48
  private constructor();
42
49
  /**