langchain 0.0.132 → 0.0.133

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/dist/agents/chat/outputParser.cjs +2 -1
  2. package/dist/agents/chat/outputParser.js +2 -1
  3. package/dist/agents/executor.cjs +106 -7
  4. package/dist/agents/executor.d.ts +23 -0
  5. package/dist/agents/executor.js +104 -6
  6. package/dist/agents/mrkl/outputParser.cjs +2 -1
  7. package/dist/agents/mrkl/outputParser.js +2 -1
  8. package/dist/chat_models/googlevertexai.cjs +1 -1
  9. package/dist/chat_models/googlevertexai.d.ts +2 -2
  10. package/dist/chat_models/googlevertexai.js +2 -2
  11. package/dist/chat_models/ollama.cjs +8 -8
  12. package/dist/chat_models/ollama.js +8 -8
  13. package/dist/document_loaders/web/notionapi.cjs +153 -74
  14. package/dist/document_loaders/web/notionapi.d.ts +19 -10
  15. package/dist/document_loaders/web/notionapi.js +154 -75
  16. package/dist/embeddings/googlevertexai.cjs +1 -1
  17. package/dist/embeddings/googlevertexai.d.ts +2 -2
  18. package/dist/embeddings/googlevertexai.js +2 -2
  19. package/dist/experimental/multimodal_embeddings/googlevertexai.cjs +1 -1
  20. package/dist/experimental/multimodal_embeddings/googlevertexai.d.ts +2 -2
  21. package/dist/experimental/multimodal_embeddings/googlevertexai.js +2 -2
  22. package/dist/llms/googlevertexai.cjs +1 -1
  23. package/dist/llms/googlevertexai.js +2 -2
  24. package/dist/load/import_constants.cjs +1 -0
  25. package/dist/load/import_constants.js +1 -0
  26. package/dist/schema/output_parser.cjs +2 -2
  27. package/dist/schema/output_parser.js +2 -2
  28. package/dist/tools/base.cjs +26 -2
  29. package/dist/tools/base.d.ts +9 -0
  30. package/dist/tools/base.js +24 -1
  31. package/dist/types/googlevertexai-types.d.ts +8 -3
  32. package/dist/util/googlevertexai-connection.cjs +49 -15
  33. package/dist/util/googlevertexai-connection.d.ts +12 -4
  34. package/dist/util/googlevertexai-connection.js +46 -13
  35. package/dist/vectorstores/googlevertexai.cjs +550 -0
  36. package/dist/vectorstores/googlevertexai.d.ts +180 -0
  37. package/dist/vectorstores/googlevertexai.js +519 -0
  38. package/dist/vectorstores/vectara.cjs +11 -2
  39. package/dist/vectorstores/vectara.d.ts +10 -1
  40. package/dist/vectorstores/vectara.js +11 -2
  41. package/package.json +10 -2
  42. package/vectorstores/googlevertexai.cjs +1 -0
  43. package/vectorstores/googlevertexai.d.ts +1 -0
  44. package/vectorstores/googlevertexai.js +1 -0
@@ -0,0 +1,180 @@
1
+ import { VectorStore } from "./base.js";
2
+ import { Embeddings } from "../embeddings/base.js";
3
+ import { Document, DocumentInput } from "../document.js";
4
+ import { GoogleVertexAIConnection } from "../util/googlevertexai-connection.js";
5
+ import { AsyncCaller, AsyncCallerCallOptions, AsyncCallerParams } from "../util/async_caller.js";
6
+ import { GoogleVertexAIConnectionParams, GoogleVertexAIResponse } from "../types/googlevertexai-types.js";
7
+ import { Docstore } from "../schema/index.js";
8
/**
 * Constructor input for an `IdDocument`: the regular document fields plus an
 * optional identifier.
 */
export interface IdDocumentInput extends DocumentInput {
    /** Identifier of the document, if it has one. */
    id?: string;
}
14
/**
 * A Document that may additionally carry its own ID.
 */
export declare class IdDocument extends Document implements IdDocumentInput {
    /** Identifier of the document; may be absent. */
    id?: string;

    /**
     * @param fields - Document fields, optionally including `id`.
     */
    constructor(fields: IdDocumentInput);
}
21
/**
 * Connection parameters for requests that address an index endpoint.
 */
interface IndexEndpointConnectionParams extends GoogleVertexAIConnectionParams {
    /** The id of the index endpoint. */
    indexEndpoint: string;
}
24
/**
 * One entry of the `deployedIndexes` list found in an index endpoint
 * response.
 */
interface DeployedIndex {
    /** Id of the deployed index. */
    id: string;
    /** Reference to the index that this deployment serves. */
    index: string;
}
28
/**
 * Response returned when the description of an index endpoint is requested.
 */
interface IndexEndpointResponse extends GoogleVertexAIResponse {
    data: {
        /** The indexes deployed on this endpoint. */
        deployedIndexes: DeployedIndex[];
        /** Domain name of the endpoint's public API. */
        publicEndpointDomainName: string;
    };
}
34
/**
 * Connection that requests the description of a single index endpoint.
 */
declare class IndexEndpointConnection extends GoogleVertexAIConnection<AsyncCallerCallOptions, IndexEndpointResponse> {
    /** The id of the index endpoint being described. */
    indexEndpoint: string;

    constructor(fields: IndexEndpointConnectionParams, caller: AsyncCaller);

    /** Resolves to the URL of the index endpoint resource. */
    buildUrl(): Promise<string>;

    /** HTTP method used for the request. */
    buildMethod(): string;

    /**
     * Issues the request.
     * @param options - Per-call options for the async caller.
     */
    request(options: AsyncCallerCallOptions): Promise<IndexEndpointResponse>;
}
41
/**
 * Parameters required to delete documents from the matching engine.
 */
export interface MatchingEngineDeleteParams {
    /** String IDs of the datapoints to delete. */
    ids: string[];
}
48
/**
 * Connection parameters for removing datapoints from an index.
 */
interface RemoveDatapointParams extends GoogleVertexAIConnectionParams {
    /** The id of the index the datapoints are removed from. */
    index: string;
}
51
/**
 * Response to a remove-datapoints request. Declares no fields beyond the
 * base response type.
 */
interface RemoveDatapointResponse extends GoogleVertexAIResponse {
}
53
/**
 * Connection that removes datapoints from an index.
 */
declare class RemoveDatapointConnection extends GoogleVertexAIConnection<AsyncCallerCallOptions, RemoveDatapointResponse> {
    /** The id of the index the datapoints are removed from. */
    index: string;

    constructor(fields: RemoveDatapointParams, caller: AsyncCaller);

    /** Resolves to the URL of the index's `removeDatapoints` action. */
    buildUrl(): Promise<string>;

    /** HTTP method used for the request. */
    buildMethod(): string;

    /**
     * Issues the removal request.
     * @param datapointIds - Ids of the datapoints to remove.
     * @param options - Per-call options for the async caller.
     */
    request(datapointIds: string[], options: AsyncCallerCallOptions): Promise<RemoveDatapointResponse>;
}
60
/**
 * Connection parameters for upserting datapoints into an index.
 */
interface UpsertDatapointParams extends GoogleVertexAIConnectionParams {
    /** The id of the index the datapoints are written to. */
    index: string;
}
63
/**
 * A restriction attached to a datapoint or to a query. The default
 * `MatchingEngine` implementation builds one per flattened metadata key.
 */
export interface Restriction {
    /** Namespace the lists below apply to. */
    namespace: string;
    /** Values placed on the allow list for this namespace. */
    allowList?: string[];
    /** Values placed on the deny list for this namespace. */
    denyList?: string[];
}
68
/**
 * Crowding tag that may be attached to an index datapoint.
 */
interface CrowdingTag {
    /** The crowding attribute value. */
    crowdingAttribute: string;
}
71
/**
 * A single datapoint as sent to the index.
 */
interface IndexDatapoint {
    /** Id of the datapoint. */
    datapointId: string;
    /** The vector stored for the datapoint. */
    featureVector: number[];
    /** Restrictions attached to the datapoint, if any. */
    restricts?: Restriction[];
    /** Optional crowding tag. */
    crowdingTag?: CrowdingTag;
}
77
/**
 * Response to an upsert-datapoints request. Declares no fields beyond the
 * base response type.
 */
interface UpsertDatapointResponse extends GoogleVertexAIResponse {
}
79
/**
 * Connection that upserts datapoints into an index.
 */
declare class UpsertDatapointConnection extends GoogleVertexAIConnection<AsyncCallerCallOptions, UpsertDatapointResponse> {
    /** The id of the index the datapoints are written to. */
    index: string;

    constructor(fields: UpsertDatapointParams, caller: AsyncCaller);

    /** Resolves to the URL of the index's `upsertDatapoints` action. */
    buildUrl(): Promise<string>;

    /** HTTP method used for the request. */
    buildMethod(): string;

    /**
     * Issues the upsert request.
     * @param datapoints - Datapoints to insert or update.
     * @param options - Per-call options for the async caller.
     */
    request(datapoints: IndexDatapoint[], options: AsyncCallerCallOptions): Promise<UpsertDatapointResponse>;
}
86
/**
 * Information about the Matching Engine public API endpoint.
 * Exported mainly so that it can be used in tests.
 */
export interface PublicAPIEndpointInfo {
    /** Host of the public API endpoint, when known. */
    apiEndpoint?: string;
    /** Id of the deployed index, when known. */
    deployedIndexId?: string;
}
94
/**
 * Parameters necessary to configure the Matching Engine.
 */
export interface MatchingEngineArgs extends GoogleVertexAIConnectionParams, IndexEndpointConnectionParams, UpsertDatapointParams {
    /** Docstore that retains the documents, stored by ID. */
    docstore: Docstore;
    /** Parameters used to construct the shared `AsyncCaller`. */
    callerParams?: AsyncCallerParams;
    /** Call options passed when the index endpoint description is requested. */
    callerOptions?: AsyncCallerCallOptions;
    /**
     * Host to send queries to. When omitted it is looked up from the index
     * endpoint the first time it is needed.
     */
    apiEndpoint?: string;
    /**
     * Id of the deployed index. When omitted it is looked up from the index
     * endpoint the first time it is needed.
     */
    deployedIndexId?: string;
}
104
/**
 * A class that represents a connection to a Google Vertex AI Matching Engine
 * instance.
 */
export declare class MatchingEngine extends VectorStore implements MatchingEngineArgs {
    /** Filters accepted by similarity searches are arrays of restrictions. */
    FilterType: Restriction[];

    /**
     * Docstore that retains the document, stored by ID
     */
    docstore: Docstore;

    /**
     * The host that nearest-neighbor queries are sent to.
     */
    apiEndpoint: string;

    /** API version segment used when building request URLs. */
    apiVersion: string;

    /** Host used for index and index endpoint requests. */
    endpoint: string;

    /** Location segment used when building request URLs. */
    location: string;

    /**
     * The id for the index endpoint
     */
    indexEndpoint: string;

    /**
     * The id for the index
     */
    index: string;

    /**
     * The id for the "deployed index", which is an identifier in the
     * index endpoint that references the index (but is not the index id)
     */
    deployedIndexId: string;

    /** Parameters the shared `AsyncCaller` is constructed with. */
    callerParams: AsyncCallerParams;

    /** Call options passed when the index endpoint description is requested. */
    callerOptions: AsyncCallerCallOptions;

    /** Caller shared by every connection created by this instance. */
    caller: AsyncCaller;

    /** Connection used to describe the index endpoint. */
    indexEndpointClient: IndexEndpointConnection;

    /** Connection used to remove datapoints from the index. */
    removeDatapointClient: RemoveDatapointConnection;

    /** Connection used to upsert datapoints into the index. */
    upsertDatapointClient: UpsertDatapointConnection;

    /**
     * @param embeddings - Embeddings used to embed documents before they are added.
     * @param args - Matching Engine configuration.
     */
    constructor(embeddings: Embeddings, args: MatchingEngineArgs);

    _vectorstoreType(): string;

    /**
     * Embeds the page content of each document and adds the results.
     * @param documents - Documents to add.
     */
    addDocuments(documents: Document[]): Promise<void>;

    /**
     * Upserts one datapoint per vector and stores the matching documents in
     * the docstore.
     * @param vectors - Vectors, one per document.
     * @param documents - Documents, in the same order as `vectors`.
     */
    addVectors(vectors: number[][], documents: Document[]): Promise<void>;

    /**
     * Flattens document metadata into a single-level object; arrays are kept
     * as arrays of strings.
     * @param documentMetadata - The metadata from a document
     */
    cleanMetadata(documentMetadata: Record<string, any>): {
        [key: string]: string | number | boolean | string[] | null;
    };

    /**
     * Converts the metadata of a document into the Restriction objects that
     * are passed to the Matching Engine and stored with the datapoint.
     * The default implementation flattens the metadata and puts every value
     * on an "allowList". Subclasses can move some of them to a "denyList" or
     * add further restrictions (for example, formatting dates differently or
     * deriving extra restrictions from a date).
     * @param documentMetadata - The metadata from a document
     * @returns a Restriction[] (or an array of a subclass, from the FilterType)
     */
    metadataToRestrictions(documentMetadata: Record<string, any>): this["FilterType"];

    /**
     * Create an index datapoint for the vector and document id.
     * If the document has no id, one is generated and set on the document.
     * @param vector - The vector to store
     * @param document - The document the vector belongs to
     */
    buildDatapoint(vector: number[], document: IdDocument): IndexDatapoint;

    /**
     * Removes the datapoints with the given ids from the index.
     * @param params - The ids to remove
     */
    delete(params: MatchingEngineDeleteParams): Promise<void>;

    /**
     * Finds the `k` nearest neighbors of the query vector.
     * @param query - The query vector
     * @param k - Number of neighbors to request
     * @param filter - Optional restrictions applied to the query
     * @returns Pairs of document and the distance reported for it
     */
    similaritySearchVectorWithScore(query: number[], k: number, filter?: this["FilterType"]): Promise<[Document, number][]>;

    /**
     * For this index endpoint, figure out what API Endpoint URL and deployed
     * index ID should be used for queries.
     * Also sets the `apiEndpoint` and `deployedIndexId` properties for future use.
     * @returns The API endpoint and deployed index id that were determined
     */
    determinePublicAPIEndpoint(): Promise<PublicAPIEndpointInfo>;

    /** Returns `apiEndpoint`, looking it up first when it is not yet set. */
    getPublicAPIEndpoint(): Promise<string>;

    /** Returns `deployedIndexId`, looking it up first when it is not yet set. */
    getDeployedIndexId(): Promise<string>;

    /**
     * Creates a MatchingEngine and adds one document per text.
     * @param texts - Page content of the documents
     * @param metadatas - One metadata object per text, or one shared by all
     * @param embeddings - Embeddings used to embed the texts
     * @param dbConfig - Matching Engine configuration
     */
    static fromTexts(texts: string[], metadatas: object[] | object, embeddings: Embeddings, dbConfig: MatchingEngineArgs): Promise<VectorStore>;

    /**
     * Creates a MatchingEngine and adds the given documents.
     * @param docs - Documents to add
     * @param embeddings - Embeddings used to embed the documents
     * @param dbConfig - Matching Engine configuration
     */
    static fromDocuments(docs: Document[], embeddings: Embeddings, dbConfig: MatchingEngineArgs): Promise<VectorStore>;
}
180
+ export {};
@@ -0,0 +1,519 @@
1
+ import * as uuid from "uuid";
2
+ import flatten from "flat";
3
+ import { VectorStore } from "./base.js";
4
+ import { Document } from "../document.js";
5
+ import { GoogleVertexAIConnection } from "../util/googlevertexai-connection.js";
6
+ import { AsyncCaller, } from "../util/async_caller.js";
7
/**
 * Document subclass that may also carry the document's ID.
 */
export class IdDocument extends Document {
    /**
     * @param fields Document fields; `fields.id`, when present, becomes the
     * id of the new document.
     */
    constructor(fields) {
        super(fields);
        // `id` is an own, enumerable, writable and configurable property.
        Object.defineProperty(this, "id", {
            value: fields.id,
            writable: true,
            configurable: true,
            enumerable: true,
        });
    }
}
22
/**
 * Connection that fetches the description of one index endpoint.
 */
class IndexEndpointConnection extends GoogleVertexAIConnection {
    /**
     * @param fields Connection parameters, including `indexEndpoint`
     * @param caller Caller handed to the base connection
     */
    constructor(fields, caller) {
        super(fields, caller);
        // The id of the index endpoint to describe.
        Object.defineProperty(this, "indexEndpoint", {
            value: fields.indexEndpoint,
            writable: true,
            configurable: true,
            enumerable: true,
        });
    }

    /** Builds the URL of the index endpoint resource in the authenticated project. */
    async buildUrl() {
        const projectId = await this.auth.getProjectId();
        return `https://${this.endpoint}/${this.apiVersion}/projects/${projectId}/locations/${this.location}/indexEndpoints/${this.indexEndpoint}`;
    }

    /** The endpoint description is read with a GET. */
    buildMethod() {
        return "GET";
    }

    /**
     * Sends the request; no request body is supplied.
     * @param options Per-call options
     */
    async request(options) {
        const noBody = undefined;
        return this._request(noBody, options);
    }
}
45
/**
 * Connection that removes datapoints from an index.
 */
class RemoveDatapointConnection extends GoogleVertexAIConnection {
    /**
     * @param fields Connection parameters, including `index`
     * @param caller Caller handed to the base connection
     */
    constructor(fields, caller) {
        super(fields, caller);
        // The id of the index to remove datapoints from.
        Object.defineProperty(this, "index", {
            value: fields.index,
            writable: true,
            configurable: true,
            enumerable: true,
        });
    }

    /** Builds the URL of the index's `removeDatapoints` action. */
    async buildUrl() {
        const projectId = await this.auth.getProjectId();
        return `https://${this.endpoint}/${this.apiVersion}/projects/${projectId}/locations/${this.location}/indexes/${this.index}:removeDatapoints`;
    }

    /** The removal is sent as a POST. */
    buildMethod() {
        return "POST";
    }

    /**
     * Sends the ids to remove as the `datapointIds` field of the request body.
     * @param datapointIds Ids of the datapoints to remove
     * @param options Per-call options
     */
    async request(datapointIds, options) {
        return this._request({ datapointIds }, options);
    }
}
71
/**
 * Connection that upserts datapoints into an index.
 */
class UpsertDatapointConnection extends GoogleVertexAIConnection {
    /**
     * @param fields Connection parameters, including `index`
     * @param caller Caller handed to the base connection
     */
    constructor(fields, caller) {
        super(fields, caller);
        // The id of the index to write datapoints to.
        Object.defineProperty(this, "index", {
            value: fields.index,
            writable: true,
            configurable: true,
            enumerable: true,
        });
    }

    /** Builds the URL of the index's `upsertDatapoints` action. */
    async buildUrl() {
        const projectId = await this.auth.getProjectId();
        return `https://${this.endpoint}/${this.apiVersion}/projects/${projectId}/locations/${this.location}/indexes/${this.index}:upsertDatapoints`;
    }

    /** The upsert is sent as a POST. */
    buildMethod() {
        return "POST";
    }

    /**
     * Sends the datapoints as the `datapoints` field of the request body.
     * @param datapoints Datapoints to insert or update
     * @param options Per-call options
     */
    async request(datapoints, options) {
        return this._request({ datapoints }, options);
    }
}
97
/**
 * Connection that runs nearest-neighbor queries against an index endpoint.
 */
class FindNeighborsConnection extends GoogleVertexAIConnection {
    /**
     * @param params Connection parameters, including `indexEndpoint` and
     * `deployedIndexId`
     * @param caller Caller handed to the base connection
     */
    constructor(params, caller) {
        super(params, caller);
        // Both values are kept on the instance; only `indexEndpoint` is used
        // when the URL is built.
        const descriptorFor = (value) => ({
            value,
            writable: true,
            configurable: true,
            enumerable: true,
        });
        Object.defineProperty(this, "indexEndpoint", descriptorFor(params.indexEndpoint));
        Object.defineProperty(this, "deployedIndexId", descriptorFor(params.deployedIndexId));
    }

    /** Builds the URL of the index endpoint's `findNeighbors` action. */
    async buildUrl() {
        const projectId = await this.auth.getProjectId();
        return `https://${this.endpoint}/${this.apiVersion}/projects/${projectId}/locations/${this.location}/indexEndpoints/${this.indexEndpoint}:findNeighbors`;
    }

    /** Queries are sent as a POST. */
    buildMethod() {
        return "POST";
    }

    /**
     * Sends the query as the request body, unchanged.
     * @param request The find-neighbors request body
     * @param options Per-call options
     */
    async request(request, options) {
        const body = request;
        return this._request(body, options);
    }
}
127
/**
 * A class that represents a connection to a Google Vertex AI Matching Engine
 * instance.
 *
 * Vectors are upserted into and removed from the index, while the documents
 * themselves are kept in the supplied docstore under their datapoint id.
 */
export class MatchingEngine extends VectorStore {
    /**
     * @param embeddings Embeddings used by `addDocuments`
     * @param args Matching Engine configuration. `apiVersion`, `endpoint` and
     * `location` fall back to "v1", "us-central1-aiplatform.googleapis.com"
     * and "us-central1" when not supplied.
     */
    constructor(embeddings, args) {
        super(embeddings, args);
        // Fields are created in a fixed order with one shared descriptor
        // (own, enumerable, writable, configurable).
        const defineField = (name, value) => {
            Object.defineProperty(this, name, {
                enumerable: true,
                configurable: true,
                writable: true,
                value,
            });
        };
        this.embeddings = embeddings;
        // Docstore that retains the document, stored by ID
        defineField("docstore", args.docstore);
        // The host that queries are sent to; looked up lazily when not given
        defineField("apiEndpoint", args.apiEndpoint ?? undefined);
        defineField("apiVersion", args.apiVersion ?? "v1");
        defineField("endpoint", args.endpoint ?? "us-central1-aiplatform.googleapis.com");
        defineField("location", args.location ?? "us-central1");
        // The id for the index endpoint
        defineField("indexEndpoint", args.indexEndpoint ?? undefined);
        // The id for the index
        defineField("index", args.index ?? undefined);
        // The id for the "deployed index", which is an identifier in the
        // index endpoint that references the index (but is not the index id)
        defineField("deployedIndexId", args.deployedIndexId ?? undefined);
        defineField("callerParams", args.callerParams ?? undefined);
        defineField("callerOptions", args.callerOptions ?? undefined);
        defineField("caller", new AsyncCaller(this.callerParams || {}));
        const sharedParams = {
            endpoint: this.endpoint,
            location: this.location,
            apiVersion: this.apiVersion,
        };
        defineField("indexEndpointClient", new IndexEndpointConnection({ ...sharedParams, indexEndpoint: this.indexEndpoint }, this.caller));
        defineField("removeDatapointClient", new RemoveDatapointConnection({ ...sharedParams, index: this.index }, this.caller));
        defineField("upsertDatapointClient", new UpsertDatapointConnection({ ...sharedParams, index: this.index }, this.caller));
    }

    _vectorstoreType() {
        return "googlevertexai";
    }

    /**
     * Embeds the page content of every document and adds the resulting
     * vectors together with the documents.
     * @param documents Documents to add
     */
    async addDocuments(documents) {
        const texts = documents.map((doc) => doc.pageContent);
        const vectors = await this.embeddings.embedDocuments(texts);
        return this.addVectors(vectors, documents);
    }

    /**
     * Upserts one datapoint per vector and then stores the documents in the
     * docstore under their ids. Documents without an id are given one.
     * @param vectors Vectors, one per document
     * @param documents Documents, in the same order as `vectors`
     * @throws Error if the lengths differ, or if the upsert response carries
     * an unexpected body (in which case nothing is written to the docstore)
     */
    async addVectors(vectors, documents) {
        if (vectors.length !== documents.length) {
            throw new Error(`Vectors and metadata must have the same length`);
        }
        if (vectors.length === 0) {
            // Nothing to upsert, so no request is sent
            return;
        }
        const datapoints = vectors.map((vector, idx) => this.buildDatapoint(vector, documents[idx]));
        const options = {};
        const response = await this.upsertDatapointClient.request(datapoints, options);
        // Nothing in the body of the response means we saved it ok. Anything
        // else is reported rather than silently leaving the docstore without
        // the documents.
        if (Object.keys(response?.data ?? {}).length > 0) {
            throw new Error(`Unexpected response while upserting datapoints: ${JSON.stringify(response.data)}`);
        }
        const docsToStore = {};
        for (const doc of documents) {
            if (doc.id) {
                docsToStore[doc.id] = doc;
            }
        }
        await this.docstore.add(docsToStore);
    }

    /**
     * Flattens metadata into a single-level object keyed by dotted paths.
     * Arrays are not spread over indexed keys; they are kept under their own
     * path as arrays of strings.
     * @param documentMetadata The metadata from a document; a missing value
     * is treated as empty metadata
     * @returns The flattened metadata
     */
    // TODO: Refactor this into a utility type and use with pinecone as well?
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    cleanMetadata(documentMetadata) {
        const source = documentMetadata ?? {};
        // Collects every array found in the (nested) metadata, keyed by its
        // dotted path, with each element converted to a string.
        function getStringArrays(prefix, 
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        m) {
            let ret = {};
            Object.keys(m).forEach((key) => {
                const newPrefix = prefix.length > 0 ? `${prefix}.${key}` : key;
                const val = m[key];
                if (!val) {
                    // Ignore it
                }
                else if (Array.isArray(val)) {
                    // Make sure everything in the array is a string
                    ret[newPrefix] = val.map((v) => `${v}`);
                }
                else if (typeof val === "object") {
                    const subArrays = getStringArrays(newPrefix, val);
                    ret = { ...ret, ...subArrays };
                }
            });
            return ret;
        }
        const stringArrays = getStringArrays("", source);
        const flatMetadata = flatten(source);
        // Drop the per-element keys ("tags.0", "tags.1", ...) that flattening
        // produced for arrays; the string arrays replace them.
        const arrayPrefixes = Object.keys(stringArrays).map((arrayKey) => `${arrayKey}.`);
        for (const key of Object.keys(flatMetadata)) {
            if (arrayPrefixes.some((matchKey) => key.startsWith(matchKey))) {
                delete flatMetadata[key];
            }
        }
        return {
            ...flatMetadata,
            ...stringArrays,
        };
    }

    /**
     * Given the metadata from a document, convert it to an array of Restriction
     * objects that may be passed to the Matching Engine and stored.
     * The default implementation flattens any metadata and includes it as
     * an "allowList". Subclasses can choose to convert some of these to
     * "denyList" items or to add additional restrictions (for example, to format
     * dates into a different structure or to add additional restrictions
     * based on the date).
     * Keys whose value is null, undefined or an empty array produce no
     * restriction.
     * @param documentMetadata - The metadata from a document
     * @returns a Restriction[] (or an array of a subclass, from the FilterType)
     */
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    metadataToRestrictions(documentMetadata) {
        const metadata = this.cleanMetadata(documentMetadata);
        const restrictions = [];
        for (const key of Object.keys(metadata)) {
            const val = metadata[key];
            if (val === null || val === undefined) {
                // No usable value for this key
                continue;
            }
            let valArray;
            if (Array.isArray(val)) {
                if (val.length === 0) {
                    // An empty array would otherwise become the single token ""
                    continue;
                }
                valArray = val;
            }
            else {
                valArray = [`${val}`];
            }
            // TODO: get which ones should be on the deny list
            restrictions.push({
                namespace: key,
                allowList: valArray,
            });
        }
        return restrictions;
    }

    /**
     * Create an index datapoint for the vector and document id.
     * If the document has no id, one is generated and set on the document.
     * @param vector The vector to store
     * @param document The document the vector belongs to
     * @returns The datapoint, with `restricts` set only when the metadata
     * yields at least one restriction
     */
    buildDatapoint(vector, document) {
        if (!document.id) {
            // eslint-disable-next-line no-param-reassign
            document.id = uuid.v4();
        }
        const ret = {
            datapointId: document.id,
            featureVector: vector,
        };
        const restrictions = this.metadataToRestrictions(document.metadata);
        if (restrictions?.length > 0) {
            ret.restricts = restrictions;
        }
        return ret;
    }

    /**
     * Removes the datapoints with the given ids from the index. Does nothing
     * when no ids are given.
     * @param params Object whose `ids` lists the datapoints to remove
     */
    async delete(params) {
        const ids = params?.ids ?? [];
        if (ids.length === 0) {
            return;
        }
        const options = {};
        await this.removeDatapointClient.request(ids, options);
    }

    /**
     * Finds the `k` nearest neighbors of the query vector and looks each of
     * them up in the docstore.
     * @param query The query vector
     * @param k Number of neighbors to request
     * @param filter Optional restrictions applied to the query
     * @returns Pairs of document and the distance reported for it
     */
    async similaritySearchVectorWithScore(query, k, filter) {
        // Format the query into the request
        const deployedIndexId = await this.getDeployedIndexId();
        const requestQuery = {
            neighborCount: k,
            datapoint: {
                datapointId: `0`,
                featureVector: query,
            },
        };
        if (filter) {
            requestQuery.datapoint.restricts = filter;
        }
        const request = {
            deployedIndexId,
            queries: [requestQuery],
        };
        // Build the connection.
        // Has to be done here, since we defer getting the endpoint until
        // we need it.
        const apiEndpoint = await this.getPublicAPIEndpoint();
        const findNeighborsParams = {
            endpoint: apiEndpoint,
            indexEndpoint: this.indexEndpoint,
            apiVersion: this.apiVersion,
            location: this.location,
            deployedIndexId,
        };
        const connection = new FindNeighborsConnection(findNeighborsParams, this.caller);
        // Make the call
        const options = {};
        const response = await connection.request(request, options);
        // Only one query was sent, so only the first result set is read
        const nearestNeighbors = response?.data?.nearestNeighbors ?? [];
        const neighbors = nearestNeighbors[0]?.neighbors ?? [];
        // Get the document for each datapoint id and return them
        return Promise.all(neighbors.map(async (neighbor) => {
            const id = neighbor?.datapoint?.datapointId;
            const distance = neighbor?.distance;
            let doc;
            try {
                doc = await this.docstore.search(id);
            }
            catch (xx) {
                // Documents that are in the index are returned, even if they
                // are not in the document store, to allow for some way to get
                // the id so they can be deleted.
                console.error(xx);
                console.warn([
                    `Document with id "${id}" is missing from the backing docstore.`,
                    `This can occur if you clear the docstore without deleting from the corresponding Matching Engine index.`,
                    `To resolve this, you should call .delete() with this id as part of the "ids" parameter.`,
                ].join("\n"));
                doc = new Document({ pageContent: `Missing document ${id}` });
            }
            doc.id ??= id;
            return [doc, distance];
        }));
    }

    /**
     * For this index endpoint, figure out what API Endpoint URL and deployed
     * index ID should be used for queries.
     * Also sets the `apiEndpoint` and `deployedIndexId` properties for future
     * use. A value that is already set is kept when the response does not
     * provide a replacement.
     * @returns The API endpoint and deployed index id after the lookup
     */
    async determinePublicAPIEndpoint() {
        const response = await this.indexEndpointClient.request(this.callerOptions ?? {});
        // Get the endpoint, without discarding a configured one when the
        // response does not name a public domain
        this.apiEndpoint = response?.data?.publicEndpointDomainName ?? this.apiEndpoint;
        // Determine which of the deployed indexes match the index id
        // and get the deployed index id. The list of deployed index ids
        // contain the "index name" or path, but not the index id by itself,
        // so we need to extract it from the name
        const indexPathPattern = /projects\/.+\/locations\/.+\/indexes\/(.+)$/;
        const deployedIndexes = response?.data?.deployedIndexes ?? [];
        const deployedIndex = deployedIndexes.find((candidate) => {
            const match = indexPathPattern.exec(candidate?.index ?? "");
            return match !== null && match[1] === this.index;
        });
        if (deployedIndex) {
            this.deployedIndexId = deployedIndex.id;
        }
        return {
            apiEndpoint: this.apiEndpoint,
            deployedIndexId: this.deployedIndexId,
        };
    }

    /** Returns `apiEndpoint`, looking it up first when it is not yet set. */
    async getPublicAPIEndpoint() {
        return (this.apiEndpoint ?? (await this.determinePublicAPIEndpoint()).apiEndpoint);
    }

    /** Returns `deployedIndexId`, looking it up first when it is not yet set. */
    async getDeployedIndexId() {
        return (this.deployedIndexId ??
            (await this.determinePublicAPIEndpoint()).deployedIndexId);
    }

    /**
     * Creates a MatchingEngine and adds one document per text.
     * @param texts Page content of the documents
     * @param metadatas One metadata object per text, or a single object
     * shared by all texts. A text without a matching entry gets empty
     * metadata.
     * @param embeddings Embeddings used to embed the texts
     * @param dbConfig Matching Engine configuration
     */
    static async fromTexts(texts, metadatas, embeddings, dbConfig) {
        const docs = texts.map((text, index) => ({
            pageContent: text,
            metadata: (Array.isArray(metadatas) ? metadatas[index] : metadatas) ?? {},
        }));
        return this.fromDocuments(docs, embeddings, dbConfig);
    }

    /**
     * Creates a MatchingEngine and adds the given documents.
     * @param docs Documents to add
     * @param embeddings Embeddings used to embed the documents
     * @param dbConfig Matching Engine configuration
     */
    static async fromDocuments(docs, embeddings, dbConfig) {
        const ret = new MatchingEngine(embeddings, dbConfig);
        await ret.addDocuments(docs);
        return ret;
    }
}