@vectorstores/azure 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,1636 @@
1
+ import { Document, metadataDictToNode, BaseVectorStore, MetadataMode, nodeToMetadata, FilterOperator, FilterCondition, VectorStoreQueryMode } from '@vectorstores/core';
2
+ export * from './storage.js';
3
+ import { KnownVectorSearchAlgorithmKind, KnownAnalyzerNames, KnownSearchFieldDataType, KnownVectorSearchAlgorithmMetric, KnownVectorSearchCompressionKind, SearchClient, SearchIndexClient, AzureKeyCredential, IndexDocumentsBatch } from '@azure/search-documents';
4
+ import { DefaultAzureCredential, ManagedIdentityCredential } from '@azure/identity';
5
+ import { consoleLogger, getEnv } from '@vectorstores/env';
6
+ import { MongoClient } from 'mongodb';
7
+ import { VectorEmbeddingDistanceFunction, VectorEmbeddingDataType, VectorIndexType, CosmosClient } from '@azure/cosmos';
8
+
9
/**
 * Reads items from a Cosmos DB container and converts each one to a Document.
 */
class SimpleCosmosDBReader {
    /**
     * @param client Cosmos DB client; only `client.database(name).container(name)` is used here.
     */
    constructor(client) {
        this.client = client;
    }

    /**
     * Loads data from a Cosmos DB container.
     *
     * @param config Reader configuration:
     *   - `databaseName`, `containerName`: required.
     *   - `query`: query text, defaults to "SELECT * FROM c".
     *   - `fields`: item properties whose values form the document text, defaults to ["text"].
     *     Array values are flattened one level before joining.
     *   - `fieldSeparator`: separator used when joining the field values, defaults to "".
     *   - `metadataFields`: optional item properties copied into the document metadata.
     * @returns {Promise<Document[]>}
     * @throws {Error} When a required name is missing, or when querying/mapping fails
     *   (the underlying error is available as `cause`).
     */
    async loadData(config) {
        if (!config.databaseName || !config.containerName) {
            throw new Error("databaseName and containerName are required");
        }
        const container = this.client
            .database(config.databaseName)
            .container(config.containerName);
        const query = config.query || "SELECT * FROM c";
        const fields = config.fields || ["text"];
        const fieldSeparator = config.fieldSeparator || "";
        const { metadataFields } = config;
        try {
            const { resources } = await container.items.query(query).fetchAll();
            const documents = [];
            for (const item of resources) {
                const text = fields
                    .map((name) => item[name])
                    .flat()
                    .join(fieldSeparator);
                const metadata = metadataFields
                    ? Object.fromEntries(metadataFields.map((name) => [name, item[name]]))
                    : {};
                documents.push(new Document({
                    id_: item.id,
                    text,
                    metadata
                }));
            }
            return documents;
        } catch (error) {
            // Keep the original error reachable so callers can inspect codes and stack traces.
            throw new Error(`Error loading data from Cosmos DB: ${error}`, {
                cause: error
            });
        }
    }
}
56
+
57
/**
 * Fixed resource names and default limits used by the Azure AI Search vector store.
 */
const AzureAISearchVectorStoreConfig = {
    // Names of the vector search algorithm configurations
    ALGORITHM_HNSW_NAME: "myHnsw",
    ALGORITHM_EXHAUSTIVE_KNN_NAME: "myExhaustiveKnn",
    // Names of the vector search profiles
    PROFILE_HNSW_NAME: "myHnswProfile",
    PROFILE_EXHAUSTIVE_KNN_NAME: "myExhaustiveKnnProfile",
    // Names of the vector compression configurations
    COMPRESSION_TYPE_SCALAR: "myScalarCompression",
    COMPRESSION_TYPE_BINARY: "myBinaryCompression",
    // Name of the semantic ranking configuration
    SEMANTIC_CONFIG_NAME: "mySemanticConfig",
    // Default maximum number of documents sent in a single request
    DEFAULT_MAX_BATCH_SIZE: 700,
    // Default maximum payload size: 14 MB expressed in bytes
    DEFAULT_MAX_MB_SIZE: 14 * (1024 * 1024),
    DEFAULT_USER_AGENT_PREFIX: "vectorstores-ts",
    DEFAULT_AZURE_API_VERSION: "2024-09-01-preview"
};
72
+
73
/**
 * Base search strategy: runs a query against the search client and converts the
 * returned documents into nodes, scores and ids. Subclasses override
 * `createSearchQuery` and/or `createQueryVector` to change what is searched.
 */
class AzureQueryResultSearchBase {
    /**
     * @param query Query object; `similarityTopK` is read here, subclasses read more.
     * @param fieldMapping Maps the logical keys "id", "chunk", "metadata", "doc_id"
     *   (and "embedding" in subclasses) to index field names.
     * @param odataFilter Optional OData filter string.
     * @param searchClient Client exposing `search(searchText, options)`.
     */
    constructor(query, fieldMapping, odataFilter, searchClient) {
        this._query = query;
        this.fieldMapping = fieldMapping;
        this.odataFilter = odataFilter;
        this.searchClient = searchClient;
    }

    /** Index fields requested for every result. */
    get selectFields() {
        return [
            this.fieldMapping["id"],
            this.fieldMapping["chunk"],
            this.fieldMapping["metadata"],
            this.fieldMapping["doc_id"]
        ];
    }

    /** Search text; the base strategy matches everything. */
    createSearchQuery() {
        return "*";
    }

    /** Vector queries; the base strategy has none. */
    createQueryVector() {
        return null;
    }

    /**
     * Executes the search and maps each hit to a node.
     *
     * A hit whose stored metadata cannot be parsed or converted to a node is
     * logged and skipped; the remaining hits are still returned.
     *
     * @param searchQuery Search text passed to the client.
     * @param vectorQueries Vector queries; a falsy value is treated as an empty list.
     * @returns {Promise<{nodes: Array, similarities: Array, ids: Array}>}
     * @throws {Error} When no search client is set.
     */
    async _createQueryResult(searchQuery, vectorQueries) {
        const queries = vectorQueries || [];
        if (!this.searchClient) {
            throw new Error("SearchClient is not set");
        }
        const searchResults = await this.searchClient.search(searchQuery, {
            top: this._query.similarityTopK,
            select: this.selectFields,
            filter: this.odataFilter || "",
            vectorSearchOptions: {
                queries
            }
        });
        const idResult = [];
        const nodeResult = [];
        const scoreResult = [];
        for await (const result of searchResults.results) {
            const { document } = result;
            const nodeId = document[this.fieldMapping["id"]];
            const score = result["score"];
            const chunk = document[this.fieldMapping["chunk"]];
            try {
                // Parsing happens inside the try so that one corrupt metadata
                // string does not abort the whole result set.
                const metadataStr = document[this.fieldMapping["metadata"]];
                const metadata = typeof metadataStr === "string" && metadataStr.length > 0
                    ? JSON.parse(metadataStr)
                    : {};
                const node = metadataDictToNode(metadata);
                node.setContent(chunk);
                consoleLogger.log(`Retrieved node id ${nodeId}`);
                idResult.push(nodeId);
                nodeResult.push(node);
                scoreResult.push(score);
            } catch (err) {
                consoleLogger.error(`Error while parsing metadata for node id ${nodeId}. Error: ${err}`);
            }
        }
        consoleLogger.log(`Search query '${searchQuery}' returned ${idResult.length} results.`);
        return {
            nodes: nodeResult,
            similarities: scoreResult,
            ids: idResult
        };
    }

    /** Builds the query parts and runs the search. */
    async search() {
        const searchQuery = this.createSearchQuery();
        const vectorQueries = this.createQueryVector();
        return await this._createQueryResult(searchQuery, vectorQueries);
    }
}
145
/**
 * Pure vector search: keeps the base "*" search text and adds a single
 * vector query built from the query embedding.
 */
class AzureQueryResultSearchDefault extends AzureQueryResultSearchBase {
    /**
     * @returns A one-element list holding the vector query for the embedding field.
     * @throws {Error} When the query carries no embedding.
     */
    createQueryVector() {
        const { queryEmbedding } = this._query;
        if (!queryEmbedding) {
            throw new Error("query.queryEmbedding is missing");
        }
        const vectorQuery = {
            kind: "vector",
            vector: queryEmbedding,
            kNearestNeighborsCount: this._query.similarityTopK,
            fields: [this.fieldMapping["embedding"]]
        };
        return [vectorQuery];
    }
}
162
/**
 * Text-only search: uses the query string as search text and no vector query.
 */
class AzureQueryResultSearchSparse extends AzureQueryResultSearchBase {
    /**
     * @returns The query string to search for.
     * @throws {Error} When the query has no query string.
     */
    createSearchQuery() {
        const { queryStr } = this._query;
        if (queryStr) {
            return queryStr;
        }
        throw new Error("Query missing query string");
    }
}
170
/**
 * Hybrid search: combines the vector query of the default strategy with the
 * search text of the sparse strategy by delegating to fresh instances of both.
 */
class AzureQueryResultSearchHybrid extends AzureQueryResultSearchBase {
    /** @returns The vector queries produced by the default (vector) strategy. */
    createQueryVector() {
        const vectorStrategy = new AzureQueryResultSearchDefault(
            this._query,
            this.fieldMapping,
            this.odataFilter,
            this.searchClient
        );
        return vectorStrategy.createQueryVector();
    }

    /** @returns The search text produced by the sparse (text) strategy. */
    createSearchQuery() {
        const textStrategy = new AzureQueryResultSearchSparse(
            this._query,
            this.fieldMapping,
            this.odataFilter,
            this.searchClient
        );
        return textStrategy.createSearchQuery();
    }
}
178
/**
 * Hybrid search with semantic reranking: issues a "semantic" query and reports
 * the reranker score of each hit as its similarity.
 */
class AzureQueryResultSearchSemanticHybrid extends AzureQueryResultSearchHybrid {
    /**
     * @returns A one-element list holding the vector query for the embedding field.
     * @throws {Error} When the query carries no embedding.
     */
    createQueryVector() {
        if (!this._query.queryEmbedding) {
            throw new Error("query.queryEmbedding is missing");
        }
        return [
            {
                kind: "vector",
                vector: this._query.queryEmbedding,
                // kNearestNeighborsCount is set to 50 to align with the number of documents accepted by the Azure semantic reranking model.
                // https://learn.microsoft.com/azure/search/semantic-search-overview
                kNearestNeighborsCount: 50,
                fields: [
                    this.fieldMapping["embedding"]
                ]
            }
        ];
    }

    /**
     * Executes the semantic search and maps each hit to a node.
     *
     * A hit whose stored metadata cannot be parsed or converted to a node is
     * logged and skipped; the remaining hits are still returned.
     *
     * @param searchQuery Search text passed to the client.
     * @param vectorQueries Vector queries passed to the client.
     * @returns {Promise<{nodes: Array, similarities: Array, ids: Array}>}
     * @throws {Error} When no search client is set.
     */
    async _createQueryResult(searchQuery, vectorQueries) {
        if (!this.searchClient) {
            throw new Error("SearchClient not set");
        }
        const searchResults = await this.searchClient.search(searchQuery, {
            vectorSearchOptions: {
                queries: vectorQueries
            },
            semanticSearchOptions: {
                configurationName: AzureAISearchVectorStoreConfig.SEMANTIC_CONFIG_NAME
            },
            top: this._query.similarityTopK,
            select: this.selectFields,
            filter: this.odataFilter || "",
            queryType: "semantic"
        });
        const idResult = [];
        const nodeResult = [];
        const scoreResult = [];
        for await (const result of searchResults.results) {
            const { document } = result;
            const nodeId = document[this.fieldMapping["id"]];
            const chunk = document[this.fieldMapping["chunk"]];
            const score = result["rerankerScore"];
            try {
                // Parsing happens inside the try so that one corrupt metadata
                // string does not abort the whole result set; non-string values
                // are treated as "no metadata", matching the base strategy.
                const metadataStr = document[this.fieldMapping["metadata"]];
                const metadata = typeof metadataStr === "string" && metadataStr.length > 0
                    ? JSON.parse(metadataStr)
                    : {};
                const node = metadataDictToNode(metadata);
                node.setContent(chunk);
                idResult.push(nodeId);
                nodeResult.push(node);
                scoreResult.push(score);
            } catch (err) {
                consoleLogger.error(`Error while parsing metadata for node id ${nodeId}. Error: ${err}`);
            }
        }
        return {
            nodes: nodeResult,
            similarities: scoreResult,
            ids: idResult
        };
    }
}
241
+
242
/**
 * Supported index management operations.
 * Declared with `var` to keep the original hoisting behaviour of this binding.
 */
var IndexManagement = {
    NO_VALIDATION: "NoValidation",
    VALIDATE_INDEX: "ValidateIndex",
    CREATE_IF_NOT_EXISTS: "CreateIfNotExists"
};
250
/**
 * Supported types for metadata fields in an Azure AI Search index; these
 * correspond to the value types allowed in a flat metadata dictionary.
 * Declared with `var` to keep the original hoisting behaviour of this binding.
 */
var MetadataIndexFieldType = {
    STRING: "Edm.String",
    BOOLEAN: "Edm.Boolean",
    INT32: "Edm.Int32",
    INT64: "Edm.Int64",
    DOUBLE: "Edm.Double",
    COLLECTION: "Collection(Edm.String)"
};
263
/**
 * Builds the options for a paged search request.
 *
 * @param fieldMapping Maps logical keys (e.g. "id", "chunk") to index field names.
 * @param filterStr OData filter string.
 * @param batchSize Maximum number of results to return (`top`).
 * @param offset Number of results to skip (`skip`).
 * @returns Search options selecting every mapped index field.
 */
const createSearchRequest = (fieldMapping, filterStr, batchSize, offset) => {
    return {
        filter: filterStr,
        top: batchSize,
        skip: offset,
        // `select` must name fields as they exist in the index, i.e. the mapped
        // values; the logical keys only coincide with them for default naming.
        select: Object.values(fieldMapping)
    };
};
271
+ /**
272
+ * Azure AI Search vector store.
273
+ *
274
+ * @example
275
+ ```typescript
276
+ import { DefaultAzureCredential, getBearerTokenProvider} from "@azure/identity";
277
+ import {KnownAnalyzerNames, KnownVectorSearchAlgorithmKind } from "@azure/search-documents";
278
+
279
+ // 1- Setup Azure OpenAI
280
+ const azureADTokenProvider = getBearerTokenProvider(
281
+ new DefaultAzureCredential(),
282
+ "https://cognitiveservices.azure.com/.default",
283
+ );
284
+
285
+ // IMPORTANT: You need to deploy your own embedding model as well as your own chat completion model
286
+ // NOTE: You can use whatever embedding model and language model that is supported by vectorstores
287
+ const azure = {
288
+ azureADTokenProvider,
289
+ deployment: process.env.AZURE_DEPLOYMENT_NAME,
290
+ };
291
+ Settings.llm = new OpenAI({ azure });
292
+ Settings.embedModel = new OpenAIEmbedding({
293
+ model: process.env.EMBEDDING_MODEL,
294
+ azure: {
295
+ ...azure,
296
+ deployment: process.env.EMBEDDING_MODEL,
297
+ },
298
+ });
299
+
300
+ // ---------------------------------------------------------
301
+ // 2- Setup Azure AI Search
302
+ // Define env variables in .env file
303
+ // AZURE_AI_SEARCH_ENDPOINT=
304
+ // AZURE_AI_SEARCH_KEY=
305
+ // AZURE_OPENAI_ENDPOINT=
306
+ // EMBEDDING_MODEL=text-embedding-ada-002
307
+ // AZURE_DEPLOYMENT_NAME=gpt-4
308
+ // AZURE_API_VERSION=2024-09-01-preview
309
+
310
+ // Define index name
311
+ const indexName = "vectorstores-vector-store";
312
+
313
+ // ---------------------------------------------------------
314
+ // 3a- Create Index (if it does not exist)
315
+ // id: Edm.String
316
+ // chunk: Edm.String
317
+ // embedding: Collection(Edm.Single)
318
+ // metadata: Edm.String
319
+ // doc_id: Edm.String
320
+ // author: Edm.String
321
+ // theme: Edm.String
322
+ // director: Edm.String
323
+
324
+ // Define metadata fields with their respective configurations
325
+ const metadataFields = {
326
+ author: "author",
327
+ theme: ["theme", MetadataIndexFieldType.STRING],
328
+ director: "director",
329
+ };
330
+
331
+ // Define index parameters and vector store configuration
332
+ // Index validation:
333
+ // - IndexManagement.VALIDATE_INDEX: will validate before creating the embedding index and will throw a runtime error if the index does not exist
334
+ // - IndexManagement.NO_VALIDATION: will try to access the index and will throw a runtime error if the index does not exist
335
+ // - IndexManagement.CREATE_IF_NOT_EXISTS: will create the index if it does not exist
336
+
337
+ const vectorStore = new AzureAISearchVectorStore({
338
+ filterableMetadataFieldKeys:
339
+ metadataFields as unknown as FilterableMetadataFieldKeysType,
340
+ indexName,
341
+ indexManagement: IndexManagement.CREATE_IF_NOT_EXISTS,
342
+ idFieldKey: "id",
343
+ chunkFieldKey: "chunk",
344
+ embeddingFieldKey: "embedding",
345
+ metadataStringFieldKey: "metadata",
346
+ docIdFieldKey: "doc_id",
347
+ embeddingDimensionality: 1536,
348
+ hiddenFieldKeys: ["embedding"],
349
+ languageAnalyzer: KnownAnalyzerNames.EnLucene,
350
+ // store vectors on disk
351
+ vectorAlgorithmType: KnownVectorSearchAlgorithmKind.ExhaustiveKnn,
352
+
353
+ // Optional: Set to "scalar" or "binary" if using HNSW
354
+ compressionType: KnownVectorSearchCompressionKind.BinaryQuantization,
355
+ });
356
+
357
+ // ---------------------------------------------------------
358
+ // 3b- Loading documents
359
+ // Load the documents stored in the data/paul_graham/ using the SimpleDirectoryReader
360
+ // NOTE: You can use whatever reader that is supported by vectorstores
361
+
362
+ // Load documents using a directory reader
363
+ const documents = await new SimpleDirectoryReader().loadData(
364
+ "data/paul_graham/",
365
+ );
366
+ const storageContext = await storageContextFromDefaults({ vectorStore });
367
+
368
+ // Create index from documents with the specified storage context
369
+ const index = await VectorStoreIndex.fromDocuments(documents, {
370
+ storageContext,
371
+ docStoreStrategy: DocStoreStrategy.UPSERTS,
372
+ });
373
+
374
+ const queryEngine = index.asQueryEngine();
375
+ const response = await queryEngine.query({
376
+ query: "What did the author do growing up?",
377
+ similarityTopK: 3,
378
+ } as any);
379
+ console.log({ response });
380
+ */ class AzureAISearchVectorStore extends BaseVectorStore {
381
+ #languageAnalyzer;
382
+ #embeddingDimensionality;
383
+ #vectorProfileName;
384
+ #compressionType;
385
+ #indexManagement;
386
+ #indexName;
387
+ #fieldMapping;
388
+ #metadataToIndexFieldMap;
389
+ #idFieldKey;
390
+ #chunkFieldKey;
391
+ #embeddingFieldKey;
392
+ #docIdFieldKey;
393
+ #metadataStringFieldKey;
394
+ #serviceApiVersion;
395
+ #indexMapping;
396
+ #hiddenFiledKeys;
397
+ constructor(options){
398
+ super(options), this.storesText = true, this.#metadataToIndexFieldMap = new Map(), this.flatMetadata = true;
399
+ // set default values
400
+ options.vectorAlgorithmType ||= KnownVectorSearchAlgorithmKind.ExhaustiveKnn;
401
+ options.languageAnalyzer ||= KnownAnalyzerNames.EnLucene;
402
+ options.indexManagement ||= "NoValidation";
403
+ options.embeddingDimensionality ||= 1536;
404
+ options.serviceApiVersion ||= getEnv("AZURE_SEARCH_API_VERSION");
405
+ options.hiddenFieldKeys ||= [];
406
+ // set props
407
+ this.#serviceApiVersion = options.serviceApiVersion || AzureAISearchVectorStoreConfig.DEFAULT_AZURE_API_VERSION;
408
+ this.#languageAnalyzer = options.languageAnalyzer;
409
+ this.#compressionType = options.compressionType;
410
+ this.#embeddingDimensionality = options.embeddingDimensionality;
411
+ this.#indexManagement = options.indexManagement;
412
+ this.#indexName = options.indexName;
413
+ this.#idFieldKey = options.idFieldKey;
414
+ this.#docIdFieldKey = options.docIdFieldKey;
415
+ this.#chunkFieldKey = options.chunkFieldKey;
416
+ this.#embeddingFieldKey = options.embeddingFieldKey;
417
+ this.#metadataStringFieldKey = options.metadataStringFieldKey;
418
+ this.#hiddenFiledKeys = options.hiddenFieldKeys;
419
+ this.#indexMapping = options.indexMapping || this.#defaultIndexMapping;
420
+ // Default field mapping
421
+ this.#fieldMapping = {
422
+ ["id"]: options.idFieldKey,
423
+ ["doc_id"]: options.docIdFieldKey,
424
+ ["chunk"]: options.chunkFieldKey,
425
+ ["embedding"]: options.embeddingFieldKey,
426
+ ["metadata"]: options.metadataStringFieldKey
427
+ };
428
+ this.#setVectorProfileName(options.vectorAlgorithmType);
429
+ this.#valideSearchOrIndexClient(options);
430
+ // Normalizing metadata to index fields
431
+ this.#metadataToIndexFieldMap = this.#normalizeMetadataToIndexFields(options.filterableMetadataFieldKeys);
432
+ }
433
+ // private
434
+ #normalizeMetadataToIndexFields(filterableMetadataFieldKeys) {
435
+ const indexFieldSpec = new Map();
436
+ if (Array.isArray(filterableMetadataFieldKeys)) {
437
+ // if filterableMetadataFieldKeys is an array, use the field name as the index field name
438
+ // eg. [
439
+ // "author",
440
+ // "theme",
441
+ // "director"
442
+ // ] => {
443
+ // "author": ["author", "Edm.String"],
444
+ // "theme": ["theme", "Edm.String"],
445
+ // "director": ["director", "Edm.String"]
446
+ // }
447
+ filterableMetadataFieldKeys.forEach((field)=>{
448
+ indexFieldSpec.set(field, [
449
+ field,
450
+ "Edm.String"
451
+ ]);
452
+ });
453
+ } else if (typeof filterableMetadataFieldKeys === "object") {
454
+ // if filterableMetadataFieldKeys is an object, use the key as the index field name
455
+ // and the value as the metadata field name
456
+ // eg. {
457
+ // "author": "author",
458
+ // "theme": ["topic", MetadataIndexFieldType.STRING],
459
+ // "director": "director"
460
+ // } => {
461
+ // "author": ["author", "Edm.String"],
462
+ // "theme": ["topic", "Edm.String"],
463
+ // "director": ["director", "Edm.String"]
464
+ // }
465
+ // we also support specifying the metadata field type
466
+ // MetadataIndexFieldType.INT32 --> "Edm.Int32"
467
+ // MetadataIndexFieldType.INT64 --> "Edm.Int64"
468
+ // MetadataIndexFieldType.DOUBLE --> "Edm.Double"
469
+ // MetadataIndexFieldType.BOOLEAN --> "Edm.Boolean"
470
+ // MetadataIndexFieldType.COLLECTION --> "Collection(Edm.String)"
471
+ Object.entries(filterableMetadataFieldKeys).forEach(([k, v])=>{
472
+ if (Array.isArray(v)) {
473
+ indexFieldSpec.set(k, [
474
+ v[0],
475
+ v[1]
476
+ ]);
477
+ } else {
478
+ switch(v){
479
+ case "Edm.String":
480
+ indexFieldSpec.set(k, [
481
+ v,
482
+ "Edm.String"
483
+ ]);
484
+ break;
485
+ case "Edm.Int32":
486
+ indexFieldSpec.set(k, [
487
+ v,
488
+ "Edm.Int32"
489
+ ]);
490
+ break;
491
+ case "Edm.Int64":
492
+ indexFieldSpec.set(k, [
493
+ v,
494
+ "Edm.Int64"
495
+ ]);
496
+ break;
497
+ case "Edm.Double":
498
+ indexFieldSpec.set(k, [
499
+ v,
500
+ "Edm.Double"
501
+ ]);
502
+ break;
503
+ case "Edm.Boolean":
504
+ indexFieldSpec.set(k, [
505
+ v,
506
+ "Edm.Boolean"
507
+ ]);
508
+ break;
509
+ case "Collection(Edm.String)":
510
+ indexFieldSpec.set(k, [
511
+ v,
512
+ "Collection(Edm.String)"
513
+ ]);
514
+ break;
515
+ default:
516
+ // Index field name and metadata field name may differ
517
+ // Use String as the default index field type
518
+ indexFieldSpec.set(k, [
519
+ v,
520
+ "Edm.String"
521
+ ]);
522
+ break;
523
+ }
524
+ }
525
+ });
526
+ }
527
+ return indexFieldSpec;
528
+ }
529
+ #defaultIndexMapping(node, metadata) {
530
+ // include metadata fields in the index document
531
+ const filterableMetadata = {};
532
+ for (const [fieldName, _fieldType] of this.#metadataToIndexFieldMap.values()){
533
+ filterableMetadata[fieldName] = metadata[fieldName];
534
+ }
535
+ return {
536
+ [this.#embeddingFieldKey]: node.getEmbedding(),
537
+ [this.#idFieldKey]: node.id_,
538
+ [this.#docIdFieldKey]: node.id_,
539
+ [this.#chunkFieldKey]: node.getContent(MetadataMode.NONE),
540
+ [this.#metadataStringFieldKey]: JSON.stringify(metadata),
541
+ ...filterableMetadata
542
+ };
543
+ }
544
+ #setVectorProfileName(vectorAlgorithmType) {
545
+ if (vectorAlgorithmType === KnownVectorSearchAlgorithmKind.ExhaustiveKnn) {
546
+ this.#vectorProfileName = "myExhaustiveKnnProfile";
547
+ } else if (vectorAlgorithmType === KnownVectorSearchAlgorithmKind.Hnsw) {
548
+ this.#vectorProfileName = "myHnswProfile";
549
+ } else {
550
+ throw new Error("Only 'exhaustiveKnn' and 'hnsw' are supported for vectorAlgorithmType");
551
+ }
552
+ }
553
+ /**
554
+ * Create a list of index fields for storing metadata values.
555
+ * @returns List of index fields for storing metadata values
556
+ */ #createMetadataIndexFields() {
557
+ const indexFields = [];
558
+ for (const [fieldName, fieldType] of this.#metadataToIndexFieldMap.values()){
559
+ if (this.#fieldMapping[fieldName]) {
560
+ consoleLogger.log(`Skipping metadata field ${fieldName} as it is already mapped to an index field`);
561
+ continue;
562
+ }
563
+ let indexFieldType;
564
+ switch(fieldType){
565
+ case "Edm.String":
566
+ indexFieldType = KnownSearchFieldDataType.String;
567
+ break;
568
+ case "Edm.Int32":
569
+ indexFieldType = KnownSearchFieldDataType.Int32;
570
+ break;
571
+ case "Edm.Int64":
572
+ indexFieldType = KnownSearchFieldDataType.Int64;
573
+ break;
574
+ case "Edm.Double":
575
+ indexFieldType = KnownSearchFieldDataType.Double;
576
+ break;
577
+ case "Edm.Boolean":
578
+ indexFieldType = KnownSearchFieldDataType.Boolean;
579
+ break;
580
+ case "Collection(Edm.String)":
581
+ indexFieldType = `Collection(${KnownSearchFieldDataType.String})`;
582
+ break;
583
+ default:
584
+ throw new Error(`Unsupported field type: ${fieldType}`);
585
+ }
586
+ indexFields.push({
587
+ name: fieldName,
588
+ type: indexFieldType,
589
+ filterable: true
590
+ });
591
+ }
592
+ return indexFields;
593
+ }
594
+ // index management
595
+ async #indexExists(indexName) {
596
+ if (!indexName) {
597
+ throw new Error(`options.indexName is not valid`);
598
+ }
599
+ const availableIndexNames = await this._indexClient?.listIndexesNames();
600
+ if (!availableIndexNames) {
601
+ return false;
602
+ }
603
+ let listOfIndexNames = await availableIndexNames.next();
604
+ const indexNames = [];
605
+ while(!listOfIndexNames.done){
606
+ indexNames.push(listOfIndexNames.value);
607
+ listOfIndexNames = await availableIndexNames.next();
608
+ }
609
+ return indexNames.includes(indexName);
610
+ }
611
+ async #createIndexIfNotExists(indexName) {
612
+ const indexExists = await this.#indexExists(indexName);
613
+ if (!indexExists) {
614
+ consoleLogger.log(`Index ${indexName} does not exist in Azure AI Search, creating index`);
615
+ await this.#createIndex(indexName);
616
+ }
617
+ }
618
+ /**
619
+ * Creates a default index based on the supplied index name, key field names and
620
+ * metadata filtering keys.
621
+ * @param indexName The name of the index to create
622
+ */ async #createIndex(indexName) {
623
+ consoleLogger.log(`Configuring ${indexName} fields for Azure AI Search`);
624
+ const fields = [
625
+ {
626
+ name: this.#fieldMapping["id"],
627
+ type: KnownSearchFieldDataType.String,
628
+ hidden: this.#hiddenFiledKeys?.includes(this.#fieldMapping["id"]),
629
+ key: true,
630
+ filterable: true,
631
+ retrievable: true,
632
+ searchable: true
633
+ },
634
+ {
635
+ name: this.#fieldMapping["chunk"],
636
+ type: KnownSearchFieldDataType.String,
637
+ hidden: this.#hiddenFiledKeys?.includes(this.#fieldMapping["chunk"]),
638
+ analyzerName: this.#languageAnalyzer,
639
+ searchable: true
640
+ },
641
+ {
642
+ name: this.#fieldMapping["embedding"],
643
+ type: `Collection(${KnownSearchFieldDataType.Single})`,
644
+ vectorSearchDimensions: this.#embeddingDimensionality,
645
+ vectorSearchProfileName: this.#vectorProfileName,
646
+ hidden: this.#hiddenFiledKeys?.includes(this.#fieldMapping["embedding"]),
647
+ searchable: true
648
+ },
649
+ {
650
+ name: this.#fieldMapping["metadata"],
651
+ hidden: this.#hiddenFiledKeys?.includes(this.#fieldMapping["metadata"]),
652
+ type: KnownSearchFieldDataType.String
653
+ },
654
+ {
655
+ name: this.#fieldMapping["doc_id"],
656
+ type: KnownSearchFieldDataType.String,
657
+ hidden: this.#hiddenFiledKeys?.includes(this.#fieldMapping["doc_id"]),
658
+ filterable: true,
659
+ retrievable: true,
660
+ searchable: true
661
+ }
662
+ ];
663
+ consoleLogger.log(`Configuring ${indexName} metadata fields`);
664
+ const metadataIndexFields = this.#createMetadataIndexFields();
665
+ fields.push(...metadataIndexFields);
666
+ consoleLogger.log(`Configuring ${indexName} vector search`);
667
+ const compressions = this.#getCompressions();
668
+ consoleLogger.log(`Configuring ${indexName} vector search with ${this.#compressionType} compression`);
669
+ const vectorSearch = {
670
+ algorithms: [
671
+ {
672
+ name: AzureAISearchVectorStoreConfig.ALGORITHM_HNSW_NAME,
673
+ kind: KnownVectorSearchAlgorithmKind.Hnsw,
674
+ parameters: {
675
+ m: 4,
676
+ efConstruction: 400,
677
+ efSearch: 500,
678
+ metric: KnownVectorSearchAlgorithmMetric.Cosine
679
+ }
680
+ },
681
+ {
682
+ name: AzureAISearchVectorStoreConfig.ALGORITHM_EXHAUSTIVE_KNN_NAME,
683
+ kind: KnownVectorSearchAlgorithmKind.ExhaustiveKnn,
684
+ parameters: {
685
+ metric: KnownVectorSearchAlgorithmMetric.Cosine
686
+ }
687
+ }
688
+ ],
689
+ compressions,
690
+ profiles: [
691
+ {
692
+ name: AzureAISearchVectorStoreConfig.PROFILE_HNSW_NAME,
693
+ algorithmConfigurationName: AzureAISearchVectorStoreConfig.ALGORITHM_HNSW_NAME,
694
+ compressionName: compressions?.[0]?.compressionName
695
+ },
696
+ {
697
+ name: AzureAISearchVectorStoreConfig.PROFILE_EXHAUSTIVE_KNN_NAME,
698
+ algorithmConfigurationName: AzureAISearchVectorStoreConfig.ALGORITHM_EXHAUSTIVE_KNN_NAME
699
+ }
700
+ ]
701
+ };
702
+ consoleLogger.log(`Configuring ${indexName} semantic search`);
703
+ const semanticConfig = {
704
+ name: AzureAISearchVectorStoreConfig.SEMANTIC_CONFIG_NAME,
705
+ prioritizedFields: {
706
+ contentFields: [
707
+ {
708
+ name: this.#fieldMapping["chunk"]
709
+ }
710
+ ],
711
+ keywordsFields: [
712
+ {
713
+ name: this.#fieldMapping["metadata"]
714
+ }
715
+ ],
716
+ titleField: {
717
+ name: this.#fieldMapping["id"]
718
+ }
719
+ }
720
+ };
721
+ const semanticSearch = {
722
+ configurations: [
723
+ semanticConfig
724
+ ]
725
+ };
726
+ const index = {
727
+ name: indexName,
728
+ fields: fields,
729
+ vectorSearch: vectorSearch,
730
+ semanticSearch: semanticSearch
731
+ };
732
+ consoleLogger.log(`Creating ${indexName} search index with configuration:`);
733
+ consoleLogger.log({
734
+ index
735
+ });
736
+ await this._indexClient?.createIndex(index);
737
+ }
738
+ /**
739
+ * Get the compressions for the vector search
740
+ * @returns Array of compressions. See {@link VectorSearchCompression}
741
+ */ #getCompressions() {
742
+ const compressions = [];
743
+ if (this.#compressionType === KnownVectorSearchCompressionKind.BinaryQuantization) {
744
+ compressions.push({
745
+ compressionName: AzureAISearchVectorStoreConfig.COMPRESSION_TYPE_BINARY,
746
+ kind: KnownVectorSearchCompressionKind.BinaryQuantization
747
+ });
748
+ } else if (this.#compressionType === KnownVectorSearchCompressionKind.ScalarQuantization) {
749
+ compressions.push({
750
+ compressionName: AzureAISearchVectorStoreConfig.COMPRESSION_TYPE_SCALAR,
751
+ kind: KnownVectorSearchCompressionKind.ScalarQuantization
752
+ });
753
+ }
754
+ return compressions;
755
+ }
756
+ #valideSearchOrIndexClient(options) {
757
+ if (options.searchClient) {
758
+ if (options.searchClient instanceof SearchClient) {
759
+ consoleLogger.log("Using provided Azure SearchClient");
760
+ this._searchClient = options.searchClient;
761
+ if (options.indexName) {
762
+ throw new Error("options.indexName cannot be supplied if using options.searchClient");
763
+ }
764
+ } else {
765
+ throw new Error("options.searchClient must be an instance of SearchClient");
766
+ }
767
+ } else {
768
+ this.createSearchClient(options);
769
+ }
770
+ if (options.indexClient) {
771
+ if (options.indexClient instanceof SearchIndexClient) {
772
+ if (!options.indexName) {
773
+ throw new Error("options.indexName must be supplied if using options.indexClient");
774
+ }
775
+ this._indexClient = options.indexClient;
776
+ } else {
777
+ throw new Error("options.indexClient must be an instance of SearchIndexClient");
778
+ }
779
+ } else {
780
+ this.createSearchIndexClient(options);
781
+ }
782
+ if (options.indexManagement === "CreateIfNotExists" && !this._indexClient) {
783
+ throw new Error("IndexManagement.CREATE_IF_NOT_EXISTS requires options.indexClient");
784
+ }
785
+ if (!this._searchClient && !this._indexClient) {
786
+ throw new Error("Either options.searchClient or options.indexClient must be supplied");
787
+ }
788
+ }
789
+ #buildCredentials(options) {
790
+ let { credential: credential, key, endpoint, indexName } = options;
791
+ // validate and use credential
792
+ if (credential) {
793
+ // if credential are provided, ensure they are an instance of valid credential instances
794
+ if (!(credential instanceof AzureKeyCredential || credential instanceof DefaultAzureCredential || credential instanceof ManagedIdentityCredential)) {
795
+ throw new Error("options.credential must be an instance of AzureKeyCredential or DefaultAzureCredential or ManagedIdentityCredential");
796
+ }
797
+ } else {
798
+ key ??= getEnv("AZURE_AI_SEARCH_KEY");
799
+ if (key) {
800
+ consoleLogger.log("Using provided Azure Search key");
801
+ credential = new AzureKeyCredential(key);
802
+ } else {
803
+ const clientId = getEnv("AZURE_CLIENT_ID");
804
+ if (clientId) {
805
+ consoleLogger.log("Using Azure Managed identity");
806
+ credential = new ManagedIdentityCredential(clientId);
807
+ } else {
808
+ // if key wasn't provided, try using DefaultAzureCredential
809
+ consoleLogger.log("Using Default Azure Credential");
810
+ credential = new DefaultAzureCredential();
811
+ }
812
+ }
813
+ }
814
+ // validate and use endpoint
815
+ endpoint ??= getEnv("AZURE_AI_SEARCH_ENDPOINT");
816
+ if (!endpoint) {
817
+ throw new Error("options.endpoint must be provided or set as an environment variable: AZURE_AI_SEARCH_ENDPOINT");
818
+ } else {
819
+ // check if enpoint is a valid URL
820
+ try {
821
+ new URL(endpoint);
822
+ } catch (error) {
823
+ throw new Error(`options.endpoint must be a valid URL.`);
824
+ }
825
+ }
826
+ // validate and use indexName
827
+ if (!indexName) {
828
+ if (this._searchClient) {
829
+ indexName = this._searchClient.indexName;
830
+ } else {
831
+ throw new Error("options.indexName must be provided");
832
+ }
833
+ }
834
+ return {
835
+ credential,
836
+ endpoint,
837
+ indexName
838
+ };
839
+ }
840
+ createSearchIndexClient(options) {
841
+ const { credential, endpoint } = this.#buildCredentials(options);
842
+ this._indexClient = new SearchIndexClient(endpoint, credential, {
843
+ serviceVersion: this.#serviceApiVersion,
844
+ userAgentOptions: {
845
+ userAgentPrefix: options.userAgent ?? AzureAISearchVectorStoreConfig.DEFAULT_USER_AGENT_PREFIX
846
+ }
847
+ });
848
+ }
849
+ createSearchClient(options) {
850
+ const { credential, endpoint, indexName } = this.#buildCredentials(options);
851
+ this._searchClient = new SearchClient(endpoint, indexName, credential, {
852
+ serviceVersion: this.#serviceApiVersion,
853
+ userAgentOptions: {
854
+ userAgentPrefix: options.userAgent ?? AzureAISearchVectorStoreConfig.DEFAULT_USER_AGENT_PREFIX
855
+ }
856
+ });
857
+ }
858
+ async #validateIndex(indexName) {
859
+ if (this._indexClient && indexName && !await this.#indexExists(indexName)) {
860
+ throw new Error(`Validation failed, index ${indexName} does not exist.`);
861
+ }
862
+ }
863
+ /**
864
+ * Create AI Search index document from embedding result.
865
+ * @param node The node to create the index document from
866
+ * @returns The mapped index document from the node
867
+ */ #createIndexDocument(node) {
868
+ consoleLogger.log(`Mapping indexed document: ${node.id_}`);
869
+ const metadata = nodeToMetadata(node, true, this.#chunkFieldKey, this.flatMetadata);
870
+ return this.#indexMapping(node, metadata);
871
+ }
872
+ /**
873
+ * Generate an OData filter string using supplied metadata filters.
874
+ * @param metadataFilters
875
+ * @returns
876
+ */ #createOdataFilter(metadataFilters) {
877
+ const odataFilter = [];
878
+ for (const subfilter of metadataFilters.filters){
879
+ // Join values with ' or ' to create an OR condition inside the any function
880
+ const metadataMapping = this.#metadataToIndexFieldMap.get(subfilter.key);
881
+ if (!metadataMapping) {
882
+ throw new Error(`Metadata field '${subfilter.key}' is missing a mapping to an index field. Please provide an entry in 'filterableMetadataFieldKeys' for this vector store.`);
883
+ }
884
+ const indexField = metadataMapping[0];
885
+ if (subfilter.operator === FilterOperator.IN) {
886
+ let valueStr;
887
+ if (Array.isArray(subfilter.value)) {
888
+ valueStr = subfilter.value.map((value)=>typeof value === "string" ? `t eq '${value}'` : `t eq ${value}`).join(" or ");
889
+ } else {
890
+ valueStr = typeof subfilter.value === "string" ? `t eq '${subfilter.value}'` : `t eq ${subfilter.value}`;
891
+ }
892
+ odataFilter.push(`${indexField}/any(t: ${valueStr})`);
893
+ } else if (subfilter.operator === FilterOperator.EQ) {
894
+ const escapedValue = typeof subfilter.value === "string" ? subfilter.value.replace(/'/g, "''") : subfilter.value;
895
+ odataFilter.push(`${indexField} eq '${escapedValue}'`);
896
+ } else {
897
+ throw new Error(`Unsupported filter operator ${subfilter.operator}. Supported operators are 'IN' and 'EQ'`);
898
+ }
899
+ }
900
+ let odataExpr = "";
901
+ if (metadataFilters.condition === FilterCondition.AND) {
902
+ odataExpr = odataFilter.join(" and ");
903
+ } else if (metadataFilters.condition === FilterCondition.OR) {
904
+ odataExpr = odataFilter.join(" or ");
905
+ } else {
906
+ throw new Error(`Unsupported filter condition ${metadataFilters.condition}. Supported conditions are 'AND' and 'OR'`);
907
+ }
908
+ consoleLogger.log(`OData filter: ${odataExpr}`);
909
+ return odataExpr;
910
+ }
911
+ #createNodeFromResult(result, fieldMapping) {
912
+ const { document } = result;
913
+ const metadataStr = document[fieldMapping["metadata"]];
914
+ const metadata = metadataStr ? JSON.parse(metadataStr) : {};
915
+ try {
916
+ const node = metadataDictToNode(metadata);
917
+ node.setContent(document[fieldMapping["chunk"]]);
918
+ node.embedding = document[fieldMapping["embedding"]];
919
+ return node;
920
+ } catch (error) {
921
+ throw new Error(`Failed to create node from search result`);
922
+ }
923
+ }
924
+ #buildFilterString(fieldMapping, nodeIds, filters) {
925
+ let filterStr = "";
926
+ if (nodeIds && nodeIds.length > 0) {
927
+ filterStr = nodeIds.map((nodeId)=>`${fieldMapping["id"]} eq '${nodeId}'`).join(" or ");
928
+ }
929
+ if (filters) {
930
+ const metadataFilter = this.#createOdataFilter(filters);
931
+ if (filterStr) {
932
+ filterStr = `(${filterStr}) or (${metadataFilter})`;
933
+ } else {
934
+ filterStr = metadataFilter;
935
+ }
936
+ }
937
+ return filterStr;
938
+ }
939
+ #processBatchResults(batchNodes, nodes, batchSize, limit) {
940
+ if (batchNodes.length === 0) {
941
+ return [
942
+ nodes,
943
+ false
944
+ ];
945
+ }
946
+ nodes = [
947
+ ...nodes,
948
+ ...batchNodes
949
+ ];
950
+ // If we've hit the requested limit, stop
951
+ if (limit && nodes.length >= limit) {
952
+ return [
953
+ nodes.slice(0, limit),
954
+ false
955
+ ];
956
+ }
957
+ // If we got fewer results than batch size, we've hit the end
958
+ if (batchNodes.length < batchSize) {
959
+ return [
960
+ nodes,
961
+ false
962
+ ];
963
+ }
964
+ return [
965
+ nodes,
966
+ true
967
+ ];
968
+ }
969
+ // public
970
+ /**
971
+ * Get search client
972
+ * @returns Azure AI Search client. See {@link SearchClient}
973
+ */ client() {
974
+ return this._searchClient;
975
+ }
976
+ /**
977
+ * Get index client
978
+ * @returns Azure AI Search index client. See {@link SearchIndexClient}
979
+ */ indexClient() {
980
+ return this._indexClient;
981
+ }
982
+ /**
983
+ * Add nodes to index associated with the configured search client.
984
+ * @param nodes List of nodes with embeddings to add to the index
985
+ * @returns List of node IDs that were added to the index
986
+ */ async add(nodes) {
987
+ if (!this._searchClient) {
988
+ throw new Error("Async Search client not initialized");
989
+ }
990
+ if (!nodes || nodes.length === 0) {
991
+ return [];
992
+ }
993
+ if (nodes.length > 0) {
994
+ if (this.#indexManagement === "CreateIfNotExists" && this.#indexName) {
995
+ await this.#createIndexIfNotExists(this.#indexName);
996
+ }
997
+ if (this.#indexManagement === "ValidateIndex") {
998
+ await this.#validateIndex(this.#indexName);
999
+ }
1000
+ }
1001
+ const accumulator = new IndexDocumentsBatch();
1002
+ let documents = [];
1003
+ const ids = [];
1004
+ let accumulatedSize = 0;
1005
+ const maxSize = AzureAISearchVectorStoreConfig.DEFAULT_MAX_MB_SIZE;
1006
+ const maxDocs = AzureAISearchVectorStoreConfig.DEFAULT_MAX_BATCH_SIZE;
1007
+ for (const node of nodes){
1008
+ consoleLogger.log(`Processing embedding: ${node.id_}`);
1009
+ const indexDocument = this.#createIndexDocument(node);
1010
+ const documentSize = JSON.stringify(indexDocument).length; // in bytes
1011
+ documents.push(indexDocument);
1012
+ accumulatedSize += documentSize;
1013
+ accumulator.upload(documents);
1014
+ if (documents.length >= maxDocs || accumulatedSize >= maxSize) {
1015
+ consoleLogger.log(`Uploading batch of size ${documents.length}, current progress ${ids.length} of ${nodes.length}, accumulated size ${(accumulatedSize / (1024 * 1024)).toFixed(2)} MB`);
1016
+ await this._searchClient.indexDocuments(accumulator);
1017
+ documents = [];
1018
+ accumulatedSize = 0;
1019
+ }
1020
+ ids.push(node.id_);
1021
+ }
1022
+ if (documents.length > 0) {
1023
+ consoleLogger.log(`Uploading remaining batch of size ${documents.length}, current progress ${ids.length} of ${nodes.length}, accumulated size ${(accumulatedSize / (1024 * 1024)).toFixed(2)} MB`);
1024
+ await this._searchClient.indexDocuments(accumulator);
1025
+ }
1026
+ return ids;
1027
+ }
1028
+ /**
1029
+ * Delete documents from the AI Search Index with docIdFieldKey (doc_id) field equal to refDocId.
1030
+ * @param refDocId The reference document ID to delete from the index
1031
+ */ async delete(refDocId) {
1032
+ // Check if index exists
1033
+ if (!await this.#indexExists(this.#indexName)) {
1034
+ return;
1035
+ }
1036
+ if (!this._searchClient) {
1037
+ throw new Error("searchClient is not initialized");
1038
+ }
1039
+ // Define filter and batch size
1040
+ const filterExpr = `${this.#fieldMapping["doc_id"]} eq '${refDocId}'`;
1041
+ const batchSize = 1000;
1042
+ while(true){
1043
+ // Search for documents to delete
1044
+ consoleLogger.log(`Searching with filter ${filterExpr}`);
1045
+ const searchResults = await this._searchClient.search("*", {
1046
+ filter: filterExpr,
1047
+ top: batchSize
1048
+ });
1049
+ // Collect document IDs to delete
1050
+ const docsToDelete = [];
1051
+ for await (const result of searchResults.results){
1052
+ const { document } = result;
1053
+ docsToDelete.push(document);
1054
+ }
1055
+ // Delete documents if found
1056
+ if (docsToDelete.length > 0) {
1057
+ consoleLogger.log(`Deleting ${docsToDelete.length} documents`);
1058
+ await this._searchClient.deleteDocuments(docsToDelete);
1059
+ } else {
1060
+ consoleLogger.log("No documents found to delete");
1061
+ break;
1062
+ }
1063
+ }
1064
+ }
1065
+ /**
1066
+ * Get nodes asynchronously from the Azure AI Search index.
1067
+ * @param nodeIds List of node IDs to retrieve from the index
1068
+ * @param filters Metadata filters to apply to the search
1069
+ * @param limit Maximum number of nodes to retrieve
1070
+ * @returns List of nodes retrieved from the index
1071
+ */ async getNodes(nodeIds, filters, limit) {
1072
+ if (!this._searchClient) {
1073
+ throw new Error("SearchClient not initialized");
1074
+ }
1075
+ const filterStr = this.#buildFilterString(this.#fieldMapping, nodeIds, filters);
1076
+ const nodes = [];
1077
+ const batchSize = 1000; // Azure Search batch size limit
1078
+ while(true){
1079
+ try {
1080
+ const searchRequest = createSearchRequest(this.#fieldMapping, filterStr, batchSize, nodes.length);
1081
+ const results = await this._searchClient.search("*", searchRequest);
1082
+ const batchNodes = [];
1083
+ for await (const result of results.results){
1084
+ batchNodes.push(this.#createNodeFromResult(result, this.#fieldMapping));
1085
+ }
1086
+ const [updatedNodes, continueFetching] = this.#processBatchResults(batchNodes, nodes, batchSize, limit);
1087
+ nodes.push(...updatedNodes);
1088
+ if (!continueFetching) {
1089
+ break;
1090
+ }
1091
+ } catch (error) {
1092
+ throw new Error(`Failed to get nodes from Azure AI Search: ${error}`);
1093
+ }
1094
+ }
1095
+ return nodes;
1096
+ }
1097
+ async query(query) {
1098
+ let odataFilter;
1099
+ if (query.filters) {
1100
+ odataFilter = this.#createOdataFilter(query.filters);
1101
+ consoleLogger.log(`Querying with OData filter: ${odataFilter}`);
1102
+ }
1103
+ consoleLogger.log({
1104
+ query
1105
+ });
1106
+ // Define base AzureQueryResultSearch object based on query mode
1107
+ let azureQueryResultSearch = new AzureQueryResultSearchDefault(query, this.#fieldMapping, odataFilter, this._searchClient);
1108
+ switch(query.mode){
1109
+ case VectorStoreQueryMode.SPARSE:
1110
+ azureQueryResultSearch = new AzureQueryResultSearchSparse(query, this.#fieldMapping, odataFilter, this._searchClient);
1111
+ break;
1112
+ case VectorStoreQueryMode.HYBRID:
1113
+ azureQueryResultSearch = new AzureQueryResultSearchHybrid(query, this.#fieldMapping, odataFilter, this._searchClient);
1114
+ break;
1115
+ case VectorStoreQueryMode.SEMANTIC_HYBRID:
1116
+ azureQueryResultSearch = new AzureQueryResultSearchSemanticHybrid(query, this.#fieldMapping, odataFilter, this._searchClient);
1117
+ break;
1118
+ }
1119
+ // Execute the search and return the result
1120
+ return await azureQueryResultSearch.search();
1121
+ }
1122
+ }
1123
+
1124
// Package version, reported as client metadata by the vector stores below.
const version = "0.1.0";
const pkg = { version };
1127
+
1128
/**
 * Similarity metrics accepted by Azure Cosmos DB for MongoDB vCore vector indexes.
 */
const AzureCosmosDBMongoDBSimilarityType = {
    // Cosine similarity
    COS: "COS",
    // Inner product
    IP: "IP",
    // Euclidean distance
    L2: "L2"
};
1133
/**
 * Azure Cosmos DB for MongoDB vCore vector store.
 * To use this, you should have both:
 * - the `mongodb` NPM package installed
 * - a connection string associated with a MongoDB VCore Cluster
 *
 * You do not need to create a database or collection, it will be created
 * automatically.
 *
 * You also need a vector index on the collection; create it with the
 * `createIndex` method.
 */
class AzureCosmosDBMongoDBVectorStore extends BaseVectorStore {
    /**
     * @param init Store configuration. Either `init.mongodbClient` or the
     * `AZURE_COSMOSDB_MONGODB_CONNECTION_STRING` environment variable must be set.
     * Optional keys: `dbName`, `collectionName`, `indexedMetadataFields`,
     * `indexName`, `embeddingKey`, `idKey`, `textKey`, `metadataKey`, `indexOptions`.
     * @throws Error when neither a client nor a connection string is available
     */
    constructor(init) {
        super(init);
        this.storesText = true;
        this.flatMetadata = true;
        if (init.mongodbClient) {
            this.mongodbClient = init.mongodbClient;
        } else {
            const mongoUri = getEnv("AZURE_COSMOSDB_MONGODB_CONNECTION_STRING");
            if (!mongoUri) {
                throw new Error("AzureCosmosDBMongoDBVectorStore client or connection string must be set.");
            }
            this.mongodbClient = new MongoClient(mongoUri, {
                appName: "VECTORSTORES_JS"
            });
        }
        this.mongodbClient.appendMetadata({
            name: "VECTORSTORES_AZURE_COSMOS_VCORE_VECTOR_STORE",
            version: pkg.version
        });
        this.dbName = init.dbName ?? "documentsDB";
        this.collectionName = init.collectionName ?? "documents";
        this.indexedMetadataFields = init.indexedMetadataFields ?? [];
        this.indexName = init.indexName ?? "vectorSearchIndex";
        this.embeddingKey = init.embeddingKey ?? "vectorContent";
        this.idKey = init.idKey ?? "id";
        this.textKey = init.textKey ?? "text";
        this.metadataKey = init.metadataKey ?? "metadata";
        this.indexOptions = init.indexOptions ?? {};
        this.database = this.mongodbClient.db(this.dbName);
    }
    /**
     * @returns The underlying MongoDB client
     */
    client() {
        return this.mongodbClient;
    }
    /**
     * Create the collection on first use and cache the handle.
     * @returns The collection handle
     */
    async ensureCollection() {
        if (!this.collection) {
            const collection = await this.mongodbClient.db(this.dbName).createCollection(this.collectionName);
            this.collection = collection;
        }
        return this.collection;
    }
    /**
     * Insert nodes into the collection.
     * Each document stores the node id, embedding, text and metadata under the
     * configured keys; the fields listed in `indexedMetadataFields` are also
     * copied to the top level of the document to help filtering.
     * @param nodes Nodes with embeddings to insert
     * @returns The inserted ids reported by the driver, as strings
     */
    async add(nodes) {
        if (!nodes || nodes.length === 0) {
            return [];
        }
        const dataToInsert = nodes.map((node) => {
            const metadata = nodeToMetadata(node, true, this.textKey, this.flatMetadata);
            // Include the specified metadata fields in the top level of the document (to help filter)
            const populatedMetadata = {};
            for (const field of this.indexedMetadataFields) {
                populatedMetadata[field] = metadata[field];
            }
            return {
                [this.idKey]: node.id_,
                [this.embeddingKey]: node.getEmbedding(),
                [this.textKey]: node.getContent(MetadataMode.NONE) || "",
                [this.metadataKey]: metadata,
                ...populatedMetadata
            };
        });
        const collection = await this.ensureCollection();
        const insertResult = await collection.insertMany(dataToInsert);
        return Object.values(insertResult.insertedIds).map((id) => String(id));
    }
    /**
     * Removes specified documents from the AzureCosmosDBMongoDBVectorStore.
     * @param id Value of the id field (`idKey`) of the documents to remove
     * @param deleteOptions Options forwarded to `deleteMany`
     * @returns A promise that resolves when the documents have been removed.
     */
    async delete(id, deleteOptions) {
        const collection = await this.ensureCollection();
        // Match on the configured id field, the same one `add` writes.
        await collection.deleteMany({
            [this.idKey]: id
        }, deleteOptions);
    }
    /**
     * Run a vector similarity search with the `cosmosSearch` operator.
     * @param query Query carrying `queryEmbedding` and optionally `similarityTopK` (default 4)
     * @param options Optional search tuning: `lSearch` (default 40),
     * `efSearch` (default 40) and `oversampling` (default 1.0)
     * @returns Matching nodes with their similarities and ids
     */
    async query(query, options = {}) {
        const pipeline = [
            {
                $search: {
                    cosmosSearch: {
                        vector: query.queryEmbedding,
                        path: this.embeddingKey,
                        k: query.similarityTopK ?? 4,
                        lSearch: options.lSearch ?? 40,
                        efSearch: options.efSearch ?? 40,
                        oversampling: options.oversampling ?? 1.0
                    },
                    returnStoredSource: true
                }
            }
        ];
        const collection = await this.ensureCollection();
        const cursor = collection.aggregate(pipeline);
        const nodes = [];
        const ids = [];
        const similarities = [];
        for await (const res of cursor) {
            const text = res[this.textKey];
            // NOTE(review): the pipeline has no stage projecting a `score` field —
            // confirm the service returns one on each result.
            const score = res.score;
            const id = res[this.idKey];
            const metadata = res[this.metadataKey];
            const node = metadataDictToNode(metadata);
            node.setContent(text);
            ids.push(id);
            nodes.push(node);
            similarities.push(score);
        }
        const result = {
            nodes,
            similarities,
            ids
        };
        return result;
    }
    /**
     * Creates an index on the collection with the specified index name during
     * instance construction.
     *
     * Setting the numLists parameter correctly is important for achieving good
     * accuracy and performance.
     * When using the IVF indexing strategy, you should create the index only
     * after you have loaded a large enough sample of documents to ensure that
     * the centroids for the respective buckets are fairly distributed.
     *
     * As for the compression, the following options are available:
     * - "half" - half precision compression for HNSW and IVF indexes
     * - "pq" - product quantization compression for DiskANN indexes
     * More information on the compression options can be found in the:
     * https://learn.microsoft.com/en-us/azure/cosmos-db/mongodb/vcore/product-quantization
     *
     * @param dimensions Number of dimensions for vector similarity.
     *    Defaults to 1536 when not provided.
     * @param indexType Index Type for Mongo vCore index: "hnsw", "diskann",
     *    or anything else for IVF (the default).
     * @param similarity Similarity metric to use with the index.
     *    Possible options are:
     *    - AzureCosmosDBMongoDBSimilarityType.COS (cosine distance)
     *    - AzureCosmosDBMongoDBSimilarityType.L2 (Euclidean distance)
     *    - AzureCosmosDBMongoDBSimilarityType.IP (inner product)
     * @returns A promise that resolves when the index has been created.
     */
    async createIndex(dimensions = undefined, indexType = "ivf", similarity = AzureCosmosDBMongoDBSimilarityType.COS) {
        const vectorLength = dimensions ?? 1536;
        const cosmosSearchOptions = {
            kind: "",
            similarity,
            dimensions: vectorLength
        };
        if (indexType === "hnsw") {
            cosmosSearchOptions.kind = "vector-hnsw";
            cosmosSearchOptions.m = this.indexOptions.m ?? 16;
            cosmosSearchOptions.efConstruction = this.indexOptions.efConstruction ?? 200;
            if (this.indexOptions.compression === "half") {
                cosmosSearchOptions.compression = "half";
            }
        } else if (indexType === "diskann") {
            cosmosSearchOptions.kind = "vector-diskann";
            cosmosSearchOptions.maxDegree = this.indexOptions.maxDegree ?? 40;
            cosmosSearchOptions.lBuild = this.indexOptions.lBuild ?? 50;
            if (this.indexOptions.compression === "pq") {
                cosmosSearchOptions.compression = "pq";
                cosmosSearchOptions.pqCompressedDims = this.indexOptions.pqCompressedDims ?? this.indexOptions.dimensions;
                cosmosSearchOptions.pqSampleSize = this.indexOptions.pqSampleSize ?? 1000;
            }
        } else {
            // Default to IVF index
            cosmosSearchOptions.kind = "vector-ivf";
            cosmosSearchOptions.numLists = this.indexOptions.numLists ?? 100;
            if (this.indexOptions.compression === "half") {
                cosmosSearchOptions.compression = "half";
            }
        }
        // Make sure the collection handle exists so the command always names a
        // collection, even when createIndex is the first call on this store.
        const collection = await this.ensureCollection();
        const createIndexCommands = {
            createIndexes: collection.collectionName,
            indexes: [
                {
                    name: this.indexName,
                    key: {
                        [this.embeddingKey]: "cosmosSearch"
                    },
                    cosmosSearchOptions
                }
            ]
        };
        await this.database.command(createIndexCommands);
    }
    /**
     * Checks if the specified index name during instance construction exists
     * on the collection.
     * @returns A promise that resolves to a boolean indicating if the index exists.
     */
    async checkIndexExists() {
        const collection = await this.ensureCollection();
        const indexes = await collection.listIndexes().toArray();
        return indexes.some((index) => index.name === this.indexName);
    }
    /**
     * Deletes the named index from the collection if it exists.
     * @param indexName Name of the index to drop
     * @returns A promise that resolves when the index has been deleted.
     */
    async deleteIndex(indexName) {
        const collection = await this.ensureCollection();
        const indexes = await collection.listIndexes().toArray();
        const indexToDelete = indexes.find((index) => index.name === indexName);
        if (indexToDelete) {
            await collection.dropIndex(indexName);
        }
    }
}
1352
+
1353
// Suffix appended to the user agent of Cosmos clients created by this module.
const USER_AGENT_SUFFIX = "vectorstores-cdbnosql-vectorstore-javascript";
// Embedding policy used when the caller supplies none:
// 1536-dimension Float32 vectors at /embedding, compared by cosine distance.
const DEFAULT_VECTOR_EMBEDDING_POLICY = {
    vectorEmbeddings: [
        {
            path: "/embedding",
            dataType: VectorEmbeddingDataType.Float32,
            distanceFunction: VectorEmbeddingDistanceFunction.Cosine,
            dimensions: 1536
        }
    ]
};
// Vector index list used when the caller supplies no indexing policy.
const DEFAULT_VECTOR_INDEXING_POLICY = [
    { path: "/embedding", type: VectorIndexType.QuantizedFlat }
];
1370
/**
 * Extract the account endpoint and key from a Cosmos DB connection string
 * of the form `AccountEndpoint=...;AccountKey=...;`.
 *
 * Each `;`-separated segment is split at its FIRST `=` only, so values that
 * themselves contain `=` (such as the base64 padding of an account key) are
 * kept intact.
 * @param connectionString The connection string to parse
 * @returns `{ endpoint, key }`
 * @throws Error when AccountEndpoint or AccountKey is missing or empty
 */
function parseConnectionString(connectionString) {
    let endpoint = "";
    let accountKey = "";
    for (const part of connectionString.split(";")) {
        const separatorIndex = part.indexOf("=");
        if (separatorIndex === -1) {
            // Segment without a value (e.g. the empty tail after a trailing ';').
            continue;
        }
        const key = part.slice(0, separatorIndex).trim();
        const value = part.slice(separatorIndex + 1).trim();
        if (key === "AccountEndpoint") {
            endpoint = value;
        } else if (key === "AccountKey") {
            accountKey = value;
        }
    }
    if (!endpoint || !accountKey) {
        throw new Error("Invalid connection string: missing AccountEndpoint or AccountKey.");
    }
    return {
        endpoint,
        key: accountKey
    };
}
1390
/**
 * Utility function to build the query string for the CosmosDB query.
 *
 * The statement selects the top `@k` items ordered by vector distance and
 * relies on the `@k`, `@id`, `@text`, `@metadata`, `@embeddingKey` and
 * `@embedding` parameters being supplied by the caller.
 * @param options Optional settings:
 *   - `includeVectorDistance`: set to `false` to omit the `SimilarityScore` column
 *   - `whereClause`: raw condition inserted verbatim after `WHERE`; it is not
 *     escaped here, so it must come from a trusted source
 * @returns The query text
 */
function queryBuilder(options = {}) {
    let initialQuery = "SELECT TOP @k c[@id] as id, c[@text] as text, c[@metadata] as metadata";
    if (options.includeVectorDistance !== false) {
        initialQuery += `, VectorDistance(c[@embeddingKey],@embedding) AS SimilarityScore`;
    }
    initialQuery += ` FROM c`;
    if (options.whereClause) {
        initialQuery += ` WHERE ${options.whereClause}`;
    }
    initialQuery += ` ORDER BY VectorDistance(c[@embeddingKey],@embedding)`;
    return initialQuery;
}
1404
/**
 * Vector store backed by an Azure Cosmos DB for NoSQL container.
 *
 * The database and container are created on demand the first time an
 * operation (`add`, `delete`, `query`) calls `initialize()`.
 */
class AzureCosmosDBNoSqlVectorStore extends BaseVectorStore {
    /**
     * @returns The underlying CosmosClient
     */
    client() {
        return this.cosmosClient;
    }
    /**
     * @param dbConfig Store configuration. `dbConfig.client` (a CosmosClient) is required.
     * Optional keys: `databaseName`, `containerName`, `idKey`, `textKey`,
     * `flatMetadata`, `metadataKey`, `vectorEmbeddingPolicy`, `indexingPolicy`,
     * `createContainerOptions`, `createDatabaseOptions`.
     * @throws Error when no client is given or the embedding policy has no usable path
     */
    constructor(dbConfig) {
        super(dbConfig);
        this.storesText = true;
        this.flatMetadata = true;
        if (!dbConfig.client) {
            throw new Error("CosmosClient is required for AzureCosmosDBNoSQLVectorStore initialization");
        }
        this.cosmosClient = dbConfig.client;
        const databaseName = dbConfig.databaseName ?? "vectorSearchDB";
        const containerName = dbConfig.containerName ?? "vectorSearchContainer";
        this.idKey = dbConfig.idKey ?? "id";
        this.textKey = dbConfig.textKey ?? "text";
        this.flatMetadata = dbConfig.flatMetadata ?? true;
        this.metadataKey = dbConfig.metadataKey ?? "metadata";
        const vectorEmbeddingPolicy = dbConfig.vectorEmbeddingPolicy ?? DEFAULT_VECTOR_EMBEDDING_POLICY;
        const indexingPolicy = dbConfig.indexingPolicy ?? {
            vectorIndexes: DEFAULT_VECTOR_INDEXING_POLICY
        };
        // The embedding property name is the first policy path without its leading "/".
        this.embeddingKey = vectorEmbeddingPolicy.vectorEmbeddings?.[0]?.path?.slice(1) ?? "";
        if (!this.embeddingKey) {
            throw new Error("AzureCosmosDBNoSQLVectorStore requires a valid vectorEmbeddings path");
        }
        // Deferring initialization to the first call to `initialize`
        this.initialize = () => {
            if (this.initPromise === undefined) {
                this.initPromise = this.init(this.cosmosClient, databaseName, containerName, {
                    vectorEmbeddingPolicy,
                    indexingPolicy,
                    createContainerOptions: dbConfig.createContainerOptions,
                    createDatabaseOptions: dbConfig.createDatabaseOptions
                }).catch((error) => {
                    console.error("Error during AzureCosmosDBNoSQLVectorStore initialization", error);
                    // Forget the failed attempt so a later call can retry, and surface
                    // the real failure instead of continuing without a container.
                    this.initPromise = undefined;
                    throw error;
                });
            }
            return this.initPromise;
        };
    }
    /**
     * Static method for creating an instance using a connection string.
     * If no connection string is provided, it will attempt to use the env variable `AZURE_COSMOSDB_NOSQL_CONNECTION_STRING` as connection string.
     * @param config Store configuration; `config.connectionString` is optional
     * @returns Instance of AzureCosmosDBNoSqlVectorStore
     * @throws Error when no connection string is available or it is invalid
     */
    static fromConnectionString(config = {}) {
        const cosmosConnectionString = config.connectionString || getEnv("AZURE_COSMOSDB_NOSQL_CONNECTION_STRING");
        if (!cosmosConnectionString) {
            throw new Error("Azure CosmosDB connection string must be provided");
        }
        const { endpoint, key } = parseConnectionString(cosmosConnectionString);
        const client = new CosmosClient({
            endpoint,
            key,
            userAgentSuffix: USER_AGENT_SUFFIX
        });
        return new AzureCosmosDBNoSqlVectorStore({
            ...config,
            client
        });
    }
    /**
     * Static method for creating an instance using an account endpoint and key.
     * If no endpoint is provided, the env variable `AZURE_COSMOSDB_NOSQL_ENDPOINT`
     * (then `AZURE_COSMOSDB_NOSQL_ACCOUNT_ENDPOINT`) is used; if no key is provided,
     * `AZURE_COSMOSDB_NOSQL_KEY` (then `AZURE_COSMOSDB_NOSQL_ACCOUNT_KEY`) is used.
     * @param config Store configuration; `config.endpoint` and `config.key` are optional
     * @returns Instance of AzureCosmosDBNoSqlVectorStore
     * @throws Error when the endpoint or key cannot be resolved
     */
    static fromAccountAndKey(config = {}) {
        const cosmosEndpoint = config.endpoint || getEnv("AZURE_COSMOSDB_NOSQL_ENDPOINT") || getEnv("AZURE_COSMOSDB_NOSQL_ACCOUNT_ENDPOINT");
        const cosmosKey = config.key || getEnv("AZURE_COSMOSDB_NOSQL_KEY") || getEnv("AZURE_COSMOSDB_NOSQL_ACCOUNT_KEY");
        if (!cosmosEndpoint || !cosmosKey) {
            throw new Error("Azure CosmosDB account endpoint and key must be provided");
        }
        const client = new CosmosClient({
            endpoint: cosmosEndpoint,
            key: cosmosKey,
            userAgentSuffix: USER_AGENT_SUFFIX
        });
        return new AzureCosmosDBNoSqlVectorStore({
            ...config,
            client
        });
    }
    /**
     * Static method for creating an instance using account endpoint and managed identity.
     * If no endpoint and credentials are provided, it will attempt to use the env variable `AZURE_COSMOSDB_NOSQL_ACCOUNT_ENDPOINT` as endpoint and use DefaultAzureCredential() as credentials.
     * @param config Store configuration; `config.endpoint` and `config.credential` are optional
     * @returns Instance of AzureCosmosDBNoSqlVectorStore
     * @throws Error when the endpoint cannot be resolved
     */
    static fromUriAndManagedIdentity(config = {}) {
        const cosmosEndpoint = config.endpoint || getEnv("AZURE_COSMOSDB_NOSQL_ACCOUNT_ENDPOINT");
        if (!cosmosEndpoint) {
            throw new Error("Azure CosmosDB account endpoint must be provided");
        }
        const credentials = config.credential ?? new DefaultAzureCredential();
        const client = new CosmosClient({
            endpoint: cosmosEndpoint,
            aadCredentials: credentials,
            userAgentSuffix: USER_AGENT_SUFFIX
        });
        return new AzureCosmosDBNoSqlVectorStore({
            ...config,
            client
        });
    }
    /**
     * Adds document to the CosmosDB container.
     *
     * All items are created concurrently. An item that fails to be created does
     * not reject the call; its slot in the result holds the placeholder string
     * "error: could not create item".
     * @param nodes Nodes with embeddings to store
     * @returns an array of document ids which were added
     */
    async add(nodes) {
        await this.initialize();
        if (!nodes || nodes.length === 0) {
            return [];
        }
        const docs = nodes.map((node) => {
            const metadata = nodeToMetadata(node, true, this.textKey, this.flatMetadata);
            return {
                [this.idKey]: node.id_,
                [this.embeddingKey]: node.getEmbedding(),
                [this.textKey]: node.getContent(MetadataMode.NONE) || "",
                [this.metadataKey]: metadata
            };
        });
        const ids = [];
        const results = await Promise.allSettled(docs.map((doc) => this.container.items.create(doc)));
        for (const result of results) {
            if (result.status === "fulfilled") {
                ids.push(result.value.resource?.id ?? "");
            } else {
                ids.push("error: could not create item");
            }
        }
        return ids;
    }
    /**
     * Delete a document from the CosmosDB container.
     *
     * @param refDocId - The id of the document to delete
     * @param deleteOptions - Any options to pass to the container.item.delete function
     * @returns Promise that resolves if the delete query did not throw an error.
     */
    async delete(refDocId, deleteOptions) {
        await this.initialize();
        // NOTE(review): no partition key value is passed to `container.item`;
        // confirm the item is resolved when the container is partitioned on `/id`.
        await this.container.item(refDocId).delete(deleteOptions);
    }
    /**
     * Performs a vector similarity search query in the CosmosDB container.
     *
     * @param query VectorStoreQuery; `queryEmbedding` is required
     * @param options Options forwarded to the query builder
     * (`includeVectorDistance`, `whereClause`)
     * @returns List of nodes along with similarityScore
     * @throws Error when `queryEmbedding` is missing or empty
     */
    async query(query, options = {}) {
        await this.initialize();
        if (!query.queryEmbedding || query.queryEmbedding.length === 0) {
            throw new Error("queryEmbedding is required for AzureCosmosDBNoSqlVectorStore query");
        }
        const params = {
            vector: query.queryEmbedding,
            k: query.similarityTopK
        };
        const builtQuery = queryBuilder(options);
        const nodes = [];
        const ids = [];
        const similarities = [];
        const queryResults = await this.container.items.query({
            query: builtQuery,
            parameters: [
                {
                    name: "@k",
                    value: params.k
                },
                {
                    name: "@id",
                    value: this.idKey
                },
                {
                    name: "@text",
                    value: this.textKey
                },
                {
                    name: "@metadata",
                    value: this.metadataKey
                },
                {
                    name: "@embedding",
                    value: params.vector
                },
                {
                    name: "@embeddingKey",
                    value: this.embeddingKey
                }
            ]
        }).fetchAll();
        // The query aliases the configured keys to `id`, `text` and `metadata`.
        for (const item of queryResults.resources) {
            const node = metadataDictToNode(item["metadata"], {
                fallback: {
                    id_: item["id"],
                    text: item["text"],
                    ...item["metadata"]
                }
            });
            node.setContent(item["text"]);
            const nodeId = item["id"];
            const nodeScore = item["SimilarityScore"];
            nodes.push(node);
            ids.push(nodeId);
            similarities.push(nodeScore);
        }
        const result = {
            nodes,
            similarities,
            ids
        };
        return result;
    }
    /**
     * Initialize the CosmosDB container.
     * Creates the database and the container if they do not exist and stores
     * the container handle on `this.container`.
     * @param client CosmosClient to use
     * @param databaseName Database id
     * @param containerName Container id
     * @param initOptions Optional `createDatabaseOptions`, `createContainerOptions`,
     * `indexingPolicy` and `vectorEmbeddingPolicy`
     */
    async init(client, databaseName, containerName, initOptions) {
        const { database } = await client.databases.createIfNotExists({
            ...(initOptions?.createDatabaseOptions ?? {}),
            id: databaseName
        });
        const { container } = await database.containers.createIfNotExists({
            // Without explicit container options, partition on the item id.
            ...(initOptions?.createContainerOptions ?? {
                partitionKey: {
                    paths: [
                        "/id"
                    ]
                }
            }),
            indexingPolicy: initOptions?.indexingPolicy || {
                vectorIndexes: DEFAULT_VECTOR_INDEXING_POLICY
            },
            vectorEmbeddingPolicy: initOptions?.vectorEmbeddingPolicy || DEFAULT_VECTOR_EMBEDDING_POLICY,
            id: containerName
        });
        this.container = container;
    }
}
1635
+
1636
+ export { AzureAISearchVectorStore, AzureAISearchVectorStoreConfig, AzureCosmosDBMongoDBSimilarityType, AzureCosmosDBMongoDBVectorStore, AzureCosmosDBNoSqlVectorStore, AzureQueryResultSearchBase, AzureQueryResultSearchDefault, AzureQueryResultSearchHybrid, AzureQueryResultSearchSemanticHybrid, AzureQueryResultSearchSparse, IndexManagement, MetadataIndexFieldType, SimpleCosmosDBReader };