@vectorstores/elastic-search 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) vectorstores contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
package/dist/index.cjs ADDED
@@ -0,0 +1,266 @@
1
+ Object.defineProperty(exports, '__esModule', { value: true });
2
+
3
+ var core = require('@vectorstores/core');
4
+ var elasticsearch = require('@elastic/elasticsearch');
5
+
6
/**
 * Builds an Elasticsearch client from plain connection settings.
 *
 * Exactly one endpoint must be supplied: `esUrl` (used as the client `node`)
 * or `esCloudId` (used as `cloud.id`). Authentication is optional: an API key
 * takes precedence, otherwise a username/password pair is used when both
 * values are present. If neither form is complete, no `auth` option is set.
 *
 * @param params - Connection settings (esUrl, esCloudId, esApiKey, esUsername, esPassword)
 * @returns A new Elasticsearch client
 * @throws Error if both or neither of esUrl / esCloudId are provided
 */
function getElasticSearchClient({ esUrl, esCloudId, esApiKey, esUsername, esPassword }) {
    // Validate the endpoint selection up front.
    if (esUrl && esCloudId) {
        throw new Error("Both esUrl and esCloudId cannot be provided");
    }
    if (!esUrl && !esCloudId) {
        throw new Error("Either elasticsearch url or cloud id must be provided");
    }
    const endpoint = esUrl ? { node: esUrl } : { cloud: { id: esCloudId } };
    let credentials;
    if (esApiKey) {
        credentials = { apiKey: esApiKey };
    } else if (esUsername && esPassword) {
        credentials = { username: esUsername, password: esPassword };
    }
    const settings = credentials ? { ...endpoint, auth: credentials } : endpoint;
    return new elasticsearch.Client(settings);
}
32
+
33
/**
 * ElasticSearchVectorStore provides vector storage and similarity search capabilities using Elasticsearch.
 * It extends BaseVectorStore to implement vector storage operations with Elasticsearch as the backend.
 */
class ElasticSearchVectorStore extends core.BaseVectorStore {
    /**
     * Creates a new instance of ElasticSearchVectorStore.
     *
     * When `init.esClient` is supplied it is used as-is; otherwise a client is
     * built from the esUrl / esCloudId / esApiKey / esUsername / esPassword settings.
     * Defaults: textField "content", vectorField "embedding", distanceStrategy "cosine".
     *
     * @param init - Configuration parameters for Elasticsearch connection and indexing
     * @throws Error if no client is supplied and the endpoint settings are invalid
     */
    constructor(init) {
        super();
        this.storesText = true;
        this.indexName = init.indexName;
        this.esUrl = init.esUrl ?? undefined;
        this.esCloudId = init.esCloudId ?? undefined;
        this.esApiKey = init.esApiKey ?? undefined;
        this.esUsername = init.esUsername ?? undefined;
        this.esPassword = init.esPassword ?? undefined;
        this.textField = init.textField ?? "content";
        this.vectorField = init.vectorField ?? "embedding";
        this.distanceStrategy = init.distanceStrategy ?? "cosine";
        if (!init.esClient) {
            this.elasticSearchClient = getElasticSearchClient({
                esUrl: this.esUrl,
                esCloudId: this.esCloudId,
                esApiKey: this.esApiKey,
                esUsername: this.esUsername,
                esPassword: this.esPassword
            });
        } else {
            this.elasticSearchClient = init.esClient;
        }
    }

    /**
     * Returns the Elasticsearch client instance
     * @returns The configured Elasticsearch client
     */
    client() {
        return this.elasticSearchClient;
    }

    /**
     * Translates the configured distance strategy into the value written to the
     * `similarity` option of the dense_vector mapping.
     *
     * Elasticsearch names Euclidean distance "l2_norm", so "euclidean" is
     * translated; every other value is forwarded unchanged.
     * NOTE(review): "manhattan" has no dense_vector similarity listed in the
     * Elasticsearch documentation; it is forwarded as-is so the server reports it.
     *
     * @returns Similarity name to use in the index mapping
     * @private
     */
    toElasticSearchSimilarity() {
        return this.distanceStrategy === "euclidean" ? "l2_norm" : this.distanceStrategy;
    }

    /**
     * Creates the Elasticsearch index if it doesn't exist.
     *
     * The mapping declares the text field, the indexed dense_vector field and
     * keyword sub-fields for the document-reference metadata.
     *
     * @param dimensions - Number of dimensions in the vector embedding
     * @private
     */
    async createIndexIfNotExists(dimensions) {
        const indexExists = await this.elasticSearchClient.indices.exists({
            index: this.indexName
        });
        if (indexExists) {
            return;
        }
        // `mappings` is passed at the top level (no `body` wrapper), matching the
        // flattened request style used by the bulk/search/deleteByQuery calls below.
        await this.elasticSearchClient.indices.create({
            index: this.indexName,
            mappings: {
                properties: {
                    [this.textField]: {
                        type: "text"
                    },
                    [this.vectorField]: {
                        type: "dense_vector",
                        dims: dimensions,
                        index: true,
                        similarity: this.toElasticSearchSimilarity()
                    },
                    metadata: {
                        properties: {
                            document_id: {
                                type: "keyword"
                            },
                            doc_id: {
                                type: "keyword"
                            },
                            ref_doc_id: {
                                type: "keyword"
                            }
                        }
                    }
                }
            }
        });
    }

    /**
     * Adds nodes to the vector store.
     *
     * The index is created on demand, sized from the first node's embedding.
     * All nodes are written in one bulk request with `refresh: true`.
     *
     * @param nodes - Array of BaseNode objects to store
     * @returns The IDs of all given nodes (an empty array for empty input)
     * @throws Error if the first node has no embedding, or if the bulk request reports errors
     */
    async add(nodes) {
        if (!nodes.length) {
            return [];
        }
        const dimensions = nodes[0]?.getEmbedding()?.length;
        if (!dimensions) {
            throw new Error("Embedding is required");
        }
        await this.createIndexIfNotExists(dimensions);
        // Bulk payload: an action line followed by the document source, per node.
        const operations = nodes.flatMap((node) => [
            {
                index: {
                    _index: this.indexName,
                    _id: node.id_
                }
            },
            {
                [this.vectorField]: node.getEmbedding(),
                [this.textField]: node.getContent(core.MetadataMode.NONE),
                metadata: core.nodeToMetadata(node, true)
            }
        ]);
        const results = await this.elasticSearchClient.bulk({
            operations,
            refresh: true
        });
        if (results.errors) {
            // Successful items carry no error; keep only real failure reasons so
            // the message is not padded with blank lines.
            const reasons = results.items
                .map((result) => result.index?.error?.reason)
                .filter((reason) => reason !== undefined && reason !== null);
            throw new Error(`Failed to insert documents:\n${reasons.join("\n")}`);
        }
        return nodes.map((node) => node.id_);
    }

    /**
     * Deletes nodes from the vector store by reference document ID
     * @param refDocId - Reference document ID to delete
     * @param deleteOptions - Optional deletion parameters (currently unused)
     */
    async delete(refDocId, deleteOptions) {
        await this.elasticSearchClient.deleteByQuery({
            index: this.indexName,
            query: {
                term: {
                    "metadata.ref_doc_id": refDocId
                }
            },
            refresh: true
        });
    }

    /**
     * Converts metadata filters to Elasticsearch query format.
     *
     * Each filter becomes a `term` clause on `metadata.<key>`. A single filter
     * is returned bare; otherwise the clauses are combined under `bool.must`.
     * NOTE(review): only `key` and `value` of each filter are read here; any
     * operator or condition carried by the filters is not applied.
     *
     * @param queryFilters - Metadata filters to convert
     * @returns Elasticsearch compatible filter object
     * @private
     */
    toElasticSearchFilter(queryFilters) {
        if (queryFilters.filters.length === 1) {
            const filter = queryFilters.filters[0];
            if (filter) {
                return {
                    term: {
                        [`metadata.${filter.key}`]: filter.value
                    }
                };
            }
        }
        return {
            bool: {
                must: queryFilters.filters.map((filter) => ({
                    term: {
                        [`metadata.${filter.key}`]: filter.value
                    }
                }))
            }
        };
    }

    /**
     * Min-max normalizes similarity scores to range [0,1].
     *
     * When all scores are equal, each maps to 1 if that score is positive and
     * to 0 otherwise.
     *
     * @param scores - Array of raw similarity scores
     * @returns Array of normalized similarity scores
     * @private
     */
    toLlamaSimilarity(scores) {
        if (!scores.length) {
            return [];
        }
        const maxScore = Math.max(...scores);
        const minScore = Math.min(...scores);
        if (maxScore === minScore) {
            return scores.map(() => (maxScore > 0 ? 1 : 0));
        }
        return scores.map((score) => (score - minScore) / (maxScore - minScore));
    }

    /**
     * Performs a vector similarity search query
     * @param query - Vector store query parameters
     * @param options - Optional query parameters (currently unused)
     * @returns Query results containing matching nodes, similarities, and IDs
     * @throws Error if query embedding is not provided
     */
    async query(query, options) {
        if (!query.queryEmbedding) {
            throw new Error("query embedding is not provided");
        }
        const elasticSearchFilter = query.filters
            ? [this.toElasticSearchFilter(query.filters)]
            : [];
        const topK = query.similarityTopK;
        // Elasticsearch rejects num_candidates above 10,000, so the 10x
        // oversampling factor is capped at that limit.
        const maxNumCandidates = 10000;
        const searchResponse = await this.elasticSearchClient.search({
            index: this.indexName,
            size: topK,
            knn: {
                field: this.vectorField,
                query_vector: query.queryEmbedding,
                k: topK,
                num_candidates: Math.min(topK * 10, maxNumCandidates),
                filter: elasticSearchFilter
            }
        });
        return this.getVectorSearchQueryResultFromResponse(searchResponse);
    }

    /**
     * Processes Elasticsearch response into VectorStoreQueryResult format.
     *
     * Each hit is rebuilt into a node from its stored metadata, then given the
     * stored text and embedding. Missing fields fall back to empty values and a
     * missing score to 0.
     *
     * @param res - Elasticsearch search response
     * @returns Formatted query results
     * @private
     */
    getVectorSearchQueryResultFromResponse(res) {
        const hits = res.hits.hits;
        const topKNodes = [];
        const topKIDs = [];
        const topKScores = [];
        for (const hit of hits) {
            const source = hit._source;
            const metadata = source?.metadata ?? {};
            const text = source?.[this.textField] ?? "";
            const embedding = source?.[this.vectorField] ?? [];
            const nodeId = hit._id ?? "";
            const score = hit._score ?? 0;
            const node = core.metadataDictToNode(metadata);
            node.setContent(text);
            node.embedding = embedding;
            topKNodes.push(node);
            topKIDs.push(nodeId);
            topKScores.push(score);
        }
        return {
            nodes: topKNodes,
            similarities: this.toLlamaSimilarity(topKScores),
            ids: topKIDs
        };
    }
}
265
+
266
+ exports.ElasticSearchVectorStore = ElasticSearchVectorStore;
@@ -0,0 +1,97 @@
1
+ import { Client } from '@elastic/elasticsearch';
2
+ import { BaseVectorStore, BaseNode, VectorStoreQuery, VectorStoreQueryResult } from '@vectorstores/core';
3
+
4
/**
 * Configuration accepted by the ElasticSearchVectorStore constructor.
 */
type ElasticSearchParams = {
    /** Name of the Elasticsearch index that holds the vectors. */
    indexName: string;
    /** Pre-built client; when given, the connection settings below are not used to build one. */
    esClient?: Client;
    /** Node URL. Mutually exclusive with `esCloudId`; one of the two is required when no `esClient` is given. */
    esUrl?: string;
    /** Elastic Cloud ID. Mutually exclusive with `esUrl`. */
    esCloudId?: string;
    /** API key; takes precedence over username/password. */
    esApiKey?: string;
    /** Username; only used together with `esPassword`. */
    esUsername?: string;
    /** Password; only used together with `esUsername`. */
    esPassword?: string;
    /** Field that stores the node text. Defaults to "content". */
    textField?: string;
    /** Field that stores the embedding. Defaults to "embedding". */
    vectorField?: string;
    /**
     * Distance strategy, defaults to "cosine".
     * The enum is not exported from this module, so the equivalent string
     * literals are accepted as well.
     * NOTE(review): the implementation writes this value into the dense_vector
     * `similarity` mapping option at index creation; confirm which values the
     * target Elasticsearch version accepts.
     */
    distanceStrategy?: DISTANCE_STARTEGIES | "cosine" | "euclidean" | "manhattan";
};
/** Distance strategies recognised by the `distanceStrategy` option. */
declare enum DISTANCE_STARTEGIES {
    COSINE = "cosine",
    EUCLIDEAN = "euclidean",
    MANHATTAN = "manhattan"
}
/**
 * ElasticSearchVectorStore provides vector storage and similarity search capabilities using Elasticsearch.
 * It extends BaseVectorStore to implement vector storage operations with Elasticsearch as the backend.
 */
declare class ElasticSearchVectorStore extends BaseVectorStore {
    storesText: boolean;
    private elasticSearchClient;
    private indexName;
    private esUrl?;
    private esCloudId?;
    private esApiKey?;
    private esUsername?;
    private esPassword?;
    private textField;
    private vectorField;
    private distanceStrategy;
    /**
     * Creates a new instance of ElasticSearchVectorStore
     * @param init - Configuration parameters for Elasticsearch connection and indexing
     */
    constructor(init: ElasticSearchParams);
    /**
     * Returns the Elasticsearch client instance
     * @returns The configured Elasticsearch client
     */
    client(): Client;
    /**
     * Creates an Elasticsearch index if it doesn't exist
     * @param dimensions - Number of dimensions in the vector embedding
     * @private
     */
    private createIndexIfNotExists;
    /**
     * Adds nodes to the vector store
     * @param nodes - Array of BaseNode objects to store
     * @returns The IDs of all given nodes (an empty array for empty input)
     * @throws Error if the first node has no embedding or the bulk insert reports errors
     */
    add(nodes: BaseNode[]): Promise<string[]>;
    /**
     * Deletes nodes from the vector store by reference document ID
     * @param refDocId - Reference document ID to delete
     * @param deleteOptions - Optional deletion parameters (not read by the implementation)
     */
    delete(refDocId: string, deleteOptions?: object): Promise<void>;
    /**
     * Converts metadata filters to Elasticsearch query format
     * @param queryFilters - Metadata filters to convert
     * @returns Elasticsearch compatible filter object
     * @private
     */
    private toElasticSearchFilter;
    /**
     * Min-max normalizes similarity scores to range [0,1]
     * @param scores - Array of raw similarity scores
     * @returns Array of normalized similarity scores
     * @private
     */
    private toLlamaSimilarity;
    /**
     * Performs a vector similarity search query
     * @param query - Vector store query parameters
     * @param options - Optional query parameters (not read by the implementation)
     * @returns Query results containing matching nodes, similarities, and IDs
     * @throws Error if query embedding is not provided
     */
    query(query: VectorStoreQuery, options?: object): Promise<VectorStoreQueryResult>;
    /**
     * Processes Elasticsearch response into VectorStoreQueryResult format
     * @param res - Elasticsearch search response
     * @returns Formatted query results
     * @private
     */
    private getVectorSearchQueryResultFromResponse;
}
96
+
97
+ export { ElasticSearchVectorStore };
@@ -0,0 +1,97 @@
1
+ import { Client } from '@elastic/elasticsearch';
2
+ import { BaseVectorStore, BaseNode, VectorStoreQuery, VectorStoreQueryResult } from '@vectorstores/core';
3
+
4
/**
 * Configuration accepted by the ElasticSearchVectorStore constructor.
 */
type ElasticSearchParams = {
    /** Name of the Elasticsearch index that holds the vectors. */
    indexName: string;
    /** Pre-built client; when given, the connection settings below are not used to build one. */
    esClient?: Client;
    /** Node URL. Mutually exclusive with `esCloudId`; one of the two is required when no `esClient` is given. */
    esUrl?: string;
    /** Elastic Cloud ID. Mutually exclusive with `esUrl`. */
    esCloudId?: string;
    /** API key; takes precedence over username/password. */
    esApiKey?: string;
    /** Username; only used together with `esPassword`. */
    esUsername?: string;
    /** Password; only used together with `esUsername`. */
    esPassword?: string;
    /** Field that stores the node text. Defaults to "content". */
    textField?: string;
    /** Field that stores the embedding. Defaults to "embedding". */
    vectorField?: string;
    /**
     * Distance strategy, defaults to "cosine".
     * The enum is not exported from this module, so the equivalent string
     * literals are accepted as well.
     * NOTE(review): the implementation writes this value into the dense_vector
     * `similarity` mapping option at index creation; confirm which values the
     * target Elasticsearch version accepts.
     */
    distanceStrategy?: DISTANCE_STARTEGIES | "cosine" | "euclidean" | "manhattan";
};
/** Distance strategies recognised by the `distanceStrategy` option. */
declare enum DISTANCE_STARTEGIES {
    COSINE = "cosine",
    EUCLIDEAN = "euclidean",
    MANHATTAN = "manhattan"
}
/**
 * ElasticSearchVectorStore provides vector storage and similarity search capabilities using Elasticsearch.
 * It extends BaseVectorStore to implement vector storage operations with Elasticsearch as the backend.
 */
declare class ElasticSearchVectorStore extends BaseVectorStore {
    storesText: boolean;
    private elasticSearchClient;
    private indexName;
    private esUrl?;
    private esCloudId?;
    private esApiKey?;
    private esUsername?;
    private esPassword?;
    private textField;
    private vectorField;
    private distanceStrategy;
    /**
     * Creates a new instance of ElasticSearchVectorStore
     * @param init - Configuration parameters for Elasticsearch connection and indexing
     */
    constructor(init: ElasticSearchParams);
    /**
     * Returns the Elasticsearch client instance
     * @returns The configured Elasticsearch client
     */
    client(): Client;
    /**
     * Creates an Elasticsearch index if it doesn't exist
     * @param dimensions - Number of dimensions in the vector embedding
     * @private
     */
    private createIndexIfNotExists;
    /**
     * Adds nodes to the vector store
     * @param nodes - Array of BaseNode objects to store
     * @returns The IDs of all given nodes (an empty array for empty input)
     * @throws Error if the first node has no embedding or the bulk insert reports errors
     */
    add(nodes: BaseNode[]): Promise<string[]>;
    /**
     * Deletes nodes from the vector store by reference document ID
     * @param refDocId - Reference document ID to delete
     * @param deleteOptions - Optional deletion parameters (not read by the implementation)
     */
    delete(refDocId: string, deleteOptions?: object): Promise<void>;
    /**
     * Converts metadata filters to Elasticsearch query format
     * @param queryFilters - Metadata filters to convert
     * @returns Elasticsearch compatible filter object
     * @private
     */
    private toElasticSearchFilter;
    /**
     * Min-max normalizes similarity scores to range [0,1]
     * @param scores - Array of raw similarity scores
     * @returns Array of normalized similarity scores
     * @private
     */
    private toLlamaSimilarity;
    /**
     * Performs a vector similarity search query
     * @param query - Vector store query parameters
     * @param options - Optional query parameters (not read by the implementation)
     * @returns Query results containing matching nodes, similarities, and IDs
     * @throws Error if query embedding is not provided
     */
    query(query: VectorStoreQuery, options?: object): Promise<VectorStoreQueryResult>;
    /**
     * Processes Elasticsearch response into VectorStoreQueryResult format
     * @param res - Elasticsearch search response
     * @returns Formatted query results
     * @private
     */
    private getVectorSearchQueryResultFromResponse;
}
96
+
97
+ export { ElasticSearchVectorStore };
@@ -0,0 +1,97 @@
1
+ import { Client } from '@elastic/elasticsearch';
2
+ import { BaseVectorStore, BaseNode, VectorStoreQuery, VectorStoreQueryResult } from '@vectorstores/core';
3
+
4
/**
 * Configuration accepted by the ElasticSearchVectorStore constructor.
 */
type ElasticSearchParams = {
    /** Name of the Elasticsearch index that holds the vectors. */
    indexName: string;
    /** Pre-built client; when given, the connection settings below are not used to build one. */
    esClient?: Client;
    /** Node URL. Mutually exclusive with `esCloudId`; one of the two is required when no `esClient` is given. */
    esUrl?: string;
    /** Elastic Cloud ID. Mutually exclusive with `esUrl`. */
    esCloudId?: string;
    /** API key; takes precedence over username/password. */
    esApiKey?: string;
    /** Username; only used together with `esPassword`. */
    esUsername?: string;
    /** Password; only used together with `esUsername`. */
    esPassword?: string;
    /** Field that stores the node text. Defaults to "content". */
    textField?: string;
    /** Field that stores the embedding. Defaults to "embedding". */
    vectorField?: string;
    /**
     * Distance strategy, defaults to "cosine".
     * The enum is not exported from this module, so the equivalent string
     * literals are accepted as well.
     * NOTE(review): the implementation writes this value into the dense_vector
     * `similarity` mapping option at index creation; confirm which values the
     * target Elasticsearch version accepts.
     */
    distanceStrategy?: DISTANCE_STARTEGIES | "cosine" | "euclidean" | "manhattan";
};
/** Distance strategies recognised by the `distanceStrategy` option. */
declare enum DISTANCE_STARTEGIES {
    COSINE = "cosine",
    EUCLIDEAN = "euclidean",
    MANHATTAN = "manhattan"
}
/**
 * ElasticSearchVectorStore provides vector storage and similarity search capabilities using Elasticsearch.
 * It extends BaseVectorStore to implement vector storage operations with Elasticsearch as the backend.
 */
declare class ElasticSearchVectorStore extends BaseVectorStore {
    storesText: boolean;
    private elasticSearchClient;
    private indexName;
    private esUrl?;
    private esCloudId?;
    private esApiKey?;
    private esUsername?;
    private esPassword?;
    private textField;
    private vectorField;
    private distanceStrategy;
    /**
     * Creates a new instance of ElasticSearchVectorStore
     * @param init - Configuration parameters for Elasticsearch connection and indexing
     */
    constructor(init: ElasticSearchParams);
    /**
     * Returns the Elasticsearch client instance
     * @returns The configured Elasticsearch client
     */
    client(): Client;
    /**
     * Creates an Elasticsearch index if it doesn't exist
     * @param dimensions - Number of dimensions in the vector embedding
     * @private
     */
    private createIndexIfNotExists;
    /**
     * Adds nodes to the vector store
     * @param nodes - Array of BaseNode objects to store
     * @returns The IDs of all given nodes (an empty array for empty input)
     * @throws Error if the first node has no embedding or the bulk insert reports errors
     */
    add(nodes: BaseNode[]): Promise<string[]>;
    /**
     * Deletes nodes from the vector store by reference document ID
     * @param refDocId - Reference document ID to delete
     * @param deleteOptions - Optional deletion parameters (not read by the implementation)
     */
    delete(refDocId: string, deleteOptions?: object): Promise<void>;
    /**
     * Converts metadata filters to Elasticsearch query format
     * @param queryFilters - Metadata filters to convert
     * @returns Elasticsearch compatible filter object
     * @private
     */
    private toElasticSearchFilter;
    /**
     * Min-max normalizes similarity scores to range [0,1]
     * @param scores - Array of raw similarity scores
     * @returns Array of normalized similarity scores
     * @private
     */
    private toLlamaSimilarity;
    /**
     * Performs a vector similarity search query
     * @param query - Vector store query parameters
     * @param options - Optional query parameters (not read by the implementation)
     * @returns Query results containing matching nodes, similarities, and IDs
     * @throws Error if query embedding is not provided
     */
    query(query: VectorStoreQuery, options?: object): Promise<VectorStoreQueryResult>;
    /**
     * Processes Elasticsearch response into VectorStoreQueryResult format
     * @param res - Elasticsearch search response
     * @returns Formatted query results
     * @private
     */
    private getVectorSearchQueryResultFromResponse;
}
96
+
97
+ export { ElasticSearchVectorStore };
@@ -0,0 +1,264 @@
1
+ import { BaseVectorStore, nodeToMetadata, MetadataMode, metadataDictToNode } from '@vectorstores/core';
2
+ import { Client } from '@elastic/elasticsearch';
3
+
4
/**
 * Builds an Elasticsearch client from plain connection settings.
 *
 * Exactly one endpoint must be supplied: `esUrl` (used as the client `node`)
 * or `esCloudId` (used as `cloud.id`). Authentication is optional: an API key
 * takes precedence, otherwise a username/password pair is used when both
 * values are present. If neither form is complete, no `auth` option is set.
 *
 * @param params - Connection settings (esUrl, esCloudId, esApiKey, esUsername, esPassword)
 * @returns A new Elasticsearch client
 * @throws Error if both or neither of esUrl / esCloudId are provided
 */
function getElasticSearchClient({ esUrl, esCloudId, esApiKey, esUsername, esPassword }) {
    // Validate the endpoint selection up front.
    if (esUrl && esCloudId) {
        throw new Error("Both esUrl and esCloudId cannot be provided");
    }
    if (!esUrl && !esCloudId) {
        throw new Error("Either elasticsearch url or cloud id must be provided");
    }
    const endpoint = esUrl ? { node: esUrl } : { cloud: { id: esCloudId } };
    let credentials;
    if (esApiKey) {
        credentials = { apiKey: esApiKey };
    } else if (esUsername && esPassword) {
        credentials = { username: esUsername, password: esPassword };
    }
    const settings = credentials ? { ...endpoint, auth: credentials } : endpoint;
    return new Client(settings);
}
30
+
31
/**
 * ElasticSearchVectorStore provides vector storage and similarity search capabilities using Elasticsearch.
 * It extends BaseVectorStore to implement vector storage operations with Elasticsearch as the backend.
 */
class ElasticSearchVectorStore extends BaseVectorStore {
    /**
     * Creates a new instance of ElasticSearchVectorStore.
     *
     * When `init.esClient` is supplied it is used as-is; otherwise a client is
     * built from the esUrl / esCloudId / esApiKey / esUsername / esPassword settings.
     * Defaults: textField "content", vectorField "embedding", distanceStrategy "cosine".
     *
     * @param init - Configuration parameters for Elasticsearch connection and indexing
     * @throws Error if no client is supplied and the endpoint settings are invalid
     */
    constructor(init) {
        super();
        this.storesText = true;
        this.indexName = init.indexName;
        this.esUrl = init.esUrl ?? undefined;
        this.esCloudId = init.esCloudId ?? undefined;
        this.esApiKey = init.esApiKey ?? undefined;
        this.esUsername = init.esUsername ?? undefined;
        this.esPassword = init.esPassword ?? undefined;
        this.textField = init.textField ?? "content";
        this.vectorField = init.vectorField ?? "embedding";
        this.distanceStrategy = init.distanceStrategy ?? "cosine";
        if (!init.esClient) {
            this.elasticSearchClient = getElasticSearchClient({
                esUrl: this.esUrl,
                esCloudId: this.esCloudId,
                esApiKey: this.esApiKey,
                esUsername: this.esUsername,
                esPassword: this.esPassword
            });
        } else {
            this.elasticSearchClient = init.esClient;
        }
    }

    /**
     * Returns the Elasticsearch client instance
     * @returns The configured Elasticsearch client
     */
    client() {
        return this.elasticSearchClient;
    }

    /**
     * Translates the configured distance strategy into the value written to the
     * `similarity` option of the dense_vector mapping.
     *
     * Elasticsearch names Euclidean distance "l2_norm", so "euclidean" is
     * translated; every other value is forwarded unchanged.
     * NOTE(review): "manhattan" has no dense_vector similarity listed in the
     * Elasticsearch documentation; it is forwarded as-is so the server reports it.
     *
     * @returns Similarity name to use in the index mapping
     * @private
     */
    toElasticSearchSimilarity() {
        return this.distanceStrategy === "euclidean" ? "l2_norm" : this.distanceStrategy;
    }

    /**
     * Creates the Elasticsearch index if it doesn't exist.
     *
     * The mapping declares the text field, the indexed dense_vector field and
     * keyword sub-fields for the document-reference metadata.
     *
     * @param dimensions - Number of dimensions in the vector embedding
     * @private
     */
    async createIndexIfNotExists(dimensions) {
        const indexExists = await this.elasticSearchClient.indices.exists({
            index: this.indexName
        });
        if (indexExists) {
            return;
        }
        // `mappings` is passed at the top level (no `body` wrapper), matching the
        // flattened request style used by the bulk/search/deleteByQuery calls below.
        await this.elasticSearchClient.indices.create({
            index: this.indexName,
            mappings: {
                properties: {
                    [this.textField]: {
                        type: "text"
                    },
                    [this.vectorField]: {
                        type: "dense_vector",
                        dims: dimensions,
                        index: true,
                        similarity: this.toElasticSearchSimilarity()
                    },
                    metadata: {
                        properties: {
                            document_id: {
                                type: "keyword"
                            },
                            doc_id: {
                                type: "keyword"
                            },
                            ref_doc_id: {
                                type: "keyword"
                            }
                        }
                    }
                }
            }
        });
    }

    /**
     * Adds nodes to the vector store.
     *
     * The index is created on demand, sized from the first node's embedding.
     * All nodes are written in one bulk request with `refresh: true`.
     *
     * @param nodes - Array of BaseNode objects to store
     * @returns The IDs of all given nodes (an empty array for empty input)
     * @throws Error if the first node has no embedding, or if the bulk request reports errors
     */
    async add(nodes) {
        if (!nodes.length) {
            return [];
        }
        const dimensions = nodes[0]?.getEmbedding()?.length;
        if (!dimensions) {
            throw new Error("Embedding is required");
        }
        await this.createIndexIfNotExists(dimensions);
        // Bulk payload: an action line followed by the document source, per node.
        const operations = nodes.flatMap((node) => [
            {
                index: {
                    _index: this.indexName,
                    _id: node.id_
                }
            },
            {
                [this.vectorField]: node.getEmbedding(),
                [this.textField]: node.getContent(MetadataMode.NONE),
                metadata: nodeToMetadata(node, true)
            }
        ]);
        const results = await this.elasticSearchClient.bulk({
            operations,
            refresh: true
        });
        if (results.errors) {
            // Successful items carry no error; keep only real failure reasons so
            // the message is not padded with blank lines.
            const reasons = results.items
                .map((result) => result.index?.error?.reason)
                .filter((reason) => reason !== undefined && reason !== null);
            throw new Error(`Failed to insert documents:\n${reasons.join("\n")}`);
        }
        return nodes.map((node) => node.id_);
    }

    /**
     * Deletes nodes from the vector store by reference document ID
     * @param refDocId - Reference document ID to delete
     * @param deleteOptions - Optional deletion parameters (currently unused)
     */
    async delete(refDocId, deleteOptions) {
        await this.elasticSearchClient.deleteByQuery({
            index: this.indexName,
            query: {
                term: {
                    "metadata.ref_doc_id": refDocId
                }
            },
            refresh: true
        });
    }

    /**
     * Converts metadata filters to Elasticsearch query format.
     *
     * Each filter becomes a `term` clause on `metadata.<key>`. A single filter
     * is returned bare; otherwise the clauses are combined under `bool.must`.
     * NOTE(review): only `key` and `value` of each filter are read here; any
     * operator or condition carried by the filters is not applied.
     *
     * @param queryFilters - Metadata filters to convert
     * @returns Elasticsearch compatible filter object
     * @private
     */
    toElasticSearchFilter(queryFilters) {
        if (queryFilters.filters.length === 1) {
            const filter = queryFilters.filters[0];
            if (filter) {
                return {
                    term: {
                        [`metadata.${filter.key}`]: filter.value
                    }
                };
            }
        }
        return {
            bool: {
                must: queryFilters.filters.map((filter) => ({
                    term: {
                        [`metadata.${filter.key}`]: filter.value
                    }
                }))
            }
        };
    }

    /**
     * Min-max normalizes similarity scores to range [0,1].
     *
     * When all scores are equal, each maps to 1 if that score is positive and
     * to 0 otherwise.
     *
     * @param scores - Array of raw similarity scores
     * @returns Array of normalized similarity scores
     * @private
     */
    toLlamaSimilarity(scores) {
        if (!scores.length) {
            return [];
        }
        const maxScore = Math.max(...scores);
        const minScore = Math.min(...scores);
        if (maxScore === minScore) {
            return scores.map(() => (maxScore > 0 ? 1 : 0));
        }
        return scores.map((score) => (score - minScore) / (maxScore - minScore));
    }

    /**
     * Performs a vector similarity search query
     * @param query - Vector store query parameters
     * @param options - Optional query parameters (currently unused)
     * @returns Query results containing matching nodes, similarities, and IDs
     * @throws Error if query embedding is not provided
     */
    async query(query, options) {
        if (!query.queryEmbedding) {
            throw new Error("query embedding is not provided");
        }
        const elasticSearchFilter = query.filters
            ? [this.toElasticSearchFilter(query.filters)]
            : [];
        const topK = query.similarityTopK;
        // Elasticsearch rejects num_candidates above 10,000, so the 10x
        // oversampling factor is capped at that limit.
        const maxNumCandidates = 10000;
        const searchResponse = await this.elasticSearchClient.search({
            index: this.indexName,
            size: topK,
            knn: {
                field: this.vectorField,
                query_vector: query.queryEmbedding,
                k: topK,
                num_candidates: Math.min(topK * 10, maxNumCandidates),
                filter: elasticSearchFilter
            }
        });
        return this.getVectorSearchQueryResultFromResponse(searchResponse);
    }

    /**
     * Processes Elasticsearch response into VectorStoreQueryResult format.
     *
     * Each hit is rebuilt into a node from its stored metadata, then given the
     * stored text and embedding. Missing fields fall back to empty values and a
     * missing score to 0.
     *
     * @param res - Elasticsearch search response
     * @returns Formatted query results
     * @private
     */
    getVectorSearchQueryResultFromResponse(res) {
        const hits = res.hits.hits;
        const topKNodes = [];
        const topKIDs = [];
        const topKScores = [];
        for (const hit of hits) {
            const source = hit._source;
            const metadata = source?.metadata ?? {};
            const text = source?.[this.textField] ?? "";
            const embedding = source?.[this.vectorField] ?? [];
            const nodeId = hit._id ?? "";
            const score = hit._score ?? 0;
            const node = metadataDictToNode(metadata);
            node.setContent(text);
            node.embedding = embedding;
            topKNodes.push(node);
            topKIDs.push(nodeId);
            topKScores.push(score);
        }
        return {
            nodes: topKNodes,
            similarities: this.toLlamaSimilarity(topKScores),
            ids: topKIDs
        };
    }
}
263
+
264
+ export { ElasticSearchVectorStore };
package/dist/index.js ADDED
@@ -0,0 +1,264 @@
1
+ import { BaseVectorStore, nodeToMetadata, MetadataMode, metadataDictToNode } from '@vectorstores/core';
2
+ import { Client } from '@elastic/elasticsearch';
3
+
/**
 * Builds an Elasticsearch client from connection settings.
 *
 * Exactly one of `esUrl` (node URL) or `esCloudId` (cloud id) must be given.
 * Authentication is optional: an API key takes precedence; otherwise a
 * username/password pair is used when both are present.
 *
 * @param params - Connection settings (`esUrl`, `esCloudId`, `esApiKey`, `esUsername`, `esPassword`)
 * @returns A new Elasticsearch `Client`
 * @throws Error if both or neither of `esUrl` and `esCloudId` are provided
 */
function getElasticSearchClient({ esUrl, esCloudId, esApiKey, esUsername, esPassword }) {
    if (esUrl && esCloudId) {
        throw new Error("Both esUrl and esCloudId cannot be provided");
    }
    if (!esUrl && !esCloudId) {
        throw new Error("Either elasticsearch url or cloud id must be provided");
    }
    const settings = esUrl ? { node: esUrl } : { cloud: { id: esCloudId } };
    if (esApiKey) {
        settings.auth = { apiKey: esApiKey };
    } else if (esUsername && esPassword) {
        settings.auth = { username: esUsername, password: esPassword };
    }
    return new Client(settings);
}
30
+
/**
 * ElasticSearchVectorStore provides vector storage and similarity search using Elasticsearch.
 * It extends BaseVectorStore. Each node is stored as one document holding a text field,
 * a dense-vector field and a `metadata` object, and is retrieved with kNN search.
 */
class ElasticSearchVectorStore extends BaseVectorStore {
    /**
     * Creates a new instance of ElasticSearchVectorStore.
     *
     * Defaults: `textField` = "content", `vectorField` = "embedding",
     * `distanceStrategy` = "cosine". When `init.esClient` is supplied it is used
     * as-is; otherwise a client is built from the `es*` connection settings.
     *
     * @param init - Configuration parameters for Elasticsearch connection and indexing
     */
    constructor(init) {
        super();
        this.storesText = true;
        this.indexName = init.indexName;
        this.esUrl = init.esUrl ?? undefined;
        this.esCloudId = init.esCloudId ?? undefined;
        this.esApiKey = init.esApiKey ?? undefined;
        this.esUsername = init.esUsername ?? undefined;
        this.esPassword = init.esPassword ?? undefined;
        this.textField = init.textField ?? "content";
        this.vectorField = init.vectorField ?? "embedding";
        this.distanceStrategy = init.distanceStrategy ?? "cosine";
        // Only build a client when the caller did not inject one.
        this.elasticSearchClient = init.esClient || getElasticSearchClient({
            esUrl: this.esUrl,
            esCloudId: this.esCloudId,
            esApiKey: this.esApiKey,
            esUsername: this.esUsername,
            esPassword: this.esPassword
        });
    }

    /**
     * Returns the Elasticsearch client instance.
     * @returns The configured Elasticsearch client
     */
    client() {
        return this.elasticSearchClient;
    }

    /**
     * Creates the Elasticsearch index if it doesn't exist.
     *
     * The mapping declares the text field as `text`, the vector field as an indexed
     * `dense_vector` using the configured distance strategy, and the
     * `metadata.document_id`, `metadata.doc_id` and `metadata.ref_doc_id` fields
     * as `keyword`.
     *
     * @param dimensions - Number of dimensions in the vector embedding
     * @private
     */
    async createIndexIfNotExists(dimensions) {
        const indexExists = await this.elasticSearchClient.indices.exists({
            index: this.indexName
        });
        if (indexExists) {
            return;
        }
        await this.elasticSearchClient.indices.create({
            index: this.indexName,
            body: {
                mappings: {
                    properties: {
                        [this.textField]: {
                            type: "text"
                        },
                        [this.vectorField]: {
                            type: "dense_vector",
                            dims: dimensions,
                            index: true,
                            similarity: this.distanceStrategy
                        },
                        metadata: {
                            properties: {
                                document_id: {
                                    type: "keyword"
                                },
                                doc_id: {
                                    type: "keyword"
                                },
                                ref_doc_id: {
                                    type: "keyword"
                                }
                            }
                        }
                    }
                }
            }
        });
    }

    /**
     * Adds nodes to the vector store.
     *
     * The index is created on demand, sized from the first node's embedding.
     * All nodes are written in a single bulk request with `refresh: true`.
     *
     * @param nodes - Array of BaseNode objects to store
     * @returns Array of the ids of the given nodes (empty when `nodes` is empty)
     * @throws Error if the first node has no (or an empty) embedding
     * @throws Error if the bulk request reports failures; the message lists one
     *         line per failed item
     */
    async add(nodes) {
        if (!nodes.length) {
            return [];
        }
        const dimensions = nodes[0]?.getEmbedding()?.length;
        if (!dimensions) {
            throw new Error("Embedding is required");
        }
        await this.createIndexIfNotExists(dimensions);
        // Bulk payload alternates an action line and the document it applies to.
        const operations = nodes.flatMap((node) => [
            {
                index: {
                    _index: this.indexName,
                    _id: node.id_
                }
            },
            {
                [this.vectorField]: node.getEmbedding(),
                [this.textField]: node.getContent(MetadataMode.NONE),
                metadata: nodeToMetadata(node, true)
            }
        ]);
        const results = await this.elasticSearchClient.bulk({
            operations,
            refresh: true
        });
        if (results.errors) {
            // Report failed items only: successful items carry no `error`, and
            // including them would add empty lines to the message.
            const reasons = results.items
                .map((item) => item.index?.error)
                .filter(Boolean)
                .map((error) => error.reason ?? error.type);
            throw new Error(`Failed to insert documents:\n${reasons.join("\n")}`);
        }
        return nodes.map((node) => node.id_);
    }

    /**
     * Deletes nodes from the vector store by reference document ID.
     *
     * Removes every document whose `metadata.ref_doc_id` equals `refDocId`
     * and refreshes the index afterwards.
     *
     * @param refDocId - Reference document ID to delete
     * @param deleteOptions - Optional deletion parameters (currently unused)
     */
    async delete(refDocId, deleteOptions) {
        await this.elasticSearchClient.deleteByQuery({
            index: this.indexName,
            query: {
                term: {
                    "metadata.ref_doc_id": refDocId
                }
            },
            refresh: true
        });
    }

    /**
     * Converts metadata filters to Elasticsearch query format.
     *
     * Every filter becomes a `term` clause on `metadata.<key>`. A single filter
     * is returned as a bare `term` query; otherwise the clauses are combined
     * under `bool.must`. Only `key` and `value` of each filter are read.
     *
     * @param queryFilters - Metadata filters to convert
     * @returns Elasticsearch compatible filter object
     * @private
     */
    toElasticSearchFilter(queryFilters) {
        const toTerm = (filter) => ({
            term: {
                [`metadata.${filter.key}`]: filter.value
            }
        });
        if (queryFilters.filters.length === 1) {
            const filter = queryFilters.filters[0];
            if (filter) {
                return toTerm(filter);
            }
        }
        return {
            bool: {
                must: queryFilters.filters.map((filter) => toTerm(filter))
            }
        };
    }

    /**
     * Normalizes similarity scores to range [0,1] using min-max scaling.
     *
     * When all scores are equal, every entry becomes 1 if that score is
     * positive and 0 otherwise.
     *
     * @param scores - Array of raw similarity scores
     * @returns Array of normalized similarity scores (empty for empty input)
     * @private
     */
    toLlamaSimilarity(scores) {
        if (!scores.length) {
            return [];
        }
        const maxScore = Math.max(...scores);
        const minScore = Math.min(...scores);
        if (maxScore === minScore) {
            return scores.map(() => maxScore > 0 ? 1 : 0);
        }
        return scores.map((score) => (score - minScore) / (maxScore - minScore));
    }

    /**
     * Performs a vector similarity search query.
     *
     * Runs a kNN search on the vector field with `k` = `query.similarityTopK`,
     * optionally restricted by `query.filters`.
     *
     * @param query - Vector store query parameters
     * @param options - Optional query parameters (currently unused)
     * @returns Query results containing matching nodes, similarities, and IDs
     * @throws Error if query embedding is not provided
     */
    async query(query, options) {
        if (!query.queryEmbedding) {
            throw new Error("query embedding is not provided");
        }
        const elasticSearchFilter = query.filters
            ? [this.toElasticSearchFilter(query.filters)]
            : [];
        // Elasticsearch rejects num_candidates above 10,000, so cap the
        // "10x oversampling" heuristic instead of failing for large top-k values.
        const maxNumCandidates = 10000;
        const searchResponse = await this.elasticSearchClient.search({
            index: this.indexName,
            size: query.similarityTopK,
            knn: {
                field: this.vectorField,
                query_vector: query.queryEmbedding,
                k: query.similarityTopK,
                num_candidates: Math.min(query.similarityTopK * 10, maxNumCandidates),
                filter: elasticSearchFilter
            }
        });
        return this.getVectorSearchQueryResultFromResponse(searchResponse);
    }

    /**
     * Processes Elasticsearch response into VectorStoreQueryResult format.
     *
     * Each hit yields a node rebuilt from its stored `metadata`, with the stored
     * text and embedding attached. Missing values default to `{}`, `""`, `[]`,
     * `""` (id) and `0` (score).
     *
     * @param res - Elasticsearch search response
     * @returns Formatted query results with index-aligned `nodes`, `similarities` and `ids`
     * @private
     */
    getVectorSearchQueryResultFromResponse(res) {
        const hits = res.hits.hits;
        const topKNodes = [];
        const topKIDs = [];
        const topKScores = [];
        for (const hit of hits) {
            const source = hit._source;
            const metadata = source?.metadata ?? {};
            const text = source?.[this.textField] ?? "";
            const embedding = source?.[this.vectorField] ?? [];
            const nodeId = hit._id ?? "";
            const score = hit._score ?? 0;
            const node = metadataDictToNode(metadata);
            node.setContent(text);
            node.embedding = embedding;
            topKNodes.push(node);
            topKIDs.push(nodeId);
            topKScores.push(score);
        }
        return {
            nodes: topKNodes,
            similarities: this.toLlamaSimilarity(topKScores),
            ids: topKIDs
        };
    }
}
263
+
264
+ export { ElasticSearchVectorStore };
package/package.json ADDED
@@ -0,0 +1,53 @@
1
+ {
2
+ "name": "@vectorstores/elastic-search",
3
+ "description": "Elastic Search Storage for vectorstores",
4
+ "version": "0.1.0",
5
+ "type": "module",
6
+ "main": "./dist/index.cjs",
7
+ "module": "./dist/index.js",
8
+ "exports": {
9
+ ".": {
10
+ "edge-light": {
11
+ "types": "./dist/index.edge-light.d.ts",
12
+ "default": "./dist/index.edge-light.js"
13
+ },
14
+ "workerd": {
15
+ "types": "./dist/index.edge-light.d.ts",
16
+ "default": "./dist/index.edge-light.js"
17
+ },
18
+ "require": {
19
+ "types": "./dist/index.d.cts",
20
+ "default": "./dist/index.cjs"
21
+ },
22
+ "import": {
23
+ "types": "./dist/index.d.ts",
24
+ "default": "./dist/index.js"
25
+ }
26
+ }
27
+ },
28
+ "files": [
29
+ "dist"
30
+ ],
31
+ "repository": {
32
+ "type": "git",
33
+ "url": "git+https://github.com/schiesser/vectorstores.git",
34
+ "directory": "packages/providers/storage/elastic-search"
35
+ },
36
+ "devDependencies": {
37
+ "vitest": "^3.0.9",
38
+ "@vectorstores/core": "0.1.0",
39
+ "@vectorstores/env": "0.1.0"
40
+ },
41
+ "peerDependencies": {
42
+ "@vectorstores/core": "0.1.0",
43
+ "@vectorstores/env": "0.1.0"
44
+ },
45
+ "dependencies": {
46
+ "@elastic/elasticsearch": "^8.17.1"
47
+ },
48
+ "scripts": {
49
+ "build": "bunchee",
50
+ "dev": "bunchee --watch",
51
+ "test": "vitest run"
52
+ }
53
+ }