@vectorstores/elastic-search 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/index.cjs +266 -0
- package/dist/index.d.cts +97 -0
- package/dist/index.d.ts +97 -0
- package/dist/index.edge-light.d.ts +97 -0
- package/dist/index.edge-light.js +264 -0
- package/dist/index.js +264 -0
- package/package.json +53 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) vectorstores contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
Object.defineProperty(exports, '__esModule', { value: true });
|
|
2
|
+
|
|
3
|
+
var core = require('@vectorstores/core');
|
|
4
|
+
var elasticsearch = require('@elastic/elasticsearch');
|
|
5
|
+
|
|
6
|
+
/**
 * Builds an Elasticsearch client from connection settings.
 *
 * Exactly one of `esUrl` (self-managed node) or `esCloudId` (Elastic Cloud)
 * must be given. Authentication is optional: an API key takes precedence
 * over basic credentials when both are supplied.
 *
 * @param params - Connection settings
 * @param params.esUrl - URL of the Elasticsearch node
 * @param params.esCloudId - Elastic Cloud deployment id
 * @param params.esApiKey - API key used for authentication
 * @param params.esUsername - User name for basic authentication
 * @param params.esPassword - Password for basic authentication
 * @returns A new Elasticsearch client
 * @throws Error if both or neither of esUrl / esCloudId are provided, or if
 *   only one half of the basic credentials is provided without an API key
 */
function getElasticSearchClient({ esUrl, esCloudId, esApiKey, esUsername, esPassword }) {
    if (esUrl && esCloudId) {
        throw new Error("Both esUrl and esCloudId cannot be provided");
    }
    if (!esUrl && !esCloudId) {
        throw new Error("Either elasticsearch url or cloud id must be provided");
    }
    const clientOptions = esUrl ? { node: esUrl } : { cloud: { id: esCloudId } };
    if (esApiKey) {
        clientOptions.auth = { apiKey: esApiKey };
    } else if (esUsername && esPassword) {
        clientOptions.auth = { username: esUsername, password: esPassword };
    } else if (esUsername || esPassword) {
        // Half-specified credentials used to be dropped silently, yielding an
        // unauthenticated client that only failed later on the first request.
        throw new Error("Both esUsername and esPassword must be provided for basic authentication");
    }
    return new elasticsearch.Client(clientOptions);
}
|
|
32
|
+
|
|
33
|
+
/**
 * ElasticSearchVectorStore provides vector storage and similarity search capabilities using Elasticsearch.
 * It extends BaseVectorStore to implement vector storage operations with Elasticsearch as the backend.
 */
class ElasticSearchVectorStore extends core.BaseVectorStore {
    /**
     * Creates a new instance of ElasticSearchVectorStore.
     *
     * When `init.esClient` is supplied it is used as-is; otherwise a client is
     * built from the url / cloud id and credential settings.
     *
     * @param init - Configuration parameters for Elasticsearch connection and indexing
     */
    constructor(init) {
        super();
        this.storesText = true;
        this.indexName = init.indexName;
        this.esUrl = init.esUrl ?? undefined;
        this.esCloudId = init.esCloudId ?? undefined;
        this.esApiKey = init.esApiKey ?? undefined;
        this.esUsername = init.esUsername ?? undefined;
        this.esPassword = init.esPassword ?? undefined;
        this.textField = init.textField ?? "content";
        this.vectorField = init.vectorField ?? "embedding";
        this.distanceStrategy = init.distanceStrategy ?? "cosine";
        if (!init.esClient) {
            this.elasticSearchClient = getElasticSearchClient({
                esUrl: this.esUrl,
                esCloudId: this.esCloudId,
                esApiKey: this.esApiKey,
                esUsername: this.esUsername,
                esPassword: this.esPassword
            });
        } else {
            this.elasticSearchClient = init.esClient;
        }
    }

    /**
     * Returns the Elasticsearch client instance
     * @returns The configured Elasticsearch client
     */
    client() {
        return this.elasticSearchClient;
    }

    /**
     * Creates the Elasticsearch index if it doesn't exist.
     *
     * The mapping declares the text field, the dense_vector field (indexed,
     * using the configured distance strategy as its similarity) and keyword
     * sub-fields for the document id metadata.
     *
     * @param dimensions - Number of dimensions in the vector embedding
     * @private
     */
    async createIndexIfNotExists(dimensions) {
        const indexExists = await this.elasticSearchClient.indices.exists({
            index: this.indexName
        });
        if (indexExists) {
            return;
        }
        try {
            // `mappings` is passed at the top level, like every other request
            // in this class, instead of through the legacy `body` wrapper.
            await this.elasticSearchClient.indices.create({
                index: this.indexName,
                mappings: {
                    properties: {
                        [this.textField]: {
                            type: "text"
                        },
                        [this.vectorField]: {
                            type: "dense_vector",
                            dims: dimensions,
                            index: true,
                            similarity: this.distanceStrategy
                        },
                        metadata: {
                            properties: {
                                document_id: {
                                    type: "keyword"
                                },
                                doc_id: {
                                    type: "keyword"
                                },
                                ref_doc_id: {
                                    type: "keyword"
                                }
                            }
                        }
                    }
                }
            });
        } catch (err) {
            // Another writer may have created the index between the existence
            // check and the create call; that outcome is what we wanted anyway.
            if (err?.meta?.body?.error?.type !== "resource_already_exists_exception") {
                throw err;
            }
        }
    }

    /**
     * Adds nodes to the vector store
     * @param nodes - Array of BaseNode objects to store
     * @returns Array of node IDs that were successfully stored
     * @throws Error if the first node has no embedding, or if any bulk item fails
     */
    async add(nodes) {
        if (!nodes.length) {
            return [];
        }
        // The first node's embedding length fixes the index dimensionality.
        const dimensions = nodes[0]?.getEmbedding()?.length;
        if (!dimensions) {
            throw new Error("Embedding is required");
        }
        await this.createIndexIfNotExists(dimensions);
        // Bulk payload: one action line followed by one document line per node.
        const operations = nodes.flatMap((node) => [
            {
                index: {
                    _index: this.indexName,
                    _id: node.id_
                }
            },
            {
                [this.vectorField]: node.getEmbedding(),
                [this.textField]: node.getContent(core.MetadataMode.NONE),
                metadata: core.nodeToMetadata(node, true)
            }
        ]);
        const results = await this.elasticSearchClient.bulk({
            operations,
            refresh: true
        });
        if (results.errors) {
            // Report only the items that actually failed; successful items
            // carry no `error` and used to show up as blank lines.
            const reasons = results.items
                .map((item) => item.index)
                .filter((outcome) => outcome?.error != null)
                .map((outcome) => `${outcome._id}: ${outcome.error.reason ?? outcome.error.type}`);
            throw new Error(`Failed to insert documents:\n${reasons.join("\n")}`);
        }
        return nodes.map((node) => node.id_);
    }

    /**
     * Deletes nodes from the vector store by reference document ID
     * @param refDocId - Reference document ID to delete
     * @param deleteOptions - Optional deletion parameters (currently unused)
     */
    async delete(refDocId, deleteOptions) {
        await this.elasticSearchClient.deleteByQuery({
            index: this.indexName,
            query: {
                term: {
                    "metadata.ref_doc_id": refDocId
                }
            },
            refresh: true
        });
    }

    /**
     * Converts metadata filters to Elasticsearch query format.
     *
     * A single filter becomes a bare `term` query; any other count becomes a
     * `bool.must` of `term` queries, i.e. all filters have to match.
     *
     * NOTE(review): only `key` and `value` of each filter are read here; any
     * operator or and/or condition carried by the filters is not applied.
     *
     * @param queryFilters - Metadata filters to convert
     * @returns Elasticsearch compatible filter object
     * @private
     */
    toElasticSearchFilter(queryFilters) {
        const toTerm = (filter) => ({
            term: {
                [`metadata.${filter.key}`]: filter.value
            }
        });
        if (queryFilters.filters.length === 1) {
            const filter = queryFilters.filters[0];
            if (filter) {
                return toTerm(filter);
            }
        }
        return {
            bool: {
                must: queryFilters.filters.map(toTerm)
            }
        };
    }

    /**
     * Normalizes similarity scores to range [0,1] using min-max scaling.
     *
     * When every score is identical the scale is undefined, so each entry
     * becomes 1 if that score is positive and 0 otherwise.
     *
     * @param scores - Array of raw similarity scores
     * @returns Array of normalized similarity scores
     * @private
     */
    toLlamaSimilarity(scores) {
        if (!scores.length) {
            return [];
        }
        const maxScore = Math.max(...scores);
        const minScore = Math.min(...scores);
        if (maxScore === minScore) {
            return scores.map(() => (maxScore > 0 ? 1 : 0));
        }
        return scores.map((score) => (score - minScore) / (maxScore - minScore));
    }

    /**
     * Performs a vector similarity search query
     * @param query - Vector store query parameters
     * @param options - Optional query parameters (currently unused)
     * @returns Query results containing matching nodes, similarities, and IDs
     * @throws Error if query embedding is not provided
     */
    async query(query, options) {
        if (!query.queryEmbedding) {
            throw new Error("query embedding is not provided");
        }
        // Elasticsearch rejects kNN requests whose num_candidates exceeds 10,000.
        const maxNumCandidates = 10000;
        const elasticSearchFilter = query.filters
            ? [this.toElasticSearchFilter(query.filters)]
            : [];
        const searchResponse = await this.elasticSearchClient.search({
            index: this.indexName,
            size: query.similarityTopK,
            knn: {
                field: this.vectorField,
                query_vector: query.queryEmbedding,
                k: query.similarityTopK,
                // Oversample candidates tenfold, capped at the server limit.
                num_candidates: Math.min(query.similarityTopK * 10, maxNumCandidates),
                filter: elasticSearchFilter
            }
        });
        return this.getVectorSearchQueryResultFromResponse(searchResponse);
    }

    /**
     * Processes Elasticsearch response into VectorStoreQueryResult format.
     *
     * Each hit is turned back into a node from its stored metadata, then its
     * text and embedding are restored from the configured fields. Missing
     * values fall back to empty metadata, empty text, an empty embedding,
     * an empty id and a score of 0.
     *
     * @param res - Elasticsearch search response
     * @returns Formatted query results
     * @private
     */
    getVectorSearchQueryResultFromResponse(res) {
        const hits = res.hits.hits;
        const topKNodes = [];
        const topKIDs = [];
        const topKScores = [];
        for (const hit of hits) {
            const source = hit._source;
            const metadata = source?.metadata ?? {};
            const text = source?.[this.textField] ?? "";
            const embedding = source?.[this.vectorField] ?? [];
            const nodeId = hit._id ?? "";
            const score = hit._score ?? 0;
            const node = core.metadataDictToNode(metadata);
            node.setContent(text);
            node.embedding = embedding;
            topKNodes.push(node);
            topKIDs.push(nodeId);
            topKScores.push(score);
        }
        return {
            nodes: topKNodes,
            similarities: this.toLlamaSimilarity(topKScores),
            ids: topKIDs
        };
    }
}
|
|
265
|
+
|
|
266
|
+
exports.ElasticSearchVectorStore = ElasticSearchVectorStore;
|
package/dist/index.d.cts
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import { Client } from '@elastic/elasticsearch';
|
|
2
|
+
import { BaseVectorStore, BaseNode, VectorStoreQuery, VectorStoreQueryResult } from '@vectorstores/core';
|
|
3
|
+
|
|
4
|
+
/** Configuration accepted by the ElasticSearchVectorStore constructor. */
type ElasticSearchParams = {
    /** Name of the Elasticsearch index used for all operations. */
    indexName: string;
    /** Ready-made client; when provided, no client is built from the settings below. */
    esClient?: Client;
    /** URL of the Elasticsearch node (mutually exclusive with esCloudId). */
    esUrl?: string;
    /** Elastic Cloud id (mutually exclusive with esUrl). */
    esCloudId?: string;
    /** API key; preferred over username/password when both are given. */
    esApiKey?: string;
    esUsername?: string;
    esPassword?: string;
    /** Field holding the node text. Defaults to "content". */
    textField?: string;
    /** Field holding the embedding. Defaults to "embedding". */
    vectorField?: string;
    /**
     * Value written as the `similarity` of the dense_vector mapping.
     * Defaults to "cosine". Plain string literals are accepted because the
     * enum below is not exported and therefore cannot be referenced by callers.
     */
    distanceStrategy?: DISTANCE_STARTEGIES | "cosine" | "l2_norm" | "dot_product" | "max_inner_product";
};
/**
 * Named distance strategies.
 * NOTE(review): Elasticsearch documents `l2_norm`, `dot_product`, `cosine` and
 * `max_inner_product` as dense_vector similarities; confirm that "euclidean"
 * and "manhattan" are accepted by the target cluster before relying on them.
 */
declare enum DISTANCE_STARTEGIES {
    COSINE = "cosine",
    EUCLIDEAN = "euclidean",
    MANHATTAN = "manhattan"
}
/**
 * ElasticSearchVectorStore provides vector storage and similarity search capabilities using Elasticsearch.
 * It extends BaseVectorStore to implement vector storage operations with Elasticsearch as the backend.
 */
declare class ElasticSearchVectorStore extends BaseVectorStore {
    storesText: boolean;
    private elasticSearchClient;
    private indexName;
    private esUrl?;
    private esCloudId?;
    private esApiKey?;
    private esUsername?;
    private esPassword?;
    private textField;
    private vectorField;
    private distanceStrategy;
    /**
     * Creates a new instance of ElasticSearchVectorStore
     * @param init - Configuration parameters for Elasticsearch connection and indexing
     */
    constructor(init: ElasticSearchParams);
    /**
     * Returns the Elasticsearch client instance
     * @returns The configured Elasticsearch client
     */
    client(): Client;
    /**
     * Creates an Elasticsearch index if it doesn't exist
     * @param dimensions - Number of dimensions in the vector embedding
     * @private
     */
    private createIndexIfNotExists;
    /**
     * Adds nodes to the vector store
     * @param nodes - Array of BaseNode objects to store
     * @returns Array of node IDs that were successfully stored
     * @throws Error if nodes don't have embeddings
     */
    add(nodes: BaseNode[]): Promise<string[]>;
    /**
     * Deletes nodes from the vector store by reference document ID
     * @param refDocId - Reference document ID to delete
     * @param deleteOptions - Optional deletion parameters
     */
    delete(refDocId: string, deleteOptions?: object): Promise<void>;
    /**
     * Converts metadata filters to Elasticsearch query format
     * @param queryFilters - Metadata filters to convert
     * @returns Elasticsearch compatible filter object
     * @private
     */
    private toElasticSearchFilter;
    /**
     * Normalizes similarity scores to range [0,1]
     * @param scores - Array of raw similarity scores
     * @returns Array of normalized similarity scores
     * @private
     */
    private toLlamaSimilarity;
    /**
     * Performs a vector similarity search query
     * @param query - Vector store query parameters
     * @param options - Optional query parameters
     * @returns Query results containing matching nodes, similarities, and IDs
     * @throws Error if query embedding is not provided
     */
    query(query: VectorStoreQuery, options?: object): Promise<VectorStoreQueryResult>;
    /**
     * Processes Elasticsearch response into VectorStoreQueryResult format
     * @param res - Elasticsearch search response
     * @returns Formatted query results
     * @private
     */
    private getVectorSearchQueryResultFromResponse;
}
|
|
96
|
+
|
|
97
|
+
export { ElasticSearchVectorStore };
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import { Client } from '@elastic/elasticsearch';
|
|
2
|
+
import { BaseVectorStore, BaseNode, VectorStoreQuery, VectorStoreQueryResult } from '@vectorstores/core';
|
|
3
|
+
|
|
4
|
+
/** Configuration accepted by the ElasticSearchVectorStore constructor. */
type ElasticSearchParams = {
    /** Name of the Elasticsearch index used for all operations. */
    indexName: string;
    /** Ready-made client; when provided, no client is built from the settings below. */
    esClient?: Client;
    /** URL of the Elasticsearch node (mutually exclusive with esCloudId). */
    esUrl?: string;
    /** Elastic Cloud id (mutually exclusive with esUrl). */
    esCloudId?: string;
    /** API key; preferred over username/password when both are given. */
    esApiKey?: string;
    esUsername?: string;
    esPassword?: string;
    /** Field holding the node text. Defaults to "content". */
    textField?: string;
    /** Field holding the embedding. Defaults to "embedding". */
    vectorField?: string;
    /**
     * Value written as the `similarity` of the dense_vector mapping.
     * Defaults to "cosine". Plain string literals are accepted because the
     * enum below is not exported and therefore cannot be referenced by callers.
     */
    distanceStrategy?: DISTANCE_STARTEGIES | "cosine" | "l2_norm" | "dot_product" | "max_inner_product";
};
/**
 * Named distance strategies.
 * NOTE(review): Elasticsearch documents `l2_norm`, `dot_product`, `cosine` and
 * `max_inner_product` as dense_vector similarities; confirm that "euclidean"
 * and "manhattan" are accepted by the target cluster before relying on them.
 */
declare enum DISTANCE_STARTEGIES {
    COSINE = "cosine",
    EUCLIDEAN = "euclidean",
    MANHATTAN = "manhattan"
}
/**
 * ElasticSearchVectorStore provides vector storage and similarity search capabilities using Elasticsearch.
 * It extends BaseVectorStore to implement vector storage operations with Elasticsearch as the backend.
 */
declare class ElasticSearchVectorStore extends BaseVectorStore {
    storesText: boolean;
    private elasticSearchClient;
    private indexName;
    private esUrl?;
    private esCloudId?;
    private esApiKey?;
    private esUsername?;
    private esPassword?;
    private textField;
    private vectorField;
    private distanceStrategy;
    /**
     * Creates a new instance of ElasticSearchVectorStore
     * @param init - Configuration parameters for Elasticsearch connection and indexing
     */
    constructor(init: ElasticSearchParams);
    /**
     * Returns the Elasticsearch client instance
     * @returns The configured Elasticsearch client
     */
    client(): Client;
    /**
     * Creates an Elasticsearch index if it doesn't exist
     * @param dimensions - Number of dimensions in the vector embedding
     * @private
     */
    private createIndexIfNotExists;
    /**
     * Adds nodes to the vector store
     * @param nodes - Array of BaseNode objects to store
     * @returns Array of node IDs that were successfully stored
     * @throws Error if nodes don't have embeddings
     */
    add(nodes: BaseNode[]): Promise<string[]>;
    /**
     * Deletes nodes from the vector store by reference document ID
     * @param refDocId - Reference document ID to delete
     * @param deleteOptions - Optional deletion parameters
     */
    delete(refDocId: string, deleteOptions?: object): Promise<void>;
    /**
     * Converts metadata filters to Elasticsearch query format
     * @param queryFilters - Metadata filters to convert
     * @returns Elasticsearch compatible filter object
     * @private
     */
    private toElasticSearchFilter;
    /**
     * Normalizes similarity scores to range [0,1]
     * @param scores - Array of raw similarity scores
     * @returns Array of normalized similarity scores
     * @private
     */
    private toLlamaSimilarity;
    /**
     * Performs a vector similarity search query
     * @param query - Vector store query parameters
     * @param options - Optional query parameters
     * @returns Query results containing matching nodes, similarities, and IDs
     * @throws Error if query embedding is not provided
     */
    query(query: VectorStoreQuery, options?: object): Promise<VectorStoreQueryResult>;
    /**
     * Processes Elasticsearch response into VectorStoreQueryResult format
     * @param res - Elasticsearch search response
     * @returns Formatted query results
     * @private
     */
    private getVectorSearchQueryResultFromResponse;
}
|
|
96
|
+
|
|
97
|
+
export { ElasticSearchVectorStore };
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import { Client } from '@elastic/elasticsearch';
|
|
2
|
+
import { BaseVectorStore, BaseNode, VectorStoreQuery, VectorStoreQueryResult } from '@vectorstores/core';
|
|
3
|
+
|
|
4
|
+
/** Configuration accepted by the ElasticSearchVectorStore constructor. */
type ElasticSearchParams = {
    /** Name of the Elasticsearch index used for all operations. */
    indexName: string;
    /** Ready-made client; when provided, no client is built from the settings below. */
    esClient?: Client;
    /** URL of the Elasticsearch node (mutually exclusive with esCloudId). */
    esUrl?: string;
    /** Elastic Cloud id (mutually exclusive with esUrl). */
    esCloudId?: string;
    /** API key; preferred over username/password when both are given. */
    esApiKey?: string;
    esUsername?: string;
    esPassword?: string;
    /** Field holding the node text. Defaults to "content". */
    textField?: string;
    /** Field holding the embedding. Defaults to "embedding". */
    vectorField?: string;
    /**
     * Value written as the `similarity` of the dense_vector mapping.
     * Defaults to "cosine". Plain string literals are accepted because the
     * enum below is not exported and therefore cannot be referenced by callers.
     */
    distanceStrategy?: DISTANCE_STARTEGIES | "cosine" | "l2_norm" | "dot_product" | "max_inner_product";
};
/**
 * Named distance strategies.
 * NOTE(review): Elasticsearch documents `l2_norm`, `dot_product`, `cosine` and
 * `max_inner_product` as dense_vector similarities; confirm that "euclidean"
 * and "manhattan" are accepted by the target cluster before relying on them.
 */
declare enum DISTANCE_STARTEGIES {
    COSINE = "cosine",
    EUCLIDEAN = "euclidean",
    MANHATTAN = "manhattan"
}
/**
 * ElasticSearchVectorStore provides vector storage and similarity search capabilities using Elasticsearch.
 * It extends BaseVectorStore to implement vector storage operations with Elasticsearch as the backend.
 */
declare class ElasticSearchVectorStore extends BaseVectorStore {
    storesText: boolean;
    private elasticSearchClient;
    private indexName;
    private esUrl?;
    private esCloudId?;
    private esApiKey?;
    private esUsername?;
    private esPassword?;
    private textField;
    private vectorField;
    private distanceStrategy;
    /**
     * Creates a new instance of ElasticSearchVectorStore
     * @param init - Configuration parameters for Elasticsearch connection and indexing
     */
    constructor(init: ElasticSearchParams);
    /**
     * Returns the Elasticsearch client instance
     * @returns The configured Elasticsearch client
     */
    client(): Client;
    /**
     * Creates an Elasticsearch index if it doesn't exist
     * @param dimensions - Number of dimensions in the vector embedding
     * @private
     */
    private createIndexIfNotExists;
    /**
     * Adds nodes to the vector store
     * @param nodes - Array of BaseNode objects to store
     * @returns Array of node IDs that were successfully stored
     * @throws Error if nodes don't have embeddings
     */
    add(nodes: BaseNode[]): Promise<string[]>;
    /**
     * Deletes nodes from the vector store by reference document ID
     * @param refDocId - Reference document ID to delete
     * @param deleteOptions - Optional deletion parameters
     */
    delete(refDocId: string, deleteOptions?: object): Promise<void>;
    /**
     * Converts metadata filters to Elasticsearch query format
     * @param queryFilters - Metadata filters to convert
     * @returns Elasticsearch compatible filter object
     * @private
     */
    private toElasticSearchFilter;
    /**
     * Normalizes similarity scores to range [0,1]
     * @param scores - Array of raw similarity scores
     * @returns Array of normalized similarity scores
     * @private
     */
    private toLlamaSimilarity;
    /**
     * Performs a vector similarity search query
     * @param query - Vector store query parameters
     * @param options - Optional query parameters
     * @returns Query results containing matching nodes, similarities, and IDs
     * @throws Error if query embedding is not provided
     */
    query(query: VectorStoreQuery, options?: object): Promise<VectorStoreQueryResult>;
    /**
     * Processes Elasticsearch response into VectorStoreQueryResult format
     * @param res - Elasticsearch search response
     * @returns Formatted query results
     * @private
     */
    private getVectorSearchQueryResultFromResponse;
}
|
|
96
|
+
|
|
97
|
+
export { ElasticSearchVectorStore };
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
import { BaseVectorStore, nodeToMetadata, MetadataMode, metadataDictToNode } from '@vectorstores/core';
|
|
2
|
+
import { Client } from '@elastic/elasticsearch';
|
|
3
|
+
|
|
4
|
+
/**
 * Builds an Elasticsearch client from connection settings.
 *
 * Exactly one of `esUrl` (self-managed node) or `esCloudId` (Elastic Cloud)
 * must be given. Authentication is optional: an API key takes precedence
 * over basic credentials when both are supplied.
 *
 * @param params - Connection settings
 * @param params.esUrl - URL of the Elasticsearch node
 * @param params.esCloudId - Elastic Cloud deployment id
 * @param params.esApiKey - API key used for authentication
 * @param params.esUsername - User name for basic authentication
 * @param params.esPassword - Password for basic authentication
 * @returns A new Elasticsearch client
 * @throws Error if both or neither of esUrl / esCloudId are provided, or if
 *   only one half of the basic credentials is provided without an API key
 */
function getElasticSearchClient({ esUrl, esCloudId, esApiKey, esUsername, esPassword }) {
    if (esUrl && esCloudId) {
        throw new Error("Both esUrl and esCloudId cannot be provided");
    }
    if (!esUrl && !esCloudId) {
        throw new Error("Either elasticsearch url or cloud id must be provided");
    }
    const clientOptions = esUrl ? { node: esUrl } : { cloud: { id: esCloudId } };
    if (esApiKey) {
        clientOptions.auth = { apiKey: esApiKey };
    } else if (esUsername && esPassword) {
        clientOptions.auth = { username: esUsername, password: esPassword };
    } else if (esUsername || esPassword) {
        // Half-specified credentials used to be dropped silently, yielding an
        // unauthenticated client that only failed later on the first request.
        throw new Error("Both esUsername and esPassword must be provided for basic authentication");
    }
    return new Client(clientOptions);
}
|
|
30
|
+
|
|
31
|
+
/**
 * ElasticSearchVectorStore provides vector storage and similarity search capabilities using Elasticsearch.
 * It extends BaseVectorStore to implement vector storage operations with Elasticsearch as the backend.
 */
class ElasticSearchVectorStore extends BaseVectorStore {
    /**
     * Creates a new instance of ElasticSearchVectorStore.
     *
     * When `init.esClient` is supplied it is used as-is; otherwise a client is
     * built from the url / cloud id and credential settings.
     *
     * @param init - Configuration parameters for Elasticsearch connection and indexing
     */
    constructor(init) {
        super();
        this.storesText = true;
        this.indexName = init.indexName;
        this.esUrl = init.esUrl ?? undefined;
        this.esCloudId = init.esCloudId ?? undefined;
        this.esApiKey = init.esApiKey ?? undefined;
        this.esUsername = init.esUsername ?? undefined;
        this.esPassword = init.esPassword ?? undefined;
        this.textField = init.textField ?? "content";
        this.vectorField = init.vectorField ?? "embedding";
        this.distanceStrategy = init.distanceStrategy ?? "cosine";
        if (!init.esClient) {
            this.elasticSearchClient = getElasticSearchClient({
                esUrl: this.esUrl,
                esCloudId: this.esCloudId,
                esApiKey: this.esApiKey,
                esUsername: this.esUsername,
                esPassword: this.esPassword
            });
        } else {
            this.elasticSearchClient = init.esClient;
        }
    }

    /**
     * Returns the Elasticsearch client instance
     * @returns The configured Elasticsearch client
     */
    client() {
        return this.elasticSearchClient;
    }

    /**
     * Creates the Elasticsearch index if it doesn't exist.
     *
     * The mapping declares the text field, the dense_vector field (indexed,
     * using the configured distance strategy as its similarity) and keyword
     * sub-fields for the document id metadata.
     *
     * @param dimensions - Number of dimensions in the vector embedding
     * @private
     */
    async createIndexIfNotExists(dimensions) {
        const indexExists = await this.elasticSearchClient.indices.exists({
            index: this.indexName
        });
        if (indexExists) {
            return;
        }
        try {
            // `mappings` is passed at the top level, like every other request
            // in this class, instead of through the legacy `body` wrapper.
            await this.elasticSearchClient.indices.create({
                index: this.indexName,
                mappings: {
                    properties: {
                        [this.textField]: {
                            type: "text"
                        },
                        [this.vectorField]: {
                            type: "dense_vector",
                            dims: dimensions,
                            index: true,
                            similarity: this.distanceStrategy
                        },
                        metadata: {
                            properties: {
                                document_id: {
                                    type: "keyword"
                                },
                                doc_id: {
                                    type: "keyword"
                                },
                                ref_doc_id: {
                                    type: "keyword"
                                }
                            }
                        }
                    }
                }
            });
        } catch (err) {
            // Another writer may have created the index between the existence
            // check and the create call; that outcome is what we wanted anyway.
            if (err?.meta?.body?.error?.type !== "resource_already_exists_exception") {
                throw err;
            }
        }
    }

    /**
     * Adds nodes to the vector store
     * @param nodes - Array of BaseNode objects to store
     * @returns Array of node IDs that were successfully stored
     * @throws Error if the first node has no embedding, or if any bulk item fails
     */
    async add(nodes) {
        if (!nodes.length) {
            return [];
        }
        // The first node's embedding length fixes the index dimensionality.
        const dimensions = nodes[0]?.getEmbedding()?.length;
        if (!dimensions) {
            throw new Error("Embedding is required");
        }
        await this.createIndexIfNotExists(dimensions);
        // Bulk payload: one action line followed by one document line per node.
        const operations = nodes.flatMap((node) => [
            {
                index: {
                    _index: this.indexName,
                    _id: node.id_
                }
            },
            {
                [this.vectorField]: node.getEmbedding(),
                [this.textField]: node.getContent(MetadataMode.NONE),
                metadata: nodeToMetadata(node, true)
            }
        ]);
        const results = await this.elasticSearchClient.bulk({
            operations,
            refresh: true
        });
        if (results.errors) {
            // Report only the items that actually failed; successful items
            // carry no `error` and used to show up as blank lines.
            const reasons = results.items
                .map((item) => item.index)
                .filter((outcome) => outcome?.error != null)
                .map((outcome) => `${outcome._id}: ${outcome.error.reason ?? outcome.error.type}`);
            throw new Error(`Failed to insert documents:\n${reasons.join("\n")}`);
        }
        return nodes.map((node) => node.id_);
    }

    /**
     * Deletes nodes from the vector store by reference document ID
     * @param refDocId - Reference document ID to delete
     * @param deleteOptions - Optional deletion parameters (currently unused)
     */
    async delete(refDocId, deleteOptions) {
        await this.elasticSearchClient.deleteByQuery({
            index: this.indexName,
            query: {
                term: {
                    "metadata.ref_doc_id": refDocId
                }
            },
            refresh: true
        });
    }

    /**
     * Converts metadata filters to Elasticsearch query format.
     *
     * A single filter becomes a bare `term` query; any other count becomes a
     * `bool.must` of `term` queries, i.e. all filters have to match.
     *
     * NOTE(review): only `key` and `value` of each filter are read here; any
     * operator or and/or condition carried by the filters is not applied.
     *
     * @param queryFilters - Metadata filters to convert
     * @returns Elasticsearch compatible filter object
     * @private
     */
    toElasticSearchFilter(queryFilters) {
        const toTerm = (filter) => ({
            term: {
                [`metadata.${filter.key}`]: filter.value
            }
        });
        if (queryFilters.filters.length === 1) {
            const filter = queryFilters.filters[0];
            if (filter) {
                return toTerm(filter);
            }
        }
        return {
            bool: {
                must: queryFilters.filters.map(toTerm)
            }
        };
    }

    /**
     * Normalizes similarity scores to range [0,1] using min-max scaling.
     *
     * When every score is identical the scale is undefined, so each entry
     * becomes 1 if that score is positive and 0 otherwise.
     *
     * @param scores - Array of raw similarity scores
     * @returns Array of normalized similarity scores
     * @private
     */
    toLlamaSimilarity(scores) {
        if (!scores.length) {
            return [];
        }
        const maxScore = Math.max(...scores);
        const minScore = Math.min(...scores);
        if (maxScore === minScore) {
            return scores.map(() => (maxScore > 0 ? 1 : 0));
        }
        return scores.map((score) => (score - minScore) / (maxScore - minScore));
    }

    /**
     * Performs a vector similarity search query
     * @param query - Vector store query parameters
     * @param options - Optional query parameters (currently unused)
     * @returns Query results containing matching nodes, similarities, and IDs
     * @throws Error if query embedding is not provided
     */
    async query(query, options) {
        if (!query.queryEmbedding) {
            throw new Error("query embedding is not provided");
        }
        // Elasticsearch rejects kNN requests whose num_candidates exceeds 10,000.
        const maxNumCandidates = 10000;
        const elasticSearchFilter = query.filters
            ? [this.toElasticSearchFilter(query.filters)]
            : [];
        const searchResponse = await this.elasticSearchClient.search({
            index: this.indexName,
            size: query.similarityTopK,
            knn: {
                field: this.vectorField,
                query_vector: query.queryEmbedding,
                k: query.similarityTopK,
                // Oversample candidates tenfold, capped at the server limit.
                num_candidates: Math.min(query.similarityTopK * 10, maxNumCandidates),
                filter: elasticSearchFilter
            }
        });
        return this.getVectorSearchQueryResultFromResponse(searchResponse);
    }

    /**
     * Processes Elasticsearch response into VectorStoreQueryResult format.
     *
     * Each hit is turned back into a node from its stored metadata, then its
     * text and embedding are restored from the configured fields. Missing
     * values fall back to empty metadata, empty text, an empty embedding,
     * an empty id and a score of 0.
     *
     * @param res - Elasticsearch search response
     * @returns Formatted query results
     * @private
     */
    getVectorSearchQueryResultFromResponse(res) {
        const hits = res.hits.hits;
        const topKNodes = [];
        const topKIDs = [];
        const topKScores = [];
        for (const hit of hits) {
            const source = hit._source;
            const metadata = source?.metadata ?? {};
            const text = source?.[this.textField] ?? "";
            const embedding = source?.[this.vectorField] ?? [];
            const nodeId = hit._id ?? "";
            const score = hit._score ?? 0;
            const node = metadataDictToNode(metadata);
            node.setContent(text);
            node.embedding = embedding;
            topKNodes.push(node);
            topKIDs.push(nodeId);
            topKScores.push(score);
        }
        return {
            nodes: topKNodes,
            similarities: this.toLlamaSimilarity(topKScores),
            ids: topKIDs
        };
    }
}
|
|
263
|
+
|
|
264
|
+
export { ElasticSearchVectorStore };
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
import { BaseVectorStore, nodeToMetadata, MetadataMode, metadataDictToNode } from '@vectorstores/core';
|
|
2
|
+
import { Client } from '@elastic/elasticsearch';
|
|
3
|
+
|
|
4
|
+
/**
 * Builds an Elasticsearch `Client` from connection settings.
 *
 * Exactly one of `esUrl` / `esCloudId` must be supplied. Authentication is
 * optional: an API key takes precedence, otherwise a username/password pair
 * is used only when both parts are present.
 *
 * @param params - Connection settings (`esUrl`, `esCloudId`, `esApiKey`, `esUsername`, `esPassword`)
 * @returns A new client configured with the chosen endpoint and credentials
 * @throws Error if both or neither of `esUrl` and `esCloudId` are provided
 */
function getElasticSearchClient({ esUrl, esCloudId, esApiKey, esUsername, esPassword }) {
    if (esUrl && esCloudId) {
        throw new Error("Both esUrl and esCloudId cannot be provided");
    }
    if (!esUrl && !esCloudId) {
        throw new Error("Either elasticsearch url or cloud id must be provided");
    }
    const endpoint = esUrl ? { node: esUrl } : { cloud: { id: esCloudId } };
    let credentials;
    if (esApiKey) {
        credentials = { apiKey: esApiKey };
    } else if (esUsername && esPassword) {
        credentials = { username: esUsername, password: esPassword };
    }
    const settings = credentials ? { ...endpoint, auth: credentials } : endpoint;
    return new Client(settings);
}
|
|
30
|
+
|
|
31
|
+
/**
 * ElasticSearchVectorStore provides vector storage and similarity search capabilities using Elasticsearch.
 * It extends BaseVectorStore to implement vector storage operations with Elasticsearch as the backend.
 */
class ElasticSearchVectorStore extends BaseVectorStore {
    /**
     * Creates a new instance of ElasticSearchVectorStore.
     *
     * When `init.esClient` is supplied it is used as-is; otherwise a client is
     * built from the connection settings via `getElasticSearchClient`.
     *
     * @param init - Configuration parameters for Elasticsearch connection and indexing
     */
    constructor(init) {
        super();
        this.storesText = true;
        this.indexName = init.indexName;
        this.esUrl = init.esUrl ?? undefined;
        this.esCloudId = init.esCloudId ?? undefined;
        this.esApiKey = init.esApiKey ?? undefined;
        this.esUsername = init.esUsername ?? undefined;
        this.esPassword = init.esPassword ?? undefined;
        this.textField = init.textField ?? "content";
        this.vectorField = init.vectorField ?? "embedding";
        this.distanceStrategy = init.distanceStrategy ?? "cosine";
        if (!init.esClient) {
            this.elasticSearchClient = getElasticSearchClient({
                esUrl: this.esUrl,
                esCloudId: this.esCloudId,
                esApiKey: this.esApiKey,
                esUsername: this.esUsername,
                esPassword: this.esPassword
            });
        } else {
            this.elasticSearchClient = init.esClient;
        }
    }

    /**
     * Returns the Elasticsearch client instance
     * @returns The configured Elasticsearch client
     */
    client() {
        return this.elasticSearchClient;
    }

    /**
     * Creates the Elasticsearch index if it doesn't exist.
     *
     * The mapping declares the text field, a `dense_vector` field sized to
     * `dimensions` using the configured similarity, and keyword sub-fields for
     * the document-id metadata keys.
     *
     * @param dimensions - Number of dimensions in the vector embedding
     * @private
     */
    async createIndexIfNotExists(dimensions) {
        const indexExists = await this.elasticSearchClient.indices.exists({
            index: this.indexName
        });
        if (!indexExists) {
            await this.elasticSearchClient.indices.create({
                index: this.indexName,
                body: {
                    mappings: {
                        properties: {
                            [this.textField]: {
                                type: "text"
                            },
                            [this.vectorField]: {
                                type: "dense_vector",
                                dims: dimensions,
                                index: true,
                                similarity: this.distanceStrategy
                            },
                            metadata: {
                                properties: {
                                    document_id: {
                                        type: "keyword"
                                    },
                                    doc_id: {
                                        type: "keyword"
                                    },
                                    ref_doc_id: {
                                        type: "keyword"
                                    }
                                }
                            }
                        }
                    }
                }
            });
        }
    }

    /**
     * Adds nodes to the vector store.
     *
     * The embedding length of the first node determines the index dimensions.
     * All nodes are written in a single bulk request with `refresh: true`.
     *
     * @param nodes - Array of BaseNode objects to store
     * @returns Array of node IDs that were successfully stored
     * @throws Error if the first node has no embedding, or if any bulk item fails
     */
    async add(nodes) {
        if (!nodes.length) {
            return [];
        }
        const dimensions = nodes[0]?.getEmbedding()?.length;
        if (!dimensions) {
            throw new Error("Embedding is required");
        }
        await this.createIndexIfNotExists(dimensions);
        // The bulk API expects an action line followed by the document for each node.
        const operations = nodes.flatMap((node) => [
            {
                index: {
                    _index: this.indexName,
                    _id: node.id_
                }
            },
            {
                [this.vectorField]: node.getEmbedding(),
                [this.textField]: node.getContent(MetadataMode.NONE),
                metadata: nodeToMetadata(node, true)
            }
        ]);
        const results = await this.elasticSearchClient.bulk({
            operations,
            refresh: true
        });
        if (results.errors) {
            // Only failed items carry an `error`; skip the successful ones so the
            // message is not padded with blank lines.
            const reasons = results.items
                .filter((item) => item.index?.error)
                .map((item) => item.index.error.reason ?? "unknown error");
            throw new Error(`Failed to insert documents:\n${reasons.join("\n")}`);
        }
        return nodes.map((node) => node.id_);
    }

    /**
     * Deletes nodes from the vector store by reference document ID.
     *
     * Issues a delete-by-query on `metadata.ref_doc_id` with `refresh: true`.
     *
     * @param refDocId - Reference document ID to delete
     * @param deleteOptions - Optional deletion parameters (currently unused)
     */
    async delete(refDocId, deleteOptions) {
        await this.elasticSearchClient.deleteByQuery({
            index: this.indexName,
            query: {
                term: {
                    "metadata.ref_doc_id": refDocId
                }
            },
            refresh: true
        });
    }

    /**
     * Converts metadata filters to Elasticsearch query format.
     *
     * A single filter becomes a bare `term` clause; otherwise every filter is
     * turned into a `term` clause inside `bool.must`.
     *
     * @param queryFilters - Metadata filters to convert
     * @returns Elasticsearch compatible filter object
     * @private
     */
    toElasticSearchFilter(queryFilters) {
        if (queryFilters.filters.length === 1) {
            const filter = queryFilters.filters[0];
            if (filter) {
                return {
                    term: {
                        [`metadata.${filter.key}`]: filter.value
                    }
                };
            }
        }
        return {
            bool: {
                must: queryFilters.filters.map((filter) => ({
                    term: {
                        [`metadata.${filter.key}`]: filter.value
                    }
                }))
            }
        };
    }

    /**
     * Normalizes similarity scores to range [0,1] using min-max scaling.
     *
     * If all scores are equal, each becomes 1 when the score is positive and 0 otherwise.
     *
     * @param scores - Array of raw similarity scores
     * @returns Array of normalized similarity scores
     * @private
     */
    toLlamaSimilarity(scores) {
        if (!scores.length) {
            return [];
        }
        const maxScore = Math.max(...scores);
        const minScore = Math.min(...scores);
        if (maxScore === minScore) {
            return scores.map(() => (maxScore > 0 ? 1 : 0));
        }
        return scores.map((score) => (score - minScore) / (maxScore - minScore));
    }

    /**
     * Performs a vector similarity search query.
     *
     * @param query - Vector store query parameters
     * @param options - Optional query parameters (currently unused)
     * @returns Query results containing matching nodes, similarities, and IDs
     * @throws Error if query embedding is not provided
     */
    async query(query, options) {
        if (!query.queryEmbedding) {
            throw new Error("query embedding is not provided");
        }
        // Elasticsearch rejects kNN requests whose num_candidates exceeds 10,000.
        const maxKnnCandidates = 10000;
        let elasticSearchFilter = [];
        if (query.filters) {
            elasticSearchFilter = [this.toElasticSearchFilter(query.filters)];
        }
        const searchResponse = await this.elasticSearchClient.search({
            index: this.indexName,
            size: query.similarityTopK,
            knn: {
                field: this.vectorField,
                query_vector: query.queryEmbedding,
                k: query.similarityTopK,
                // Oversample 10x for recall, but stay within the server-side limit
                // so that large top-k values do not fail outright.
                num_candidates: Math.min(query.similarityTopK * 10, maxKnnCandidates),
                filter: elasticSearchFilter
            }
        });
        return this.getVectorSearchQueryResultFromResponse(searchResponse);
    }

    /**
     * Processes Elasticsearch response into VectorStoreQueryResult format.
     *
     * Each hit is turned into a node via `metadataDictToNode`, then populated
     * with the stored text and embedding; scores are normalized before return.
     *
     * @param res - Elasticsearch search response
     * @returns Formatted query results
     * @private
     */
    getVectorSearchQueryResultFromResponse(res) {
        const hits = res.hits.hits;
        const topKNodes = [];
        const topKIDs = [];
        const topKScores = [];
        for (const hit of hits) {
            const source = hit._source;
            const metadata = source?.metadata ?? {};
            const text = source?.[this.textField] ?? "";
            const embedding = source?.[this.vectorField] ?? [];
            const nodeId = hit._id ?? "";
            const score = hit._score ?? 0;
            const node = metadataDictToNode(metadata);
            node.setContent(text);
            node.embedding = embedding;
            topKNodes.push(node);
            topKIDs.push(nodeId);
            topKScores.push(score);
        }
        return {
            nodes: topKNodes,
            similarities: this.toLlamaSimilarity(topKScores),
            ids: topKIDs
        };
    }
}
|
|
263
|
+
|
|
264
|
+
export { ElasticSearchVectorStore };
|
package/package.json
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@vectorstores/elastic-search",
|
|
3
|
+
"description": "Elastic Search Storage for vectorstores",
|
|
4
|
+
"version": "0.1.0",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "./dist/index.cjs",
|
|
7
|
+
"module": "./dist/index.js",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"edge-light": {
|
|
11
|
+
"types": "./dist/index.edge-light.d.ts",
|
|
12
|
+
"default": "./dist/index.edge-light.js"
|
|
13
|
+
},
|
|
14
|
+
"workerd": {
|
|
15
|
+
"types": "./dist/index.edge-light.d.ts",
|
|
16
|
+
"default": "./dist/index.edge-light.js"
|
|
17
|
+
},
|
|
18
|
+
"require": {
|
|
19
|
+
"types": "./dist/index.d.cts",
|
|
20
|
+
"default": "./dist/index.cjs"
|
|
21
|
+
},
|
|
22
|
+
"import": {
|
|
23
|
+
"types": "./dist/index.d.ts",
|
|
24
|
+
"default": "./dist/index.js"
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
},
|
|
28
|
+
"files": [
|
|
29
|
+
"dist"
|
|
30
|
+
],
|
|
31
|
+
"repository": {
|
|
32
|
+
"type": "git",
|
|
33
|
+
"url": "git+https://github.com/schiesser/vectorstores.git",
|
|
34
|
+
"directory": "packages/providers/storage/elastic-search"
|
|
35
|
+
},
|
|
36
|
+
"devDependencies": {
|
|
37
|
+
"vitest": "^3.0.9",
|
|
38
|
+
"@vectorstores/core": "0.1.0",
|
|
39
|
+
"@vectorstores/env": "0.1.0"
|
|
40
|
+
},
|
|
41
|
+
"peerDependencies": {
|
|
42
|
+
"@vectorstores/core": "0.1.0",
|
|
43
|
+
"@vectorstores/env": "0.1.0"
|
|
44
|
+
},
|
|
45
|
+
"dependencies": {
|
|
46
|
+
"@elastic/elasticsearch": "^8.17.1"
|
|
47
|
+
},
|
|
48
|
+
"scripts": {
|
|
49
|
+
"build": "bunchee",
|
|
50
|
+
"dev": "bunchee --watch",
|
|
51
|
+
"test": "vitest run"
|
|
52
|
+
}
|
|
53
|
+
}
|