langchain 0.0.165 → 0.0.167
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cache/momento.cjs +7 -7
- package/dist/cache/momento.d.ts +1 -1
- package/dist/cache/momento.js +1 -1
- package/dist/document_loaders/fs/unstructured.cjs +40 -0
- package/dist/document_loaders/fs/unstructured.d.ts +27 -0
- package/dist/document_loaders/fs/unstructured.js +40 -0
- package/dist/embeddings/bedrock.cjs +43 -22
- package/dist/embeddings/bedrock.d.ts +11 -4
- package/dist/embeddings/bedrock.js +43 -22
- package/dist/experimental/chat_models/anthropic_functions.cjs +34 -2
- package/dist/experimental/chat_models/anthropic_functions.d.ts +15 -5
- package/dist/experimental/chat_models/anthropic_functions.js +34 -2
- package/dist/llms/yandex.cjs +100 -0
- package/dist/llms/yandex.d.ts +40 -0
- package/dist/llms/yandex.js +96 -0
- package/dist/load/import_constants.cjs +3 -0
- package/dist/load/import_constants.js +3 -0
- package/dist/load/import_map.cjs +6 -2
- package/dist/load/import_map.d.ts +4 -0
- package/dist/load/import_map.js +4 -0
- package/dist/prompts/chat.cjs +19 -2
- package/dist/prompts/chat.d.ts +1 -0
- package/dist/prompts/chat.js +19 -2
- package/dist/retrievers/multi_vector.d.ts +3 -3
- package/dist/retrievers/parent_document.cjs +6 -16
- package/dist/retrievers/parent_document.d.ts +5 -12
- package/dist/retrievers/parent_document.js +6 -16
- package/dist/retrievers/self_query/base.cjs +4 -1
- package/dist/retrievers/self_query/base.d.ts +3 -2
- package/dist/retrievers/self_query/base.js +4 -1
- package/dist/retrievers/self_query/index.cjs +2 -2
- package/dist/retrievers/self_query/index.d.ts +2 -0
- package/dist/retrievers/self_query/index.js +2 -2
- package/dist/runnables/remote.cjs +225 -0
- package/dist/runnables/remote.d.ts +28 -0
- package/dist/runnables/remote.js +221 -0
- package/dist/schema/index.cjs +1 -1
- package/dist/schema/index.d.ts +1 -1
- package/dist/schema/index.js +1 -1
- package/dist/schema/runnable/base.cjs +4 -4
- package/dist/schema/runnable/base.d.ts +9 -7
- package/dist/schema/runnable/base.js +4 -4
- package/dist/schema/runnable/remote.cjs +225 -0
- package/dist/schema/runnable/remote.d.ts +28 -0
- package/dist/schema/runnable/remote.js +221 -0
- package/dist/schema/storage.d.ts +28 -1
- package/dist/storage/encoder_backed.cjs +14 -2
- package/dist/storage/encoder_backed.d.ts +2 -0
- package/dist/storage/encoder_backed.js +12 -1
- package/dist/storage/in_memory.cjs +1 -1
- package/dist/storage/in_memory.js +1 -1
- package/dist/storage/ioredis.cjs +4 -4
- package/dist/storage/ioredis.js +4 -4
- package/dist/storage/vercel_kv.cjs +146 -0
- package/dist/storage/vercel_kv.d.ts +46 -0
- package/dist/storage/vercel_kv.js +142 -0
- package/dist/stores/doc/in_memory.cjs +13 -0
- package/dist/stores/doc/in_memory.d.ts +6 -1
- package/dist/stores/doc/in_memory.js +13 -0
- package/dist/stores/message/momento.cjs +11 -11
- package/dist/stores/message/momento.d.ts +1 -1
- package/dist/stores/message/momento.js +1 -1
- package/dist/util/time.cjs +14 -0
- package/dist/util/time.d.ts +6 -0
- package/dist/util/time.js +10 -0
- package/dist/vectorstores/cassandra.cjs +4 -2
- package/dist/vectorstores/cassandra.js +4 -2
- package/dist/vectorstores/elasticsearch.cjs +3 -1
- package/dist/vectorstores/elasticsearch.js +3 -1
- package/dist/vectorstores/momento_vector_index.cjs +292 -0
- package/dist/vectorstores/momento_vector_index.d.ts +135 -0
- package/dist/vectorstores/momento_vector_index.js +265 -0
- package/dist/vectorstores/neo4j_vector.cjs +578 -0
- package/dist/vectorstores/neo4j_vector.d.ts +61 -0
- package/dist/vectorstores/neo4j_vector.js +548 -0
- package/dist/vectorstores/supabase.cjs +37 -8
- package/dist/vectorstores/supabase.d.ts +28 -1
- package/dist/vectorstores/supabase.js +37 -8
- package/llms/yandex.cjs +1 -0
- package/llms/yandex.d.ts +1 -0
- package/llms/yandex.js +1 -0
- package/package.json +73 -3
- package/runnables/remote.cjs +1 -0
- package/runnables/remote.d.ts +1 -0
- package/runnables/remote.js +1 -0
- package/storage/encoder_backed.cjs +1 -0
- package/storage/encoder_backed.d.ts +1 -0
- package/storage/encoder_backed.js +1 -0
- package/storage/vercel_kv.cjs +1 -0
- package/storage/vercel_kv.d.ts +1 -0
- package/storage/vercel_kv.js +1 -0
- package/util/time.cjs +1 -0
- package/util/time.d.ts +1 -0
- package/util/time.js +1 -0
- package/vectorstores/momento_vector_index.cjs +1 -0
- package/vectorstores/momento_vector_index.d.ts +1 -0
- package/vectorstores/momento_vector_index.js +1 -0
- package/vectorstores/neo4j_vector.cjs +1 -0
- package/vectorstores/neo4j_vector.d.ts +1 -0
- package/vectorstores/neo4j_vector.js +1 -0
|
@@ -0,0 +1,548 @@
|
|
|
1
|
+
import neo4j from "neo4j-driver";
|
|
2
|
+
import * as uuid from "uuid";
|
|
3
|
+
import { Document } from "../document.js";
|
|
4
|
+
import { VectorStore } from "./base.js";
|
|
5
|
+
const DEFAULT_SEARCH_TYPE = "vector";
|
|
6
|
+
const DEFAULT_DISTANCE_STRATEGY = "cosine";
|
|
7
|
+
export class Neo4jVectorStore extends VectorStore {
|
|
8
|
+
_vectorstoreType() {
|
|
9
|
+
return "neo4jvector";
|
|
10
|
+
}
|
|
11
|
+
constructor(embeddings, config) {
|
|
12
|
+
super(embeddings, config);
|
|
13
|
+
Object.defineProperty(this, "driver", {
|
|
14
|
+
enumerable: true,
|
|
15
|
+
configurable: true,
|
|
16
|
+
writable: true,
|
|
17
|
+
value: void 0
|
|
18
|
+
});
|
|
19
|
+
Object.defineProperty(this, "database", {
|
|
20
|
+
enumerable: true,
|
|
21
|
+
configurable: true,
|
|
22
|
+
writable: true,
|
|
23
|
+
value: void 0
|
|
24
|
+
});
|
|
25
|
+
Object.defineProperty(this, "preDeleteCollection", {
|
|
26
|
+
enumerable: true,
|
|
27
|
+
configurable: true,
|
|
28
|
+
writable: true,
|
|
29
|
+
value: void 0
|
|
30
|
+
});
|
|
31
|
+
Object.defineProperty(this, "nodeLabel", {
|
|
32
|
+
enumerable: true,
|
|
33
|
+
configurable: true,
|
|
34
|
+
writable: true,
|
|
35
|
+
value: void 0
|
|
36
|
+
});
|
|
37
|
+
Object.defineProperty(this, "embeddingNodeProperty", {
|
|
38
|
+
enumerable: true,
|
|
39
|
+
configurable: true,
|
|
40
|
+
writable: true,
|
|
41
|
+
value: void 0
|
|
42
|
+
});
|
|
43
|
+
Object.defineProperty(this, "embeddingDimension", {
|
|
44
|
+
enumerable: true,
|
|
45
|
+
configurable: true,
|
|
46
|
+
writable: true,
|
|
47
|
+
value: void 0
|
|
48
|
+
});
|
|
49
|
+
Object.defineProperty(this, "textNodeProperty", {
|
|
50
|
+
enumerable: true,
|
|
51
|
+
configurable: true,
|
|
52
|
+
writable: true,
|
|
53
|
+
value: void 0
|
|
54
|
+
});
|
|
55
|
+
Object.defineProperty(this, "keywordIndexName", {
|
|
56
|
+
enumerable: true,
|
|
57
|
+
configurable: true,
|
|
58
|
+
writable: true,
|
|
59
|
+
value: void 0
|
|
60
|
+
});
|
|
61
|
+
Object.defineProperty(this, "indexName", {
|
|
62
|
+
enumerable: true,
|
|
63
|
+
configurable: true,
|
|
64
|
+
writable: true,
|
|
65
|
+
value: void 0
|
|
66
|
+
});
|
|
67
|
+
Object.defineProperty(this, "retrievalQuery", {
|
|
68
|
+
enumerable: true,
|
|
69
|
+
configurable: true,
|
|
70
|
+
writable: true,
|
|
71
|
+
value: void 0
|
|
72
|
+
});
|
|
73
|
+
Object.defineProperty(this, "searchType", {
|
|
74
|
+
enumerable: true,
|
|
75
|
+
configurable: true,
|
|
76
|
+
writable: true,
|
|
77
|
+
value: void 0
|
|
78
|
+
});
|
|
79
|
+
Object.defineProperty(this, "distanceStrategy", {
|
|
80
|
+
enumerable: true,
|
|
81
|
+
configurable: true,
|
|
82
|
+
writable: true,
|
|
83
|
+
value: DEFAULT_DISTANCE_STRATEGY
|
|
84
|
+
});
|
|
85
|
+
}
|
|
86
|
+
static async initialize(embeddings, config) {
|
|
87
|
+
const store = new Neo4jVectorStore(embeddings, config);
|
|
88
|
+
await store._initializeDriver(config);
|
|
89
|
+
await store._verifyConnectivity();
|
|
90
|
+
const { preDeleteCollection = false, nodeLabel = "Chunk", textNodeProperty = "text", embeddingNodeProperty = "embedding", keywordIndexName = "keyword", indexName = "vector", retrievalQuery = "", searchType = DEFAULT_SEARCH_TYPE, } = config;
|
|
91
|
+
store.embeddingDimension = (await embeddings.embedQuery("foo")).length;
|
|
92
|
+
store.preDeleteCollection = preDeleteCollection;
|
|
93
|
+
store.nodeLabel = nodeLabel;
|
|
94
|
+
store.textNodeProperty = textNodeProperty;
|
|
95
|
+
store.embeddingNodeProperty = embeddingNodeProperty;
|
|
96
|
+
store.keywordIndexName = keywordIndexName;
|
|
97
|
+
store.indexName = indexName;
|
|
98
|
+
store.retrievalQuery = retrievalQuery;
|
|
99
|
+
store.searchType = searchType;
|
|
100
|
+
if (store.preDeleteCollection) {
|
|
101
|
+
await store._dropIndex();
|
|
102
|
+
}
|
|
103
|
+
return store;
|
|
104
|
+
}
|
|
105
|
+
async _initializeDriver({ url, username, password, database = "neo4j", }) {
|
|
106
|
+
try {
|
|
107
|
+
this.driver = neo4j.driver(url, neo4j.auth.basic(username, password));
|
|
108
|
+
this.database = database;
|
|
109
|
+
}
|
|
110
|
+
catch (error) {
|
|
111
|
+
throw new Error("Could not create a Neo4j driver instance. Please check the connection details.");
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
async _verifyConnectivity() {
|
|
115
|
+
await this.driver.verifyAuthentication();
|
|
116
|
+
}
|
|
117
|
+
async close() {
|
|
118
|
+
await this.driver.close();
|
|
119
|
+
}
|
|
120
|
+
async _dropIndex() {
|
|
121
|
+
try {
|
|
122
|
+
await this.query(`
|
|
123
|
+
MATCH (n:\`${this.nodeLabel}\`)
|
|
124
|
+
CALL {
|
|
125
|
+
WITH n
|
|
126
|
+
DETACH DELETE n
|
|
127
|
+
}
|
|
128
|
+
IN TRANSACTIONS OF 10000 ROWS;
|
|
129
|
+
`);
|
|
130
|
+
await this.query(`DROP INDEX ${this.indexName}`);
|
|
131
|
+
}
|
|
132
|
+
catch (error) {
|
|
133
|
+
console.error("An error occurred while dropping the index:", error);
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
137
|
+
async query(query, params = {}) {
|
|
138
|
+
const session = this.driver.session({ database: this.database });
|
|
139
|
+
const result = await session.run(query, params);
|
|
140
|
+
return toObjects(result.records);
|
|
141
|
+
}
|
|
142
|
+
static async fromTexts(texts,
|
|
143
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
144
|
+
metadatas, embeddings, config) {
|
|
145
|
+
const docs = [];
|
|
146
|
+
for (let i = 0; i < texts.length; i += 1) {
|
|
147
|
+
const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas;
|
|
148
|
+
const newDoc = new Document({
|
|
149
|
+
pageContent: texts[i],
|
|
150
|
+
metadata,
|
|
151
|
+
});
|
|
152
|
+
docs.push(newDoc);
|
|
153
|
+
}
|
|
154
|
+
return Neo4jVectorStore.fromDocuments(docs, embeddings, config);
|
|
155
|
+
}
|
|
156
|
+
static async fromDocuments(docs, embeddings, config) {
|
|
157
|
+
const { searchType = DEFAULT_SEARCH_TYPE, createIdIndex = true, textNodeProperties = [], } = config;
|
|
158
|
+
const store = await this.initialize(embeddings, config);
|
|
159
|
+
const embeddingDimension = await store.retrieveExistingIndex();
|
|
160
|
+
if (!embeddingDimension) {
|
|
161
|
+
await store.createNewIndex();
|
|
162
|
+
}
|
|
163
|
+
else if (store.embeddingDimension !== embeddingDimension) {
|
|
164
|
+
throw new Error(`Index with name "${store.indexName}" already exists. The provided embedding function and vector index dimensions do not match.
|
|
165
|
+
Embedding function dimension: ${store.embeddingDimension}
|
|
166
|
+
Vector index dimension: ${embeddingDimension}`);
|
|
167
|
+
}
|
|
168
|
+
if (searchType === "hybrid") {
|
|
169
|
+
const ftsNodeLabel = await store.retrieveExistingFtsIndex();
|
|
170
|
+
if (!ftsNodeLabel) {
|
|
171
|
+
await store.createNewKeywordIndex(textNodeProperties);
|
|
172
|
+
}
|
|
173
|
+
else {
|
|
174
|
+
if (ftsNodeLabel !== store.nodeLabel) {
|
|
175
|
+
throw Error("Vector and keyword index don't index the same node label");
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
if (createIdIndex) {
|
|
180
|
+
await store.query(`CREATE CONSTRAINT IF NOT EXISTS FOR (n:${store.nodeLabel}) REQUIRE n.id IS UNIQUE;`);
|
|
181
|
+
}
|
|
182
|
+
await store.addDocuments(docs);
|
|
183
|
+
return store;
|
|
184
|
+
}
|
|
185
|
+
static async fromExistingIndex(embeddings, config) {
|
|
186
|
+
const { searchType = DEFAULT_SEARCH_TYPE, keywordIndexName = "keyword" } = config;
|
|
187
|
+
if (searchType === "hybrid" && !keywordIndexName) {
|
|
188
|
+
throw Error("keyword_index name has to be specified when using hybrid search option");
|
|
189
|
+
}
|
|
190
|
+
const store = await this.initialize(embeddings, config);
|
|
191
|
+
const embeddingDimension = await store.retrieveExistingIndex();
|
|
192
|
+
if (!embeddingDimension) {
|
|
193
|
+
throw Error("The specified vector index name does not exist. Make sure to check if you spelled it correctly");
|
|
194
|
+
}
|
|
195
|
+
if (store.embeddingDimension !== embeddingDimension) {
|
|
196
|
+
throw new Error(`The provided embedding function and vector index dimensions do not match.
|
|
197
|
+
Embedding function dimension: ${store.embeddingDimension}
|
|
198
|
+
Vector index dimension: ${embeddingDimension}`);
|
|
199
|
+
}
|
|
200
|
+
if (searchType === "hybrid") {
|
|
201
|
+
const ftsNodeLabel = await store.retrieveExistingFtsIndex();
|
|
202
|
+
if (!ftsNodeLabel) {
|
|
203
|
+
throw Error("The specified keyword index name does not exist. Make sure to check if you spelled it correctly");
|
|
204
|
+
}
|
|
205
|
+
else {
|
|
206
|
+
if (ftsNodeLabel !== store.nodeLabel) {
|
|
207
|
+
throw Error("Vector and keyword index don't index the same node label");
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
return store;
|
|
212
|
+
}
|
|
213
|
+
static async fromExistingGraph(embeddings, config) {
|
|
214
|
+
const { textNodeProperties = [], embeddingNodeProperty, searchType = DEFAULT_SEARCH_TYPE, retrievalQuery = "", nodeLabel, } = config;
|
|
215
|
+
let _retrievalQuery = retrievalQuery;
|
|
216
|
+
if (textNodeProperties.length === 0) {
|
|
217
|
+
throw Error("Parameter `text_node_properties` must not be an empty array");
|
|
218
|
+
}
|
|
219
|
+
if (!retrievalQuery) {
|
|
220
|
+
_retrievalQuery = `
|
|
221
|
+
RETURN reduce(str='', k IN ${JSON.stringify(textNodeProperties)} |
|
|
222
|
+
str + '\\n' + k + ': ' + coalesce(node[k], '')) AS text,
|
|
223
|
+
node {.*, \`${embeddingNodeProperty}\`: Null, id: Null, ${textNodeProperties
|
|
224
|
+
.map((prop) => `\`${prop}\`: Null`)
|
|
225
|
+
.join(", ")} } AS metadata, score
|
|
226
|
+
`;
|
|
227
|
+
}
|
|
228
|
+
const store = await this.initialize(embeddings, {
|
|
229
|
+
...config,
|
|
230
|
+
retrievalQuery: _retrievalQuery,
|
|
231
|
+
});
|
|
232
|
+
const embeddingDimension = await store.retrieveExistingIndex();
|
|
233
|
+
if (!embeddingDimension) {
|
|
234
|
+
await store.createNewIndex();
|
|
235
|
+
}
|
|
236
|
+
else if (store.embeddingDimension !== embeddingDimension) {
|
|
237
|
+
throw new Error(`Index with name ${store.indexName} already exists. The provided embedding function and vector index dimensions do not match.\nEmbedding function dimension: ${store.embeddingDimension}\nVector index dimension: ${embeddingDimension}`);
|
|
238
|
+
}
|
|
239
|
+
if (searchType === "hybrid") {
|
|
240
|
+
const ftsNodeLabel = await store.retrieveExistingFtsIndex(textNodeProperties);
|
|
241
|
+
if (!ftsNodeLabel) {
|
|
242
|
+
await store.createNewKeywordIndex(textNodeProperties);
|
|
243
|
+
}
|
|
244
|
+
else {
|
|
245
|
+
if (ftsNodeLabel !== store.nodeLabel) {
|
|
246
|
+
throw Error("Vector and keyword index don't index the same node label");
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
// eslint-disable-next-line no-constant-condition
|
|
251
|
+
while (true) {
|
|
252
|
+
const fetchQuery = `
|
|
253
|
+
MATCH (n:\`${nodeLabel}\`)
|
|
254
|
+
WHERE n.${embeddingNodeProperty} IS null
|
|
255
|
+
AND any(k in $props WHERE n[k] IS NOT null)
|
|
256
|
+
RETURN elementId(n) AS id, reduce(str='', k IN $props |
|
|
257
|
+
str + '\\n' + k + ':' + coalesce(n[k], '')) AS text
|
|
258
|
+
LIMIT 1000
|
|
259
|
+
`;
|
|
260
|
+
const data = await store.query(fetchQuery, { props: textNodeProperties });
|
|
261
|
+
if (!data) {
|
|
262
|
+
continue;
|
|
263
|
+
}
|
|
264
|
+
const textEmbeddings = await embeddings.embedDocuments(data.map((el) => el.text));
|
|
265
|
+
const params = {
|
|
266
|
+
data: data.map((el, index) => ({
|
|
267
|
+
id: el.id,
|
|
268
|
+
embedding: textEmbeddings[index],
|
|
269
|
+
})),
|
|
270
|
+
};
|
|
271
|
+
await store.query(`
|
|
272
|
+
UNWIND $data AS row
|
|
273
|
+
MATCH (n:\`${nodeLabel}\`)
|
|
274
|
+
WHERE elementId(n) = row.id
|
|
275
|
+
CALL db.create.setVectorProperty(n, '${embeddingNodeProperty}', row.embedding)
|
|
276
|
+
YIELD node RETURN count(*)
|
|
277
|
+
`, params);
|
|
278
|
+
if (data.length < 1000) {
|
|
279
|
+
break;
|
|
280
|
+
}
|
|
281
|
+
}
|
|
282
|
+
return store;
|
|
283
|
+
}
|
|
284
|
+
async createNewIndex() {
|
|
285
|
+
const indexQuery = `
|
|
286
|
+
CALL db.index.vector.createNodeIndex(
|
|
287
|
+
$index_name,
|
|
288
|
+
$node_label,
|
|
289
|
+
$embedding_node_property,
|
|
290
|
+
toInteger($embedding_dimension),
|
|
291
|
+
$similarity_metric
|
|
292
|
+
)
|
|
293
|
+
`;
|
|
294
|
+
const parameters = {
|
|
295
|
+
index_name: this.indexName,
|
|
296
|
+
node_label: this.nodeLabel,
|
|
297
|
+
embedding_node_property: this.embeddingNodeProperty,
|
|
298
|
+
embedding_dimension: this.embeddingDimension,
|
|
299
|
+
similarity_metric: this.distanceStrategy,
|
|
300
|
+
};
|
|
301
|
+
await this.query(indexQuery, parameters);
|
|
302
|
+
}
|
|
303
|
+
async retrieveExistingIndex() {
|
|
304
|
+
let indexInformation = await this.query(`
|
|
305
|
+
SHOW INDEXES YIELD name, type, labelsOrTypes, properties, options
|
|
306
|
+
WHERE type = 'VECTOR' AND (name = $index_name
|
|
307
|
+
OR (labelsOrTypes[0] = $node_label AND
|
|
308
|
+
properties[0] = $embedding_node_property))
|
|
309
|
+
RETURN name, labelsOrTypes, properties, options
|
|
310
|
+
`, {
|
|
311
|
+
index_name: this.indexName,
|
|
312
|
+
node_label: this.nodeLabel,
|
|
313
|
+
embedding_node_property: this.embeddingNodeProperty,
|
|
314
|
+
});
|
|
315
|
+
if (indexInformation) {
|
|
316
|
+
indexInformation = this.sortByIndexName(indexInformation, this.indexName);
|
|
317
|
+
try {
|
|
318
|
+
const [index] = indexInformation;
|
|
319
|
+
const [labelOrType] = index.labelsOrTypes;
|
|
320
|
+
const [property] = index.properties;
|
|
321
|
+
this.indexName = index.name;
|
|
322
|
+
this.nodeLabel = labelOrType;
|
|
323
|
+
this.embeddingNodeProperty = property;
|
|
324
|
+
const embeddingDimension = index.options.indexConfig["vector.dimensions"];
|
|
325
|
+
return Number(embeddingDimension);
|
|
326
|
+
}
|
|
327
|
+
catch (error) {
|
|
328
|
+
return null;
|
|
329
|
+
}
|
|
330
|
+
}
|
|
331
|
+
return null;
|
|
332
|
+
}
|
|
333
|
+
async retrieveExistingFtsIndex(textNodeProperties = []) {
|
|
334
|
+
const indexInformation = await this.query(`
|
|
335
|
+
SHOW INDEXES YIELD name, type, labelsOrTypes, properties, options
|
|
336
|
+
WHERE type = 'FULLTEXT' AND (name = $keyword_index_name
|
|
337
|
+
OR (labelsOrTypes = [$node_label] AND
|
|
338
|
+
properties = $text_node_property))
|
|
339
|
+
RETURN name, labelsOrTypes, properties, options
|
|
340
|
+
`, {
|
|
341
|
+
keyword_index_name: this.keywordIndexName,
|
|
342
|
+
node_label: this.nodeLabel,
|
|
343
|
+
text_node_property: textNodeProperties.length > 0
|
|
344
|
+
? textNodeProperties
|
|
345
|
+
: [this.textNodeProperty],
|
|
346
|
+
});
|
|
347
|
+
if (indexInformation) {
|
|
348
|
+
// Sort the index information by index name
|
|
349
|
+
const sortedIndexInformation = this.sortByIndexName(indexInformation, this.indexName);
|
|
350
|
+
try {
|
|
351
|
+
const [index] = sortedIndexInformation;
|
|
352
|
+
const [labelOrType] = index.labelsOrTypes;
|
|
353
|
+
const [property] = index.properties;
|
|
354
|
+
this.keywordIndexName = index.name;
|
|
355
|
+
this.textNodeProperty = property;
|
|
356
|
+
this.nodeLabel = labelOrType;
|
|
357
|
+
return labelOrType;
|
|
358
|
+
}
|
|
359
|
+
catch (error) {
|
|
360
|
+
return null;
|
|
361
|
+
}
|
|
362
|
+
}
|
|
363
|
+
return null;
|
|
364
|
+
}
|
|
365
|
+
async createNewKeywordIndex(textNodeProperties = []) {
|
|
366
|
+
const nodeProps = textNodeProperties.length > 0
|
|
367
|
+
? textNodeProperties
|
|
368
|
+
: [this.textNodeProperty];
|
|
369
|
+
// Construct the Cypher query to create a new full text index
|
|
370
|
+
const ftsIndexQuery = `
|
|
371
|
+
CREATE FULLTEXT INDEX ${this.keywordIndexName}
|
|
372
|
+
FOR (n:\`${this.nodeLabel}\`) ON EACH
|
|
373
|
+
[${nodeProps.map((prop) => `n.\`${prop}\``).join(", ")}]
|
|
374
|
+
`;
|
|
375
|
+
await this.query(ftsIndexQuery);
|
|
376
|
+
}
|
|
377
|
+
sortByIndexName(
|
|
378
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
379
|
+
values, indexName
|
|
380
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
381
|
+
) {
|
|
382
|
+
return values.sort((a, b) => (a.index_name === indexName ? -1 : 0) -
|
|
383
|
+
(b.index_name === indexName ? -1 : 0));
|
|
384
|
+
}
|
|
385
|
+
async addVectors(vectors, documents,
|
|
386
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
387
|
+
metadatas, ids) {
|
|
388
|
+
let _ids = ids;
|
|
389
|
+
let _metadatas = metadatas;
|
|
390
|
+
if (!_ids) {
|
|
391
|
+
_ids = documents.map(() => uuid.v1());
|
|
392
|
+
}
|
|
393
|
+
if (!metadatas) {
|
|
394
|
+
_metadatas = documents.map(() => ({}));
|
|
395
|
+
}
|
|
396
|
+
const importQuery = `
|
|
397
|
+
UNWIND $data AS row
|
|
398
|
+
CALL {
|
|
399
|
+
WITH row
|
|
400
|
+
MERGE (c:\`${this.nodeLabel}\` {id: row.id})
|
|
401
|
+
WITH c, row
|
|
402
|
+
CALL db.create.setVectorProperty(c, '${this.embeddingNodeProperty}', row.embedding)
|
|
403
|
+
YIELD node
|
|
404
|
+
SET c.\`${this.textNodeProperty}\` = row.text
|
|
405
|
+
SET c += row.metadata
|
|
406
|
+
} IN TRANSACTIONS OF 1000 ROWS
|
|
407
|
+
`;
|
|
408
|
+
const parameters = {
|
|
409
|
+
data: documents.map(({ pageContent, metadata }, index) => ({
|
|
410
|
+
text: pageContent,
|
|
411
|
+
metadata: _metadatas ? _metadatas[index] : metadata,
|
|
412
|
+
embedding: vectors[index],
|
|
413
|
+
id: _ids ? _ids[index] : null,
|
|
414
|
+
})),
|
|
415
|
+
};
|
|
416
|
+
await this.query(importQuery, parameters);
|
|
417
|
+
return _ids;
|
|
418
|
+
}
|
|
419
|
+
async addDocuments(documents) {
|
|
420
|
+
const texts = documents.map(({ pageContent }) => pageContent);
|
|
421
|
+
return this.addVectors(await this.embeddings.embedDocuments(texts), documents);
|
|
422
|
+
}
|
|
423
|
+
async similaritySearch(query, k = 4) {
|
|
424
|
+
const embedding = await this.embeddings.embedQuery(query);
|
|
425
|
+
const results = await this.similaritySearchVectorWithScore(embedding, k, query);
|
|
426
|
+
return results.map((result) => result[0]);
|
|
427
|
+
}
|
|
428
|
+
async similaritySearchVectorWithScore(vector, k, query) {
|
|
429
|
+
const defaultRetrieval = `
|
|
430
|
+
RETURN node.${this.textNodeProperty} AS text, score,
|
|
431
|
+
node {.*, ${this.textNodeProperty}: Null,
|
|
432
|
+
${this.embeddingNodeProperty}: Null, id: Null } AS metadata
|
|
433
|
+
`;
|
|
434
|
+
const retrievalQuery = this.retrievalQuery
|
|
435
|
+
? this.retrievalQuery
|
|
436
|
+
: defaultRetrieval;
|
|
437
|
+
const readQuery = `${getSearchIndexQuery(this.searchType)} ${retrievalQuery}`;
|
|
438
|
+
const parameters = {
|
|
439
|
+
index: this.indexName,
|
|
440
|
+
k: Number(k),
|
|
441
|
+
embedding: vector,
|
|
442
|
+
keyword_index: this.keywordIndexName,
|
|
443
|
+
query,
|
|
444
|
+
};
|
|
445
|
+
const results = await this.query(readQuery, parameters);
|
|
446
|
+
if (results) {
|
|
447
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
448
|
+
const docs = results.map((result) => [
|
|
449
|
+
new Document({
|
|
450
|
+
pageContent: result.text,
|
|
451
|
+
metadata: Object.fromEntries(Object.entries(result.metadata).filter(([_, v]) => v !== null)),
|
|
452
|
+
}),
|
|
453
|
+
result.score,
|
|
454
|
+
]);
|
|
455
|
+
return docs;
|
|
456
|
+
}
|
|
457
|
+
return [];
|
|
458
|
+
}
|
|
459
|
+
}
|
|
460
|
+
/**
 * Converts driver records into plain objects, passing every field value
 * through `itemIntToString`.
 *
 * @param records Records exposing a `toObject()` method.
 * @returns One plain object per record, with the same keys.
 */
function toObjects(records) {
    return records.map((record) => {
        const raw = record.toObject();
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        const converted = {};
        for (const key of Object.keys(raw)) {
            converted[key] = itemIntToString(raw[key]);
        }
        return converted;
    });
}
|
|
473
|
+
/**
 * Recursively converts a value returned by the driver:
 * driver integers become strings, arrays are converted element-wise,
 * numbers/strings/booleans and null pass through, and other objects are
 * handed to `objIntToString`. Any other kind of value yields undefined.
 *
 * @param item Value to convert.
 * @returns The converted value.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function itemIntToString(item) {
    if (neo4j.isInt(item)) {
        return item.toString();
    }
    if (Array.isArray(item)) {
        return item.map((element) => itemIntToString(element));
    }
    if (item === null) {
        return item;
    }
    switch (typeof item) {
        case "number":
        case "string":
        case "boolean":
            return item;
        case "object":
            return objIntToString(item);
        default:
            return undefined;
    }
}
|
|
486
|
+
/**
 * Unwraps a driver object with `extractFromNeoObjects` and converts the
 * result: arrays element-wise, objects property by property (both through
 * `itemIntToString`).
 *
 * @param obj Object to convert.
 * @returns The converted array or object, or null when the unwrapped value
 *   is neither an array nor a non-null object.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function objIntToString(obj) {
    const unwrapped = extractFromNeoObjects(obj);
    if (Array.isArray(unwrapped)) {
        return unwrapped.map((item) => itemIntToString(item));
    }
    if (unwrapped === null || typeof unwrapped !== "object") {
        return null;
    }
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const converted = {};
    for (const [key, value] of Object.entries(unwrapped)) {
        converted[key] = itemIntToString(value);
    }
    return converted;
}
|
|
502
|
+
/**
 * Unwraps driver graph types: nodes and relationships yield their
 * `properties`; paths yield the flattened rows from `extractPathForRows`.
 * Anything else is returned unchanged.
 *
 * @param obj Value to unwrap.
 * @returns The unwrapped value.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function extractFromNeoObjects(obj) {
    const { Node, Relationship, Path } = neo4j.types;
    if (obj instanceof Node || obj instanceof Relationship) {
        return obj.properties;
    }
    if (obj instanceof Path) {
        // One level of flattening: rows of parts become a single list of parts.
        return extractPathForRows(obj).flat();
    }
    return obj;
}
|
|
518
|
+
/**
 * Converts a path into rows of `[start, relationship, end]` parts, each part
 * converted with `objIntToString` and null parts removed.
 *
 * A path without segments is treated as a single pseudo-segment built from
 * the path itself with `end` set to null.
 *
 * @param path Path to convert.
 * @returns One row per segment.
 */
function extractPathForRows(path) {
    const hasSegments = Array.isArray(path.segments) && path.segments.length > 0;
    // Zero length path. No relationship, end === start
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const segments = hasSegments ? path.segments : [{ ...path, end: null }];
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    return segments.map((segment) => {
        const parts = [
            objIntToString(segment.start),
            objIntToString(segment.relationship),
            objIntToString(segment.end),
        ];
        return parts.filter((part) => part !== null);
    });
}
|
|
532
|
+
/**
 * Returns the Cypher fragment that queries the index for a search type.
 *
 * - "vector": queries the vector index only.
 * - "hybrid": unions the vector index results with the full-text index
 *   results (full-text scores are divided by their maximum), keeps the best
 *   score per node and limits the result to `$k` rows.
 *
 * Unknown search types now fail fast with a descriptive error. Previously
 * `undefined` was returned and ended up as the literal text "undefined" at
 * the start of the Cypher statement; inherited keys such as "toString" were
 * also accepted.
 *
 * @param searchType Either "vector" or "hybrid".
 * @returns The Cypher fragment yielding `node` and `score`.
 * @throws Error when `searchType` is not a supported search type.
 */
function getSearchIndexQuery(searchType) {
    const typeToQueryMap = {
        vector: "CALL db.index.vector.queryNodes($index, $k, $embedding) YIELD node, score",
        hybrid: `
          CALL {
              CALL db.index.vector.queryNodes($index, $k, $embedding) YIELD node, score
              RETURN node, score UNION
              CALL db.index.fulltext.queryNodes($keyword_index, $query, {limit: $k}) YIELD node, score
              WITH collect({node: node, score: score}) AS nodes, max(score) AS max
              UNWIND nodes AS n
              RETURN n.node AS node, (n.score / max) AS score
          }
          WITH node, max(score) AS score ORDER BY score DESC LIMIT toInteger($k)
      `,
    };
    // Own-property check so prototype members never count as search types.
    if (!Object.prototype.hasOwnProperty.call(typeToQueryMap, searchType)) {
        throw new Error(`Unsupported search type "${String(searchType)}". Expected one of: ${Object.keys(typeToQueryMap).join(", ")}`);
    }
    return typeToQueryMap[searchType];
}
|
|
@@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.SupabaseVectorStore = void 0;
|
|
4
4
|
const base_js_1 = require("./base.cjs");
|
|
5
5
|
const document_js_1 = require("../document.cjs");
|
|
6
|
+
const math_js_1 = require("../util/math.cjs");
|
|
6
7
|
/**
|
|
7
8
|
* Class for interacting with a Supabase database to store and manage
|
|
8
9
|
* vectors.
|
|
@@ -103,14 +104,7 @@ class SupabaseVectorStore extends base_js_1.VectorStore {
|
|
|
103
104
|
await this.client.from(this.tableName).delete().eq("id", id);
|
|
104
105
|
}
|
|
105
106
|
}
|
|
106
|
-
|
|
107
|
-
* Performs a similarity search on the vector store.
|
|
108
|
-
* @param query The query vector.
|
|
109
|
-
* @param k The number of results to return.
|
|
110
|
-
* @param filter Optional filter to apply to the search.
|
|
111
|
-
* @returns A promise that resolves with the search results when the search is complete.
|
|
112
|
-
*/
|
|
113
|
-
async similaritySearchVectorWithScore(query, k, filter) {
|
|
107
|
+
async _searchSupabase(query, k, filter) {
|
|
114
108
|
if (filter && this.filter) {
|
|
115
109
|
throw new Error("cannot provide both `filter` and `this.filter`");
|
|
116
110
|
}
|
|
@@ -135,6 +129,17 @@ class SupabaseVectorStore extends base_js_1.VectorStore {
|
|
|
135
129
|
if (error) {
|
|
136
130
|
throw new Error(`Error searching for documents: ${error.code} ${error.message} ${error.details}`);
|
|
137
131
|
}
|
|
132
|
+
return searches;
|
|
133
|
+
}
|
|
134
|
+
/**
|
|
135
|
+
* Performs a similarity search on the vector store.
|
|
136
|
+
* @param query The query vector.
|
|
137
|
+
* @param k The number of results to return.
|
|
138
|
+
* @param filter Optional filter to apply to the search.
|
|
139
|
+
* @returns A promise that resolves with the search results when the search is complete.
|
|
140
|
+
*/
|
|
141
|
+
async similaritySearchVectorWithScore(query, k, filter) {
|
|
142
|
+
const searches = await this._searchSupabase(query, k, filter);
|
|
138
143
|
const result = searches.map((resp) => [
|
|
139
144
|
new document_js_1.Document({
|
|
140
145
|
metadata: resp.metadata,
|
|
@@ -144,6 +149,30 @@ class SupabaseVectorStore extends base_js_1.VectorStore {
|
|
|
144
149
|
]);
|
|
145
150
|
return result;
|
|
146
151
|
}
|
|
152
|
+
/**
|
|
153
|
+
* Return documents selected using the maximal marginal relevance.
|
|
154
|
+
* Maximal marginal relevance optimizes for similarity to the query AND diversity
|
|
155
|
+
* among selected documents.
|
|
156
|
+
*
|
|
157
|
+
* @param {string} query - Text to look up documents similar to.
|
|
158
|
+
* @param {number} options.k - Number of documents to return.
|
|
159
|
+
* @param {number} options.fetchK=20- Number of documents to fetch before passing to the MMR algorithm.
|
|
160
|
+
* @param {number} options.lambda=0.5 - Number between 0 and 1 that determines the degree of diversity among the results,
|
|
161
|
+
* where 0 corresponds to maximum diversity and 1 to minimum diversity.
|
|
162
|
+
* @param {SupabaseLibArgs} options.filter - Optional filter to apply to the search.
|
|
163
|
+
*
|
|
164
|
+
* @returns {Promise<Document[]>} - List of documents selected by maximal marginal relevance.
|
|
165
|
+
*/
|
|
166
|
+
async maxMarginalRelevanceSearch(query, options) {
|
|
167
|
+
const queryEmbedding = await this.embeddings.embedQuery(query);
|
|
168
|
+
const searches = await this._searchSupabase(queryEmbedding, options.fetchK ?? 20, options.filter);
|
|
169
|
+
const embeddingList = searches.map((searchResp) => searchResp.embedding);
|
|
170
|
+
const mmrIndexes = (0, math_js_1.maximalMarginalRelevance)(queryEmbedding, embeddingList, options.lambda, options.k);
|
|
171
|
+
return mmrIndexes.map((idx) => new document_js_1.Document({
|
|
172
|
+
metadata: searches[idx].metadata,
|
|
173
|
+
pageContent: searches[idx].content,
|
|
174
|
+
}));
|
|
175
|
+
}
|
|
147
176
|
/**
|
|
148
177
|
* Creates a new SupabaseVectorStore instance from an array of texts.
|
|
149
178
|
* @param texts The texts to create documents from.
|
|
@@ -1,11 +1,21 @@
|
|
|
1
1
|
import type { SupabaseClient } from "@supabase/supabase-js";
|
|
2
2
|
import type { PostgrestFilterBuilder } from "@supabase/postgrest-js";
|
|
3
|
-
import { VectorStore } from "./base.js";
|
|
3
|
+
import { MaxMarginalRelevanceSearchOptions, VectorStore } from "./base.js";
|
|
4
4
|
import { Embeddings } from "../embeddings/base.js";
|
|
5
5
|
import { Document } from "../document.js";
|
|
6
6
|
export type SupabaseMetadata = Record<string, any>;
|
|
7
7
|
export type SupabaseFilter = PostgrestFilterBuilder<any, any, any>;
|
|
8
8
|
export type SupabaseFilterRPCCall = (rpcCall: SupabaseFilter) => SupabaseFilter;
|
|
9
|
+
/**
|
|
10
|
+
* Interface for the response returned when searching embeddings.
|
|
11
|
+
*/
|
|
12
|
+
interface SearchEmbeddingsResponse {
|
|
13
|
+
id: number;
|
|
14
|
+
content: string;
|
|
15
|
+
metadata: object;
|
|
16
|
+
embedding: number[];
|
|
17
|
+
similarity: number;
|
|
18
|
+
}
|
|
9
19
|
/**
|
|
10
20
|
* Interface for the arguments required to initialize a Supabase library.
|
|
11
21
|
*/
|
|
@@ -56,6 +66,7 @@ export declare class SupabaseVectorStore extends VectorStore {
|
|
|
56
66
|
delete(params: {
|
|
57
67
|
ids: string[] | number[];
|
|
58
68
|
}): Promise<void>;
|
|
69
|
+
protected _searchSupabase(query: number[], k: number, filter?: this["FilterType"]): Promise<SearchEmbeddingsResponse[]>;
|
|
59
70
|
/**
|
|
60
71
|
* Performs a similarity search on the vector store.
|
|
61
72
|
* @param query The query vector.
|
|
@@ -64,6 +75,21 @@ export declare class SupabaseVectorStore extends VectorStore {
|
|
|
64
75
|
* @returns A promise that resolves with the search results when the search is complete.
|
|
65
76
|
*/
|
|
66
77
|
similaritySearchVectorWithScore(query: number[], k: number, filter?: this["FilterType"]): Promise<[Document, number][]>;
|
|
78
|
+
/**
|
|
79
|
+
* Return documents selected using the maximal marginal relevance.
|
|
80
|
+
* Maximal marginal relevance optimizes for similarity to the query AND diversity
|
|
81
|
+
* among selected documents.
|
|
82
|
+
*
|
|
83
|
+
* @param {string} query - Text to look up documents similar to.
|
|
84
|
+
* @param {number} options.k - Number of documents to return.
|
|
85
|
+
* @param {number} options.fetchK=20 - Number of documents to fetch before passing to the MMR algorithm.
|
|
86
|
+
* @param {number} options.lambda=0.5 - Number between 0 and 1 that determines the degree of diversity among the results,
|
|
87
|
+
* where 0 corresponds to maximum diversity and 1 to minimum diversity.
|
|
88
|
+
* @param {SupabaseLibArgs} options.filter - Optional filter to apply to the search.
|
|
89
|
+
*
|
|
90
|
+
* @returns {Promise<Document[]>} - List of documents selected by maximal marginal relevance.
|
|
91
|
+
*/
|
|
92
|
+
maxMarginalRelevanceSearch(query: string, options: MaxMarginalRelevanceSearchOptions<this["FilterType"]>): Promise<Document[]>;
|
|
67
93
|
/**
|
|
68
94
|
* Creates a new SupabaseVectorStore instance from an array of texts.
|
|
69
95
|
* @param texts The texts to create documents from.
|
|
@@ -89,3 +115,4 @@ export declare class SupabaseVectorStore extends VectorStore {
|
|
|
89
115
|
*/
|
|
90
116
|
static fromExistingIndex(embeddings: Embeddings, dbConfig: SupabaseLibArgs): Promise<SupabaseVectorStore>;
|
|
91
117
|
}
|
|
118
|
+
export {};
|