hana-kgvector 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +449 -0
- package/dist/Stream-JW2S2DUH.js +6 -0
- package/dist/Stream-JW2S2DUH.js.map +1 -0
- package/dist/chunk-I3F3SOHM.js +726 -0
- package/dist/chunk-I3F3SOHM.js.map +1 -0
- package/dist/chunk-VUNV25KB.js +16 -0
- package/dist/chunk-VUNV25KB.js.map +1 -0
- package/dist/index.d.ts +382 -0
- package/dist/index.js +1036 -0
- package/dist/index.js.map +1 -0
- package/dist/lib-J24D2XQX.js +1256 -0
- package/dist/lib-J24D2XQX.js.map +1 -0
- package/package.json +59 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,1036 @@
|
|
|
1
|
+
import "./chunk-VUNV25KB.js";
|
|
2
|
+
|
|
3
|
+
// src/env.ts
|
|
4
|
+
import dotenv from "dotenv";
|
|
5
|
+
/**
 * Load environment variables by handing a dotenv file to `dotenv.config`.
 *
 * @param {string} [path=".env.local"] - File passed to dotenv. The default
 *   keeps the behavior of the original zero-argument call.
 * @returns {void}
 */
function loadEnv(path = ".env.local") {
  dotenv.config({ path });
}
|
|
8
|
+
|
|
9
|
+
// src/hana/parse-host-port.ts
|
|
10
|
+
/**
 * Split a `host[:port]` string into its host and port parts.
 *
 * A trailing `:port` is only honoured when it is a plain decimal integer in
 * the range 1-65535. Anything else (no colon, empty port, non-numeric text,
 * out-of-range number) leaves `host` untouched and uses `fallbackPort`.
 *
 * IPv6 literals contain colons themselves, so an unbracketed value with more
 * than one colon is treated as a bare address without a port. Use the
 * bracketed form (`[::1]:30015`) to attach a port to an IPv6 address.
 *
 * @param {string} host - Host name or address, optionally followed by `:port`.
 * @param {number} fallbackPort - Port returned when `host` carries no valid port.
 * @returns {{ host: string, port: number }}
 */
function parseHostPort(host, fallbackPort) {
  const fallback = { host, port: fallbackPort };
  const idx = host.lastIndexOf(":");
  if (idx === -1) {
    return fallback;
  }
  if (host.startsWith("[")) {
    // Bracketed IPv6: the port separator must directly follow the closing bracket.
    if (host[idx - 1] !== "]") {
      return fallback;
    }
  } else if (host.indexOf(":") !== idx) {
    // Several colons without brackets: a bare IPv6 literal, not host:port.
    return fallback;
  }
  const portText = host.slice(idx + 1);
  // Digits only, so "1e3", "0x1F", "80.5" or " 443" are not accepted as ports.
  if (!/^\d{1,5}$/.test(portText)) {
    return fallback;
  }
  const port = Number(portText);
  if (port < 1 || port > 65535) {
    return fallback;
  }
  return { host: host.slice(0, idx), port };
}
|
|
20
|
+
|
|
21
|
+
// src/hana/connection.ts
|
|
22
|
+
/**
 * Open a connection to SAP HANA through the bundled `@sap/hana-client` chunk.
 *
 * The driver is imported lazily so that a missing or unbuilt native client
 * only fails when a connection is actually requested.
 *
 * @param {object} config
 * @param {string} config.host - Host, optionally with an embedded `:port`
 *   (an embedded port wins over `config.port`).
 * @param {number} [config.port=443] - Port used when `host` has none.
 * @param {string} config.user - Passed to the driver as `uid`.
 * @param {string} config.password - Passed to the driver as `pwd`.
 * @param {boolean} [config.encrypt=true]
 * @param {boolean} [config.sslValidateCertificate=true]
 * @returns {Promise<object>} The connected driver connection object.
 * @throws {Error} When the driver chunk cannot be imported; the original
 *   import failure is attached as `cause`.
 */
async function createHanaConnection(config) {
  let hana;
  try {
    const hanaModule = await import("./lib-J24D2XQX.js");
    hana = hanaModule.default || hanaModule;
  } catch (err) {
    // Keep the underlying failure so the real reason (missing file, native
    // build error, ...) is still visible to whoever handles this error.
    throw new Error(
      "@sap/hana-client is not available. Ensure it is installed and pnpm build scripts are approved (pnpm approve-builds).",
      { cause: err }
    );
  }
  const fallbackPort = config.port ?? 443;
  const { host, port } = parseHostPort(config.host, fallbackPort);
  const conn = hana.createConnection();
  // Adapt the driver's callback-style connect() to a promise.
  await new Promise((resolve, reject) => {
    conn.connect(
      {
        serverNode: `${host}:${port}`,
        uid: config.user,
        pwd: config.password,
        encrypt: config.encrypt ?? true,
        sslValidateCertificate: config.sslValidateCertificate ?? true
      },
      (err) => err ? reject(err) : resolve()
    );
  });
  return conn;
}
|
|
49
|
+
/**
 * Promise wrapper around the connection's callback-style `exec`.
 *
 * @param {object} conn - Connection exposing `exec(sql, [params], callback)`.
 * @param {string} sql - Statement to execute.
 * @param {Array} [params] - Optional bind values. When omitted or empty the
 *   two-argument `exec(sql, callback)` form is used, exactly as before.
 * @returns {Promise<*>} Resolves with whatever the driver passes as result.
 */
async function hanaExec(conn, sql, params) {
  return await new Promise((resolve, reject) => {
    const settle = (err, result) => {
      if (err) return reject(err);
      resolve(result);
    };
    if (params && params.length > 0) {
      conn.exec(sql, params, settle);
    } else {
      conn.exec(sql, settle);
    }
  });
}
|
|
57
|
+
|
|
58
|
+
// src/hana/sparql-store.ts
|
|
59
|
+
/**
 * Thin wrapper that sends SPARQL requests to HANA via `CALL SPARQL_EXECUTE`.
 */
var HanaSparqlStore = class {
  conn;
  /**
   * @param {object} conn - Connection exposing `exec(sql, params, callback)`.
   */
  constructor(conn) {
    this.conn = conn;
  }
  /**
   * Format one `name: value` request header line.
   *
   * Header lines are joined with CRLF, so a value containing a line break
   * would smuggle additional headers into the request; such values are
   * rejected instead of being sent.
   *
   * @param {string} name
   * @param {*} value
   * @returns {string}
   * @throws {Error} When the value contains a carriage return or line feed.
   */
  headerLine(name, value) {
    const text = String(value);
    if (/[\r\n]/.test(text)) {
      throw new Error(`SPARQL request header "${name}" must not contain line breaks`);
    }
    return `${name}: ${text}`;
  }
  /**
   * Execute a SPARQL request.
   *
   * @param {object} options
   * @param {string} options.sparql - Request body (first procedure argument).
   * @param {string} [options.defaultGraphUri] - Sent as `rqx-default-graph-uri`.
   * @param {string} [options.headers] - Pre-formatted header block, appended
   *   after trimming.
   * @returns {Promise<*>} Resolves with the driver's result for the call.
   */
  async execute(options) {
    const headerLines = [];
    if (options.defaultGraphUri) {
      headerLines.push(this.headerLine("rqx-default-graph-uri", options.defaultGraphUri));
    }
    if (options.headers) {
      headerLines.push(options.headers.trim());
    }
    // A non-empty header block is terminated by a trailing CRLF.
    const hdrs = headerLines.length ? `${headerLines.join("\r\n")}\r\n` : "";
    const sql = "CALL SPARQL_EXECUTE(?, ?, ?, ?)";
    // NOTE(review): HanaPropertyGraphStore in this file binds only the two IN
    // parameters for the same procedure; confirm which form the driver expects.
    return await new Promise((resolve, reject) => {
      this.conn.exec(sql, [options.sparql, hdrs, "", null], (err, result) => {
        if (err) return reject(err);
        resolve(result);
      });
    });
  }
  /**
   * Load Turtle data into a named graph using the `rqx-load-*` headers.
   *
   * @param {object} options
   * @param {string} options.turtle - Turtle document sent as the request body.
   * @param {string} options.graphName - Sent as `rqx-load-graphname`.
   * @param {string} [options.filename] - Sent as `rqx-load-filename` when set.
   * @returns {Promise<*>} Result of the underlying `execute` call.
   */
  async loadTurtle(options) {
    const requestHdrs = [
      "rqx-load-protocol: true",
      options.filename ? this.headerLine("rqx-load-filename", options.filename) : void 0,
      this.headerLine("rqx-load-graphname", options.graphName)
    ].filter(Boolean).join("\r\n");
    return await this.execute({ sparql: options.turtle, headers: requestHdrs });
  }
};
|
|
91
|
+
|
|
92
|
+
// src/graph/types.ts
|
|
93
|
+
import { z } from "zod";
|
|
94
|
+
// Property key under which a KG node/relation records the id of the chunk it came from.
const TRIPLET_SOURCE_KEY = "triplet_source_id";
// Metadata keys holding the extracted KG nodes and relations of a chunk.
const KG_NODES_KEY = "kg_nodes";
const KG_RELATIONS_KEY = "kg_relations";
const VECTOR_SOURCE_KEY = "vector_source_id";
// Relation label that retrievers leave out of relation maps by default.
const KG_SOURCE_REL = "HAS_SOURCE";
|
|
99
|
+
// Validation schema for a knowledge-graph entity node.
const EntityNodeSchema = z.object({
  id: z.string(),
  label: z.string(),
  name: z.string(),
  // Free-form properties; an absent value becomes an empty object.
  properties: z.record(z.unknown()).default({}),
  embedding: z.array(z.number()).optional(),
});

// Validation schema for a directed relation between two entity nodes.
const RelationSchema = z.object({
  id: z.string().optional(),
  label: z.string(),
  sourceId: z.string(),
  targetId: z.string(),
  // Free-form properties; an absent value becomes an empty object.
  properties: z.record(z.unknown()).default({}),
});
|
|
113
|
+
/**
 * Build an entity node whose id is derived from its label and name.
 *
 * The id is `<LABEL>_<NAME>`, upper-cased, with every whitespace run in the
 * name collapsed to a single underscore.
 *
 * @param {object} opts
 * @param {string} opts.label
 * @param {string} opts.name
 * @param {object} [opts.properties] - Defaults to `{}`.
 * @param {number[]} [opts.embedding]
 * @returns {{ id: string, label: string, name: string, properties: object, embedding: (number[]|undefined) }}
 */
function createEntityNode(opts) {
  const labelPart = opts.label.toUpperCase();
  const namePart = opts.name.replace(/\s+/g, "_").toUpperCase();
  const node = {
    id: `${labelPart}_${namePart}`,
    label: opts.label,
    name: opts.name,
    properties: opts.properties ?? {},
    embedding: opts.embedding
  };
  return node;
}
|
|
123
|
+
/**
 * Build a relation whose id is `<sourceId>_<label>_<targetId>`.
 *
 * @param {object} opts
 * @param {string} opts.label
 * @param {string} opts.sourceId
 * @param {string} opts.targetId
 * @param {object} [opts.properties] - Defaults to `{}`.
 * @returns {{ id: string, label: string, sourceId: string, targetId: string, properties: object }}
 */
function createRelation(opts) {
  const { label, sourceId, targetId } = opts;
  const id = [sourceId, label, targetId].map((part) => `${part}`).join("_");
  return {
    id,
    label,
    sourceId,
    targetId,
    properties: opts.properties ?? {}
  };
}
|
|
132
|
+
/**
 * Render a `[subject, relation, object]` triplet as one line of text.
 *
 * @param {Array} triplet - `[subjectNode, relation, objectNode]`.
 * @param {boolean} [includeProperties=false] - When true, the line shows each
 *   node's name and label; otherwise it shows the node ids.
 * @returns {string}
 */
function tripletToString(triplet, includeProperties = false) {
  const [head, edge, tail] = triplet;
  if (!includeProperties) {
    return `${head.id} -> ${edge.label} -> ${tail.id}`;
  }
  const describe = (node) => `${node.name} (${node.label})`;
  return `${describe(head)} -[${edge.label}]-> ${describe(tail)}`;
}
|
|
139
|
+
|
|
140
|
+
// src/graph/hana-property-graph-store.ts
|
|
141
|
+
/**
 * Property-graph store backed by SAP HANA.
 *
 * Graph triples are written through `SPARQL_EXECUTE` into the named graph,
 * while embeddings and source chunks live in two column tables (one for
 * entity vectors, one for the original text nodes).
 */
var HanaPropertyGraphStore = class {
  conn;
  graphName;
  vectorTableName;
  llamaNodesTableName;
  resetTables;
  initialized = false;
  supportsVectorQueries = true;
  supportsStructuredQueries = true;
  // Initialization currently in flight, shared by concurrent callers.
  initPromise = void 0;
  /**
   * @param {object} conn - Connection exposing callback-style `exec`.
   * @param {object} options
   * @param {string} options.graphName - Named graph IRI; also the basis for
   *   the default table names (non-alphanumerics replaced by `_`).
   * @param {string} [options.vectorTableName]
   * @param {string} [options.llamaNodesTableName]
   * @param {boolean} [options.resetTables=false] - Drop both tables and clear
   *   the graph on first use.
   */
  constructor(conn, options) {
    this.conn = conn;
    this.graphName = options.graphName;
    this.vectorTableName = options.vectorTableName ?? `${options.graphName.replace(/[^a-zA-Z0-9]/g, "_")}_VECTORS`;
    this.llamaNodesTableName = options.llamaNodesTableName ?? `${options.graphName.replace(/[^a-zA-Z0-9]/g, "_")}_NODES`;
    this.resetTables = options.resetTables ?? false;
  }
  /**
   * Make sure the backing tables exist (and were reset, if requested).
   *
   * Concurrent callers share a single initialization run. Without that, two
   * parallel calls would both execute the DROP/CREATE sequence, and with
   * `resetTables` one could drop tables the other had already started using.
   * A failed run is not cached, so a later call retries.
   *
   * @returns {Promise<void>}
   */
  async ensureInitialized() {
    if (this.initialized) return;
    this.initPromise ??= this.runInitialization().finally(() => {
      this.initPromise = void 0;
    });
    await this.initPromise;
  }
  /**
   * Perform the actual one-time setup; use `ensureInitialized` instead of
   * calling this directly.
   *
   * @returns {Promise<void>}
   */
  async runInitialization() {
    if (this.resetTables) {
      // Best effort: the tables or graph may simply not exist yet.
      await this.exec(`DROP TABLE ${this.vectorTableName}`).catch(() => {
      });
      await this.exec(`DROP TABLE ${this.llamaNodesTableName}`).catch(() => {
      });
      await this.sparqlExecute(`CLEAR GRAPH <${this.graphName}>`).catch(() => {
      });
    }
    await this.createTableIfMissing(`
      CREATE COLUMN TABLE ${this.vectorTableName} (
        id NVARCHAR(512) PRIMARY KEY,
        node_type NVARCHAR(64),
        label NVARCHAR(128),
        name NVARCHAR(512),
        properties NCLOB,
        embedding REAL_VECTOR
      )
    `);
    await this.createTableIfMissing(`
      CREATE COLUMN TABLE ${this.llamaNodesTableName} (
        id NVARCHAR(512) PRIMARY KEY,
        text NCLOB,
        metadata NCLOB,
        hash NVARCHAR(64),
        embedding REAL_VECTOR
      )
    `);
    this.initialized = true;
  }
  /**
   * Run a CREATE TABLE statement, ignoring only "already exists" failures.
   *
   * @param {string} ddl
   * @returns {Promise<void>}
   */
  async createTableIfMissing(ddl) {
    await this.exec(ddl).catch((err) => {
      const message = String(err?.message ?? "");
      if (!/exists|duplicate table name/i.test(message)) {
        throw err;
      }
    });
  }
  /**
   * Promise wrapper around `conn.exec`.
   *
   * @param {string} sql
   * @param {Array} [params] - Bind values; omitted from the driver call when empty.
   * @returns {Promise<*>}
   */
  async exec(sql, params) {
    return new Promise((resolve, reject) => {
      const settle = (err, result) => {
        if (err) reject(err);
        else resolve(result);
      };
      if (params && params.length > 0) {
        this.conn.exec(sql, params, settle);
      } else {
        this.conn.exec(sql, settle);
      }
    });
  }
  /**
   * Execute a SPARQL request through `CALL SPARQL_EXECUTE`.
   *
   * The driver's Stream extension is tried first. If anything in that path
   * fails, the call is repeated with plain `conn.exec`.
   *
   * NOTE(review): the prepared statement created on the Stream path is never
   * released here — confirm whether the driver expects an explicit drop.
   *
   * @param {string} sparql
   * @param {string} [headers=""]
   * @returns {Promise<*>}
   */
  async sparqlExecute(sparql, headers = "") {
    try {
      const streamMod = await import("./Stream-JW2S2DUH.js");
      const Stream = streamMod.default ?? streamMod;
      return await new Promise((resolve, reject) => {
        Stream.createProcStatement(this.conn, "CALL SPARQL_EXECUTE(?, ?, ?, ?)", (err, stmt) => {
          if (err) return reject(err);
          stmt.exec([sparql, headers], (err2, scalarParams) => {
            if (err2) return reject(err2);
            resolve(scalarParams);
          });
        });
      });
    } catch {
      return await new Promise((resolve, reject) => {
        this.conn.exec(
          "CALL SPARQL_EXECUTE(?, ?, ?, ?)",
          // Important: only pass IN params; OUT params are placeholders in SQL.
          [sparql, headers],
          (err, result) => {
            if (err) reject(err);
            else resolve(result);
          }
        );
      });
    }
  }
  /**
   * Percent-encode a value so it is safe as one segment of a generated IRI.
   * Values made only of letters, digits, `_`, `-` and `.` are unchanged.
   *
   * @param {*} value
   * @returns {string}
   */
  encodeIriSegment(value) {
    return encodeURIComponent(String(value));
  }
  /**
   * `decodeURIComponent` that returns the input unchanged when it is not
   * valid percent-encoding (for example a literal containing a bare `%`).
   *
   * @param {string} text
   * @returns {string}
   */
  safeDecode(text) {
    try {
      return decodeURIComponent(text);
    } catch {
      return text;
    }
  }
  /**
   * IRI (in angle brackets) identifying an entity node inside this graph.
   *
   * @param {{ id: string, label: string }} node
   * @returns {string}
   */
  entityToUri(node) {
    return `<urn:hkv:${this.graphName}:${this.encodeIriSegment(node.label)}:${encodeURIComponent(node.id)}>`;
  }
  /**
   * Serialize one relation as a single `subject predicate object .` triple.
   *
   * @param {{ label: string }} rel
   * @param {object} subj
   * @param {object} obj
   * @returns {string}
   */
  relationToTriples(rel, subj, obj) {
    const subjUri = this.entityToUri(subj);
    const objUri = this.entityToUri(obj);
    const predUri = `<urn:hkv:rel:${this.encodeIriSegment(rel.label)}>`;
    return `${subjUri} ${predUri} ${objUri} .`;
  }
  /**
   * Serialize an entity as triples: its type, its name and one triple per
   * non-null property. Labels and property keys are percent-encoded because
   * a raw space or `>` would produce an invalid IRI and fail the whole
   * insert. Every value is escaped, not just strings, since `String(value)`
   * of an array or object can also contain quotes.
   *
   * @param {{ id: string, label: string, name: string, properties: object }} node
   * @returns {string} Newline-separated triples.
   */
  entityToTriples(node) {
    const uri = this.entityToUri(node);
    const lines = [];
    lines.push(`${uri} a <urn:hkv:type:${this.encodeIriSegment(node.label)}> .`);
    lines.push(`${uri} <urn:hkv:prop:name> "${this.escapeLiteral(node.name)}" .`);
    for (const [key, value] of Object.entries(node.properties)) {
      if (value !== void 0 && value !== null) {
        const escaped = this.escapeLiteral(typeof value === "string" ? value : String(value));
        lines.push(`${uri} <urn:hkv:prop:${this.encodeIriSegment(key)}> "${escaped}" .`);
      }
    }
    return lines.join("\n");
  }
  /**
   * Escape text for use inside a double-quoted SPARQL string literal.
   * Backslash is handled first so later escapes are not escaped again.
   *
   * @param {string} s
   * @returns {string}
   */
  escapeLiteral(s) {
    return s.replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/\n/g, "\\n").replace(/\r/g, "\\r").replace(/\t/g, "\\t");
  }
  /**
   * Insert entity triples into the graph and, for nodes that carry an
   * embedding, upsert a row into the vector table.
   *
   * @param {Array<object>} nodes
   * @returns {Promise<void>}
   */
  async upsertNodes(nodes) {
    await this.ensureInitialized();
    if (nodes.length === 0) return;
    const triples = nodes.map((n) => this.entityToTriples(n)).join("\n");
    const sparql = `INSERT DATA { GRAPH <${this.graphName}> { ${triples} } }`;
    await this.sparqlExecute(sparql);
    for (const node of nodes) {
      if (node.embedding) {
        // The extracted KG payload is not stored with the node's properties.
        const safeProps = { ...node.properties ?? {} };
        delete safeProps.kg_nodes;
        delete safeProps.kg_relations;
        const sql = `
          UPSERT ${this.vectorTableName} (id, node_type, label, name, properties, embedding)
          VALUES (?, 'entity', ?, ?, ?, TO_REAL_VECTOR(?))
          WITH PRIMARY KEY
        `;
        await this.exec(sql, [
          node.id,
          node.label,
          node.name,
          JSON.stringify(safeProps),
          JSON.stringify(node.embedding)
        ]);
      }
    }
  }
  /**
   * Insert relation triples into the graph.
   *
   * Endpoint nodes are looked up through `get`, which reads the vector
   * table; a relation whose source or target is not found there is skipped.
   *
   * @param {Array<object>} relations
   * @returns {Promise<void>}
   */
  async upsertRelations(relations) {
    await this.ensureInitialized();
    if (relations.length === 0) return;
    const nodeIds = /* @__PURE__ */ new Set();
    for (const rel of relations) {
      nodeIds.add(rel.sourceId);
      nodeIds.add(rel.targetId);
    }
    const existingNodes = await this.get({ ids: Array.from(nodeIds) });
    const nodeMap = new Map(existingNodes.map((n) => [n.id, n]));
    const lines = [];
    for (const rel of relations) {
      const subj = nodeMap.get(rel.sourceId);
      const obj = nodeMap.get(rel.targetId);
      if (subj && obj) {
        lines.push(this.relationToTriples(rel, subj, obj));
      }
    }
    if (lines.length > 0) {
      const sparql = `INSERT DATA { GRAPH <${this.graphName}> { ${lines.join("\n")} } }`;
      await this.sparqlExecute(sparql);
    }
  }
  /**
   * Fetch entity nodes by id from the vector table.
   *
   * @param {{ ids: string[] }} opts
   * @returns {Promise<Array<{ id: string, label: string, name: string, properties: object }>>}
   */
  async get(opts) {
    await this.ensureInitialized();
    if (opts.ids.length === 0) return [];
    const rows = await this.exec(
      `SELECT id, label, name, properties FROM ${this.vectorTableName} WHERE id IN (${opts.ids.map(() => "?").join(",")})`,
      opts.ids
    );
    // Column names are read in both upper and lower case.
    return (rows ?? []).map((r) => ({
      id: r.ID ?? r.id,
      label: r.LABEL ?? r.label,
      name: r.NAME ?? r.name,
      properties: JSON.parse(r.PROPERTIES ?? r.properties ?? "{}")
    }));
  }
  /**
   * Return triples from the graph whose subject or object text contains the
   * encoded id of one of the given nodes.
   *
   * `opts.depth` is accepted by callers but not used by this query.
   *
   * @param {object} opts
   * @param {Array<object>} opts.nodes - Seed nodes.
   * @param {number} [opts.limit=100] - Row limit; anything that is not a
   *   finite, non-negative number falls back to 100 so it can never inject
   *   text into the query.
   * @param {string[]} [opts.ignoreRels] - Predicate substrings to exclude;
   *   defaults to `[KG_SOURCE_REL]`.
   * @returns {Promise<Array<Array>>} `[subject, relation, object]` triplets.
   */
  async getRelMap(opts) {
    await this.ensureInitialized();
    if (opts.nodes.length === 0) return [];
    const nodeIds = opts.nodes.map((n) => n.id);
    const requestedLimit = Number(opts.limit ?? 100);
    const limit = Number.isFinite(requestedLimit) && requestedLimit >= 0 ? Math.trunc(requestedLimit) : 100;
    const ignoreRels = opts.ignoreRels ?? [KG_SOURCE_REL];
    const filterClause = nodeIds.map((id) => `CONTAINS(STR(?s), "${encodeURIComponent(id)}") || CONTAINS(STR(?o), "${encodeURIComponent(id)}")`).join(" || ");
    // Each label is matched both as given (escaped for the string literal)
    // and in the percent-encoded form used when predicates are written.
    // For plain labels the two are identical and collapse into one test.
    const ignoreNeedles = [...new Set(
      ignoreRels.flatMap((r) => [this.escapeLiteral(String(r)), this.encodeIriSegment(r)])
    )];
    const ignoreFilter = ignoreNeedles.length > 0 ? `FILTER(${ignoreNeedles.map((r) => `!CONTAINS(STR(?p), "${r}")`).join(" && ")})` : "";
    const sparql = `
      SELECT ?s ?p ?o
      FROM <${this.graphName}>
      WHERE {
        ?s ?p ?o .
        FILTER(${filterClause})
        ${ignoreFilter}
      }
      LIMIT ${limit}
    `;
    // The SPARQL text is embedded in a SQL string literal, so quotes are doubled.
    const result = await this.exec(
      `SELECT * FROM SPARQL_TABLE('${sparql.replace(/'/g, "''")}')`
    );
    const triplets = [];
    const nodeMap = new Map(opts.nodes.map((n) => [n.id, n]));
    for (const row of result ?? []) {
      const sUri = row.S ?? row.s;
      const pUri = row.P ?? row.p;
      const oUri = row.O ?? row.o;
      const sId = this.extractIdFromUri(sUri);
      const oId = this.extractIdFromUri(oUri);
      const relLabel = this.extractRelLabelFromUri(pUri);
      // Endpoints that are not among the seed nodes get a placeholder node.
      const subj = nodeMap.get(sId) ?? { id: sId, label: "UNKNOWN", name: sId, properties: {} };
      const obj = nodeMap.get(oId) ?? { id: oId, label: "UNKNOWN", name: oId, properties: {} };
      const rel = {
        label: relLabel,
        sourceId: sId,
        targetId: oId,
        properties: {}
      };
      triplets.push([subj, rel, obj]);
    }
    return triplets;
  }
  /**
   * Recover a node id from an entity IRI: the text after the last `:`,
   * percent-decoded. Values without a colon are returned as-is. Text that
   * is not valid percent-encoding is returned undecoded instead of throwing.
   *
   * @param {string} uri
   * @returns {string}
   */
  extractIdFromUri(uri) {
    const text = String(uri ?? "");
    const cleanUri = text.replace(/^<|>$/g, "");
    const parts = cleanUri.split(":");
    if (parts.length >= 2) {
      return this.safeDecode(parts[parts.length - 1]);
    }
    return text;
  }
  /**
   * Recover the relation label from a `urn:hkv:rel:<label>` predicate IRI.
   * Other values are returned unchanged.
   *
   * @param {string} uri
   * @returns {string}
   */
  extractRelLabelFromUri(uri) {
    const text = String(uri ?? "");
    const match = text.match(/urn:hkv:rel:(.+)>?$/);
    return match ? this.safeDecode(match[1].replace(/>$/, "")) : text;
  }
  /**
   * Delete rows with the given ids from the vector table.
   *
   * @param {{ ids: string[] }} opts
   * @returns {Promise<void>}
   */
  async delete(opts) {
    await this.ensureInitialized();
    if (opts.ids.length === 0) return;
    for (const id of opts.ids) {
      await this.exec(`DELETE FROM ${this.vectorTableName} WHERE id = ?`, [id]);
    }
  }
  /**
   * Return the entity nodes most similar to a query embedding.
   *
   * @param {{ queryEmbedding: number[], similarityTopK: number }} query
   * @returns {Promise<[Array<object>, number[]]>} Nodes and their cosine
   *   similarity scores, in matching order (highest score first).
   */
  async vectorQuery(query) {
    await this.ensureInitialized();
    const sql = `
      SELECT id, label, name, properties,
             COSINE_SIMILARITY(embedding, TO_REAL_VECTOR(?)) AS score
      FROM ${this.vectorTableName}
      WHERE embedding IS NOT NULL
      ORDER BY score DESC
      LIMIT ?
    `;
    const rows = await this.exec(sql, [
      JSON.stringify(query.queryEmbedding),
      query.similarityTopK
    ]);
    const nodes = [];
    const scores = [];
    for (const row of rows ?? []) {
      nodes.push({
        id: row.ID ?? row.id,
        label: row.LABEL ?? row.label,
        name: row.NAME ?? row.name,
        properties: JSON.parse(row.PROPERTIES ?? row.properties ?? "{}")
      });
      scores.push(row.SCORE ?? row.score ?? 0);
    }
    return [nodes, scores];
  }
  /**
   * Upsert source text nodes (chunks) into the text-node table.
   *
   * @param {Array<{ id: string, text: string, metadata?: object, hash?: string, embedding?: number[] }>} nodes
   * @returns {Promise<void>}
   */
  async upsertLlamaNodes(nodes) {
    await this.ensureInitialized();
    for (const node of nodes) {
      // The extracted KG payload is not stored with the chunk's metadata.
      const safeMetadata = { ...node.metadata };
      delete safeMetadata.kg_nodes;
      delete safeMetadata.kg_relations;
      // The embedding placeholder is only emitted when there is a value to bind.
      const sql = `
        UPSERT ${this.llamaNodesTableName} (id, text, metadata, hash, embedding)
        VALUES (?, ?, ?, ?, ${node.embedding ? "TO_REAL_VECTOR(?)" : "NULL"})
        WITH PRIMARY KEY
      `;
      const params = [
        node.id,
        node.text,
        JSON.stringify(safeMetadata),
        node.hash ?? null
      ];
      if (node.embedding) {
        params.push(JSON.stringify(node.embedding));
      }
      await this.exec(sql, params);
    }
  }
  /**
   * Fetch source text nodes by id.
   *
   * @param {string[]} ids
   * @returns {Promise<Array<{ id: string, text: string, metadata: object, hash: string }>>}
   */
  async getLlamaNodes(ids) {
    await this.ensureInitialized();
    if (ids.length === 0) return [];
    const rows = await this.exec(
      `SELECT id, text, metadata, hash FROM ${this.llamaNodesTableName} WHERE id IN (${ids.map(() => "?").join(",")})`,
      ids
    );
    return (rows ?? []).map((r) => ({
      id: r.ID ?? r.id,
      text: r.TEXT ?? r.text,
      metadata: JSON.parse(r.METADATA ?? r.metadata ?? "{}"),
      hash: r.HASH ?? r.hash
    }));
  }
};
|
|
464
|
+
|
|
465
|
+
// src/graph/pg-retriever.ts
|
|
466
|
+
/**
 * Fans a query out to several sub-retrievers and merges their results.
 */
var PGRetriever = class {
  subRetrievers;
  showProgress;
  /**
   * @param {object} options
   * @param {Array<{ retrieve: Function }>} options.subRetrievers
   * @param {boolean} [options.showProgress=false]
   */
  constructor(options) {
    const { subRetrievers, showProgress } = options;
    this.subRetrievers = subRetrievers;
    this.showProgress = showProgress ?? false;
  }
  /**
   * Keep only the first result for each distinct `node.text`, preserving order.
   *
   * @param {Array<{ node: { text: string } }>} nodes
   * @returns {Array}
   */
  deduplicate(nodes) {
    const seenTexts = /* @__PURE__ */ new Set();
    return nodes.filter((candidate) => {
      const text = candidate.node.text;
      if (seenTexts.has(text)) {
        return false;
      }
      seenTexts.add(text);
      return true;
    });
  }
  /**
   * Run every sub-retriever in parallel, concatenate their results in
   * sub-retriever order and drop entries with duplicate text.
   *
   * @param {object} queryBundle - Passed unchanged to each sub-retriever.
   * @returns {Promise<Array>}
   */
  async retrieve(queryBundle) {
    const batches = await Promise.all(
      this.subRetrievers.map((retriever) => retriever.retrieve(queryBundle))
    );
    const merged = [];
    for (const batch of batches) {
      for (const hit of batch) {
        merged.push(hit);
      }
    }
    return this.deduplicate(merged);
  }
};
|
|
494
|
+
|
|
495
|
+
// src/graph/retrievers/base.ts
|
|
496
|
+
// Text placed in front of the graph facts when source text is attached.
var DEFAULT_PREAMBLE = "Here are some facts extracted from the provided text:\n\n";
/**
 * Base class for graph retrievers. Subclasses provide `retrieveFromGraph`;
 * this class turns triplets into scored text nodes and can attach the source
 * chunk each triplet came from.
 */
var BasePGRetriever = class {
  graphStore;
  includeText;
  includeTextPreamble;
  includeProperties;
  /**
   * @param {object} options
   * @param {object} options.graphStore
   * @param {boolean} [options.includeText=true] - Attach source chunk text.
   * @param {string} [options.includeTextPreamble] - Defaults to DEFAULT_PREAMBLE.
   * @param {boolean} [options.includeProperties=false] - Forwarded to
   *   `tripletToString`.
   */
  constructor(options) {
    this.graphStore = options.graphStore;
    this.includeText = options.includeText ?? true;
    this.includeTextPreamble = options.includeTextPreamble ?? DEFAULT_PREAMBLE;
    this.includeProperties = options.includeProperties ?? false;
  }
  /**
   * Convert triplets into scored text nodes.
   *
   * @param {Array<Array>} triplets - `[subject, relation, object]` entries.
   * @param {number[]} [scores] - Score per triplet; a missing score becomes 1.
   * @returns {Array<{ node: object, score: number }>} Nodes with ids
   *   `triplet_<index>` whose `refDocId` is the subject's source-chunk id.
   */
  getNodesWithScore(triplets, scores) {
    const results = [];
    for (let i = 0; i < triplets.length; i++) {
      const triplet = triplets[i];
      const sourceId = triplet[0].properties[TRIPLET_SOURCE_KEY];
      const text = tripletToString(triplet, this.includeProperties);
      results.push({
        node: {
          id: `triplet_${i}`,
          text,
          metadata: {},
          refDocId: sourceId
        },
        score: scores?.[i] ?? 1
      });
    }
    return results;
  }
  /**
   * Replace triplet nodes by their source chunk, prefixed with all graph
   * facts that reference the same chunk. Nodes whose chunk cannot be found
   * are passed through unchanged.
   *
   * @param {Array<{ node: object, score: number }>} nodes
   * @returns {Promise<Array<{ node: object, score: number }>>}
   */
  async addSourceText(nodes) {
    if (!this.graphStore.getLlamaNodes) {
      return nodes;
    }
    // Several triplets usually share one chunk: ask for each chunk id once,
    // and never send null/undefined ids to the store.
    const refDocIds = [...new Set(
      nodes.map((n) => n.node.refDocId).filter((id) => id !== void 0 && id !== null)
    )];
    if (refDocIds.length === 0) return nodes;
    const ogNodes = await this.graphStore.getLlamaNodes(refDocIds);
    const nodeMap = new Map(ogNodes.map((n) => [n.id, n]));
    // Group the triplet texts by the chunk they reference.
    const graphNodeMap = /* @__PURE__ */ new Map();
    for (const node of nodes) {
      const refDocId = node.node.refDocId ?? "";
      if (!graphNodeMap.has(refDocId)) {
        graphNodeMap.set(refDocId, []);
      }
      graphNodeMap.get(refDocId).push(node.node.text);
    }
    const resultNodes = [];
    for (const nodeWithScore of nodes) {
      const mappedNode = nodeMap.get(nodeWithScore.node.refDocId ?? "");
      if (!mappedNode) {
        resultNodes.push(nodeWithScore);
        continue;
      }
      const graphContent = graphNodeMap.get(mappedNode.id) ?? [];
      const text = graphContent.length > 0
        ? this.includeTextPreamble + graphContent.join("\n") + "\n\n" + mappedNode.text
        : mappedNode.text;
      resultNodes.push({
        node: {
          id: mappedNode.id,
          text,
          metadata: mappedNode.metadata,
          refDocId: nodeWithScore.node.refDocId
        },
        score: nodeWithScore.score
      });
    }
    return resultNodes;
  }
  /**
   * Retrieve nodes from the graph and, when `includeText` is set, attach
   * their source text.
   *
   * @param {object} queryBundle - Passed to the subclass's `retrieveFromGraph`.
   * @returns {Promise<Array<{ node: object, score: number }>>}
   */
  async retrieve(queryBundle) {
    let nodes = await this.retrieveFromGraph(queryBundle);
    if (this.includeText && nodes.length > 0) {
      nodes = await this.addSourceText(nodes);
    }
    return nodes;
  }
};
|
|
584
|
+
|
|
585
|
+
// src/graph/retrievers/vector-context.ts
|
|
586
|
+
/**
 * Retriever that finds entity nodes by vector similarity and returns the
 * graph triplets around them, scored by the similarity of their endpoints.
 */
var VectorContextRetriever = class extends BasePGRetriever {
  embedModel;
  similarityTopK;
  pathDepth;
  limit;
  similarityScore;
  crossCheckBoost;
  crossCheckBoostFactor;
  /**
   * @param {object} options - BasePGRetriever options plus:
   * @param {object} options.embedModel - Must provide `getTextEmbedding`.
   * @param {number} [options.similarityTopK=4]
   * @param {number} [options.pathDepth=1]
   * @param {number} [options.limit=30]
   * @param {number} [options.similarityScore] - Minimum score to keep a triplet.
   * @param {boolean} [options.crossCheckBoost=true]
   * @param {number} [options.crossCheckBoostFactor=1.25]
   */
  constructor(options) {
    super(options);
    this.embedModel = options.embedModel;
    this.similarityTopK = options.similarityTopK ?? 4;
    this.pathDepth = options.pathDepth ?? 1;
    this.limit = options.limit ?? 30;
    this.similarityScore = options.similarityScore;
    this.crossCheckBoost = options.crossCheckBoost ?? true;
    this.crossCheckBoostFactor = options.crossCheckBoostFactor ?? 1.25;
  }
  /**
   * Lower-cased text form of a value, or `undefined` when it is null/undefined.
   *
   * @param {*} value
   * @returns {string|undefined}
   */
  provenanceKey(value) {
    return value === void 0 || value === null ? void 0 : String(value).toLowerCase();
  }
  /**
   * Vector-search the store, expand the matches into triplets and score them.
   *
   * A triplet's base score is the higher similarity score of its two
   * endpoints (0 for endpoints that were not vector matches). With
   * `crossCheckBoost`, triplets with a positive score whose endpoints pass
   * `checkProvenance` are multiplied by `crossCheckBoostFactor`, capped at 1.
   *
   * @param {{ queryStr: string, embedding?: number[] }} queryBundle
   * @returns {Promise<Array<{ node: object, score: number }>>} Sorted by
   *   descending score.
   * @throws {Error} When the graph store does not support vector queries.
   */
  async retrieveFromGraph(queryBundle) {
    let embedding = queryBundle.embedding;
    if (!embedding) {
      embedding = await this.embedModel.getTextEmbedding(queryBundle.queryStr);
    }
    const vectorQuery = {
      queryEmbedding: embedding,
      similarityTopK: this.similarityTopK
    };
    if (!this.graphStore.supportsVectorQueries || !this.graphStore.vectorQuery) {
      throw new Error("Graph store does not support vector queries");
    }
    const [kgNodes, scores] = await this.graphStore.vectorQuery(vectorQuery);
    if (kgNodes.length === 0) {
      return [];
    }
    // Score lookup by node id; the first occurrence wins, as with indexOf.
    const scoreById = /* @__PURE__ */ new Map();
    kgNodes.forEach((node, i) => {
      if (!scoreById.has(node.id)) {
        scoreById.set(node.id, scores[i]);
      }
    });
    const provenanceSet = /* @__PURE__ */ new Set();
    if (this.crossCheckBoost) {
      for (const node of kgNodes) {
        // id/name may be missing on rows read back from the store.
        for (const value of [node.id, node.name]) {
          const key = this.provenanceKey(value);
          if (key !== void 0) provenanceSet.add(key);
        }
        const props = node.properties ?? {};
        if (props.documentId) {
          provenanceSet.add(String(props.documentId).toLowerCase());
        }
        if (props.sourceChunk) {
          provenanceSet.add(String(props.sourceChunk).toLowerCase());
        }
      }
    }
    const triplets = await this.graphStore.getRelMap({
      nodes: kgNodes,
      depth: this.pathDepth,
      limit: this.limit,
      ignoreRels: [KG_SOURCE_REL]
    });
    let results = triplets.map((triplet) => {
      const score1 = scoreById.get(triplet[0].id) ?? 0;
      const score2 = scoreById.get(triplet[2].id) ?? 0;
      let score = Math.max(score1, score2);
      if (this.crossCheckBoost && score > 0) {
        const shouldBoost = this.checkProvenance(triplet[0], provenanceSet) || this.checkProvenance(triplet[2], provenanceSet);
        if (shouldBoost) {
          score = Math.min(1, score * this.crossCheckBoostFactor);
        }
      }
      return { triplet, score };
    });
    if (this.similarityScore !== void 0) {
      results = results.filter((r) => r.score >= this.similarityScore);
    }
    results.sort((a, b) => b.score - a.score);
    return this.getNodesWithScore(
      results.map((r) => r.triplet),
      results.map((r) => r.score)
    );
  }
  /**
   * Check if a node has provenance linking to the vector-matched nodes.
   * Returns true if the node's `documentId` or `sourceChunk` property, or
   * the node's own id or name, is present in the provenance set
   * (compared in lower case).
   *
   * @param {object} node
   * @param {Set<string>} provenanceSet
   * @returns {boolean}
   */
  checkProvenance(node, provenanceSet) {
    if (provenanceSet.size === 0) return false;
    const props = node.properties ?? {};
    if (props.documentId && provenanceSet.has(String(props.documentId).toLowerCase())) {
      return true;
    }
    if (props.sourceChunk && provenanceSet.has(String(props.sourceChunk).toLowerCase())) {
      return true;
    }
    const idKey = this.provenanceKey(node.id);
    const nameKey = this.provenanceKey(node.name);
    return idKey !== void 0 && provenanceSet.has(idKey) || nameKey !== void 0 && provenanceSet.has(nameKey);
  }
};
|
|
686
|
+
|
|
687
|
+
// src/graph/extractors/implicit.ts
|
|
688
|
+
/**
 * Extractor that derives graph structure from the input nodes themselves:
 * every input node becomes a CHUNK entity and, when its metadata has a
 * `documentId`, is linked to a DOCUMENT entity via FROM_DOCUMENT.
 */
var ImplicitPathExtractor = class {
  /**
   * Attach CHUNK/DOCUMENT entities and relations to each node's metadata.
   *
   * The input nodes and their metadata arrays are left untouched: existing
   * `kg_nodes` / `kg_relations` entries are copied before new ones are
   * appended, so running the extractor twice on the same input does not pile
   * up duplicates in the caller's objects.
   *
   * @param {Array<{ id: string, text: string, metadata: object }>} nodes
   * @param {object} [options] - Accepted for interface compatibility; not used.
   * @returns {Promise<Array<object>>} New node objects with extended metadata.
   */
  async transform(nodes, options) {
    const results = [];
    for (const node of nodes) {
      const kgNodes = [...node.metadata[KG_NODES_KEY] ?? []];
      const kgRelations = [...node.metadata[KG_RELATIONS_KEY] ?? []];
      // Chunk properties carry the metadata without the KG payload itself.
      const safeMetadata = { ...node.metadata };
      delete safeMetadata[KG_NODES_KEY];
      delete safeMetadata[KG_RELATIONS_KEY];
      const chunkNode = createEntityNode({
        label: "CHUNK",
        name: node.id,
        properties: {
          // Only the first 500 characters of the text are kept on the entity.
          text: node.text.slice(0, 500),
          ...safeMetadata
        }
      });
      if (node.metadata.documentId) {
        const docNode = createEntityNode({
          label: "DOCUMENT",
          name: String(node.metadata.documentId),
          properties: {}
        });
        const rel = createRelation({
          label: "FROM_DOCUMENT",
          sourceId: chunkNode.id,
          targetId: docNode.id
        });
        kgNodes.push(docNode);
        kgRelations.push(rel);
      }
      kgNodes.push(chunkNode);
      results.push({
        ...node,
        metadata: {
          ...node.metadata,
          [KG_NODES_KEY]: kgNodes,
          [KG_RELATIONS_KEY]: kgRelations
        }
      });
    }
    return results;
  }
};
|
|
732
|
+
|
|
733
|
+
// src/graph/property-graph-index.ts
|
|
734
|
+
import { createHash } from "crypto";
|
|
735
|
+
var PropertyGraphIndex = class _PropertyGraphIndex {
|
|
736
|
+
propertyGraphStore;
|
|
737
|
+
kgExtractors;
|
|
738
|
+
embedModel;
|
|
739
|
+
embedKgNodes;
|
|
740
|
+
showProgress;
|
|
741
|
+
constructor(options) {
|
|
742
|
+
this.propertyGraphStore = options.propertyGraphStore;
|
|
743
|
+
this.kgExtractors = options.kgExtractors ?? [new ImplicitPathExtractor()];
|
|
744
|
+
this.embedModel = options.embedModel;
|
|
745
|
+
this.embedKgNodes = options.embedKgNodes ?? true;
|
|
746
|
+
this.showProgress = options.showProgress ?? false;
|
|
747
|
+
}
|
|
748
|
+
get graphStore() {
|
|
749
|
+
return this.propertyGraphStore;
|
|
750
|
+
}
|
|
751
|
+
static fromExisting(options) {
|
|
752
|
+
return new _PropertyGraphIndex(options);
|
|
753
|
+
}
|
|
754
|
+
computeHash(text) {
|
|
755
|
+
return createHash("md5").update(text).digest("hex");
|
|
756
|
+
}
|
|
757
|
+
async insert(nodes) {
|
|
758
|
+
if (nodes.length === 0) return [];
|
|
759
|
+
let processedNodes = nodes;
|
|
760
|
+
for (const extractor of this.kgExtractors) {
|
|
761
|
+
processedNodes = await extractor.transform(processedNodes, {
|
|
762
|
+
showProgress: this.showProgress
|
|
763
|
+
});
|
|
764
|
+
}
|
|
765
|
+
for (const node of processedNodes) {
|
|
766
|
+
if (!node.metadata[KG_NODES_KEY] && !node.metadata[KG_RELATIONS_KEY]) {
|
|
767
|
+
throw new Error(`Node ${node.id} has no KG_NODES_KEY or KG_RELATIONS_KEY after extraction`);
|
|
768
|
+
}
|
|
769
|
+
}
|
|
770
|
+
const kgNodesToInsert = [];
|
|
771
|
+
const kgRelsToInsert = [];
|
|
772
|
+
for (const node of processedNodes) {
|
|
773
|
+
const kgNodes = node.metadata[KG_NODES_KEY] ?? [];
|
|
774
|
+
const kgRels = node.metadata[KG_RELATIONS_KEY] ?? [];
|
|
775
|
+
for (const kgNode of kgNodes) {
|
|
776
|
+
kgNode.properties[TRIPLET_SOURCE_KEY] = node.id;
|
|
777
|
+
}
|
|
778
|
+
for (const kgRel of kgRels) {
|
|
779
|
+
kgRel.properties[TRIPLET_SOURCE_KEY] = node.id;
|
|
780
|
+
}
|
|
781
|
+
kgNodesToInsert.push(...kgNodes);
|
|
782
|
+
kgRelsToInsert.push(...kgRels);
|
|
783
|
+
}
|
|
784
|
+
const kgNodeIds = [...new Set(kgNodesToInsert.map((n) => n.id))];
|
|
785
|
+
const existingKgNodes = await this.propertyGraphStore.get({ ids: kgNodeIds });
|
|
786
|
+
const existingKgNodeIds = new Set(existingKgNodes.map((n) => n.id));
|
|
787
|
+
const newKgNodes = kgNodesToInsert.filter((n) => !existingKgNodeIds.has(n.id));
|
|
788
|
+
if (this.propertyGraphStore.getLlamaNodes) {
|
|
789
|
+
const existingLlamaNodes = await this.propertyGraphStore.getLlamaNodes(
|
|
790
|
+
processedNodes.map((n) => n.id)
|
|
791
|
+
);
|
|
792
|
+
const existingHashes = new Set(existingLlamaNodes.map((n) => n.hash));
|
|
793
|
+
processedNodes = processedNodes.filter((n) => {
|
|
794
|
+
const hash = this.computeHash(n.text);
|
|
795
|
+
n.hash = hash;
|
|
796
|
+
return !existingHashes.has(hash);
|
|
797
|
+
});
|
|
798
|
+
}
|
|
799
|
+
if (this.embedKgNodes && this.embedModel && newKgNodes.length > 0) {
|
|
800
|
+
const nodeTexts = processedNodes.map((n) => n.text);
|
|
801
|
+
const embeddings = await this.embedModel.getTextEmbeddingBatch(nodeTexts);
|
|
802
|
+
for (let i = 0; i < processedNodes.length; i++) {
|
|
803
|
+
processedNodes[i].embedding = embeddings[i];
|
|
804
|
+
}
|
|
805
|
+
const kgNodeTexts = newKgNodes.map((n) => `${n.label}: ${n.name}`);
|
|
806
|
+
const kgEmbeddings = await this.embedModel.getTextEmbeddingBatch(kgNodeTexts);
|
|
807
|
+
for (let i = 0; i < newKgNodes.length; i++) {
|
|
808
|
+
newKgNodes[i].embedding = kgEmbeddings[i];
|
|
809
|
+
}
|
|
810
|
+
}
|
|
811
|
+
if (this.propertyGraphStore.upsertLlamaNodes && processedNodes.length > 0) {
|
|
812
|
+
await this.propertyGraphStore.upsertLlamaNodes(
|
|
813
|
+
processedNodes.map((n) => ({
|
|
814
|
+
id: n.id,
|
|
815
|
+
text: n.text,
|
|
816
|
+
metadata: n.metadata,
|
|
817
|
+
embedding: n.embedding,
|
|
818
|
+
hash: n.hash
|
|
819
|
+
}))
|
|
820
|
+
);
|
|
821
|
+
}
|
|
822
|
+
if (newKgNodes.length > 0) {
|
|
823
|
+
await this.propertyGraphStore.upsertNodes(newKgNodes);
|
|
824
|
+
}
|
|
825
|
+
if (kgRelsToInsert.length > 0) {
|
|
826
|
+
await this.propertyGraphStore.upsertRelations(kgRelsToInsert);
|
|
827
|
+
}
|
|
828
|
+
return processedNodes;
|
|
829
|
+
}
|
|
830
|
+
async delete(nodeIds) {
|
|
831
|
+
await this.propertyGraphStore.delete({ ids: nodeIds });
|
|
832
|
+
}
|
|
833
|
+
asRetriever(options) {
|
|
834
|
+
let subRetrievers = options?.subRetrievers;
|
|
835
|
+
if (!subRetrievers && this.embedModel && this.propertyGraphStore.supportsVectorQueries) {
|
|
836
|
+
subRetrievers = [
|
|
837
|
+
new VectorContextRetriever({
|
|
838
|
+
graphStore: this.propertyGraphStore,
|
|
839
|
+
embedModel: this.embedModel,
|
|
840
|
+
includeText: options?.includeText ?? true,
|
|
841
|
+
similarityTopK: options?.similarityTopK,
|
|
842
|
+
pathDepth: options?.pathDepth,
|
|
843
|
+
limit: options?.limit,
|
|
844
|
+
similarityScore: options?.similarityScore,
|
|
845
|
+
crossCheckBoost: options?.crossCheckBoost,
|
|
846
|
+
crossCheckBoostFactor: options?.crossCheckBoostFactor
|
|
847
|
+
})
|
|
848
|
+
];
|
|
849
|
+
}
|
|
850
|
+
if (!subRetrievers) {
|
|
851
|
+
subRetrievers = [];
|
|
852
|
+
}
|
|
853
|
+
return new PGRetriever({ subRetrievers });
|
|
854
|
+
}
|
|
855
|
+
async query(queryStr, options) {
|
|
856
|
+
const retriever = this.asRetriever(options);
|
|
857
|
+
return retriever.retrieve({ queryStr });
|
|
858
|
+
}
|
|
859
|
+
};
|
|
860
|
+
|
|
861
|
+
// src/graph/extractors/schema-llm.ts
|
|
862
|
+
import { z as z2 } from "zod";
|
|
863
|
+
// Default extraction prompt. The {maxTriplets}, {entityTypes},
// {relationTypes}, {validationSchema} and {text} placeholders are filled in
// by SchemaLLMPathExtractor.buildPrompt; {entityTypes} and {relationTypes}
// appear more than once.
var DEFAULT_EXTRACT_PROMPT = [
  "Given the following text, extract a knowledge graph according to the provided schema.",
  "Try to limit to {maxTriplets} extracted paths.",
  "",
  "Schema:",
  "- Entity types: {entityTypes}",
  "- Relation types: {relationTypes}",
  "- Valid relationships: {validationSchema}",
  "",
  "Text:",
  "-------",
  "{text}",
  "-------",
  "",
  "Extract entities and relationships from the text above.",
  'Return your answer as a JSON object with a "triplets" array. Each triplet should have:',
  '- "subject": { "name": string, "type": one of [{entityTypes}] }',
  '- "relation": { "type": one of [{relationTypes}] }',
  '- "object": { "name": string, "type": one of [{entityTypes}] }',
  "",
  "Example output format:",
  "{",
  '"triplets": [',
  "{",
  '"subject": { "name": "John", "type": "PERSON" },',
  '"relation": { "type": "WORKS_AT" },',
  '"object": { "name": "Acme Corp", "type": "ORGANIZATION" }',
  "}",
  "]",
  "}"
].join("\n");
|
|
892
|
+
/**
 * Extractor that asks an LLM for (subject, relation, object) triplets
 * constrained to a caller-supplied schema and attaches them to each text
 * node's metadata under KG_NODES_KEY / KG_RELATIONS_KEY.
 */
var SchemaLLMPathExtractor = class {
  llm;
  schema;
  maxTripletsPerChunk;
  strict;
  extractPromptTemplate;
  tripletSchema;

  /**
   * @param {object} options
   * @param {object} options.llm - Must expose `structuredPredict(schema, prompt)`.
   * @param {object} options.schema - `entityTypes`, `relationTypes` and an
   *   optional `validationSchema` of `[subject, relation, object]` tuples.
   * @param {number} [options.maxTripletsPerChunk=10]
   * @param {boolean} [options.strict=true] - When true and a
   *   `validationSchema` exists, triplets outside it are discarded.
   * @param {string} [options.extractPromptTemplate] - Defaults to
   *   DEFAULT_EXTRACT_PROMPT.
   */
  constructor(options) {
    this.llm = options.llm;
    this.schema = options.schema;
    this.maxTripletsPerChunk = options.maxTripletsPerChunk ?? 10;
    this.strict = options.strict ?? true;
    this.extractPromptTemplate = options.extractPromptTemplate ?? DEFAULT_EXTRACT_PROMPT;
    const entityTypeEnum = z2.enum(this.schema.entityTypes);
    const relationTypeEnum = z2.enum(this.schema.relationTypes);
    const entitySchema = z2.object({
      name: z2.string(),
      type: entityTypeEnum,
      properties: z2.record(z2.unknown()).optional()
    });
    const relationSchema = z2.object({
      type: relationTypeEnum,
      properties: z2.record(z2.unknown()).optional()
    });
    const tripletSchema = z2.object({
      subject: entitySchema,
      relation: relationSchema,
      object: entitySchema
    });
    this.tripletSchema = z2.object({
      triplets: z2.array(tripletSchema)
    });
  }

  /**
   * Canonical form used when comparing type labels: upper-cased, with each
   * run of whitespace collapsed to a single underscore.
   *
   * @param {*} value - Label to normalise (coerced with `String`).
   * @returns {string}
   */
  static normalizeType(value) {
    return String(value).toUpperCase().replace(/\s+/g, "_");
  }

  /**
   * Fills the prompt template for one chunk of text.
   *
   * Every occurrence of each placeholder is substituted (the default
   * template repeats `{entityTypes}` and `{relationTypes}`), and values are
   * inserted literally, so `$&`, `$1` and similar sequences in the text are
   * not interpreted as replacement patterns. Substitution is a single pass,
   * so placeholder-like text inside an inserted value is left alone.
   *
   * @param {string} text - Chunk text inserted at `{text}`.
   * @returns {string} The completed prompt.
   */
  buildPrompt(text) {
    const validationSchemaStr = this.schema.validationSchema?.map(([s, r, o]) => `(${s})-[${r}]->(${o})`).join(", ") ?? "Any valid combination";
    const values = {
      maxTriplets: String(this.maxTripletsPerChunk),
      entityTypes: this.schema.entityTypes.join(", "),
      relationTypes: this.schema.relationTypes.join(", "),
      validationSchema: validationSchemaStr,
      text
    };
    return this.extractPromptTemplate.replace(
      /\{(maxTriplets|entityTypes|relationTypes|validationSchema|text)\}/g,
      (_placeholder, key) => values[key]
    );
  }

  /**
   * Checks a type combination against `schema.validationSchema`.
   *
   * Both the arguments and the schema entries go through `normalizeType`,
   * so schema labels containing whitespace match the normalised labels
   * produced by `pruneInvalidTriplets`.
   *
   * @param {string} subjectType
   * @param {string} relationType
   * @param {string} objectType
   * @returns {boolean} Always true when `strict` is off or no
   *   `validationSchema` is configured.
   */
  isValidTriplet(subjectType, relationType, objectType) {
    if (!this.strict || !this.schema.validationSchema) {
      return true;
    }
    const normalize = SchemaLLMPathExtractor.normalizeType;
    const subject = normalize(subjectType);
    const relation = normalize(relationType);
    const object = normalize(objectType);
    return this.schema.validationSchema.some(
      ([s, r, o]) => normalize(s) === subject && normalize(r) === relation && normalize(o) === object
    );
  }

  /**
   * Converts raw LLM output into graph objects, dropping triplets that fail
   * schema validation or whose subject and object names are equal ignoring
   * case.
   *
   * @param {{triplets: Array<object>}} extracted - Parsed LLM result.
   * @returns {Array<Array<object>>} `[subjectNode, relation, objectNode]` tuples.
   */
  pruneInvalidTriplets(extracted) {
    const normalize = SchemaLLMPathExtractor.normalizeType;
    const validTriplets = [];
    for (const triplet of extracted.triplets) {
      const subjectType = normalize(triplet.subject.type);
      const relationType = normalize(triplet.relation.type);
      const objectType = normalize(triplet.object.type);
      if (!this.isValidTriplet(subjectType, relationType, objectType)) {
        continue;
      }
      const subjectName = String(triplet.subject.name).trim();
      const objectName = String(triplet.object.name).trim();
      // Skip self-referential triplets.
      if (subjectName.toLowerCase() === objectName.toLowerCase()) {
        continue;
      }
      const subj = createEntityNode({
        label: subjectType,
        name: subjectName,
        properties: triplet.subject.properties ?? {}
      });
      const obj = createEntityNode({
        label: objectType,
        name: objectName,
        properties: triplet.object.properties ?? {}
      });
      const rel = createRelation({
        label: relationType,
        sourceId: subj.id,
        targetId: obj.id,
        properties: triplet.relation.properties ?? {}
      });
      validTriplets.push([subj, rel, obj]);
    }
    return validTriplets;
  }

  /**
   * Extracts triplets for one text node and returns a copy of the node with
   * them appended to its KG metadata.
   *
   * LLM or validation failures are logged with `console.warn` and treated
   * as "no triplets" so one bad chunk does not abort the whole batch. The
   * input node's metadata arrays are copied before appending, so the
   * caller's node is not modified.
   *
   * @param {object} node - Text node read through `id`, `text` and `metadata`.
   * @returns {Promise<object>} Shallow copy of `node` with updated metadata.
   */
  async extractFromNode(node) {
    const prompt = this.buildPrompt(node.text);
    let triplets = [];
    try {
      const result = await this.llm.structuredPredict(this.tripletSchema, prompt);
      triplets = this.pruneInvalidTriplets(result);
    } catch (err) {
      console.warn(`[SchemaLLMPathExtractor] Failed to extract from node ${node.id}:`, err);
    }
    const kgNodes = [...(node.metadata[KG_NODES_KEY] ?? [])];
    const kgRelations = [...(node.metadata[KG_RELATIONS_KEY] ?? [])];
    // The node's own metadata (minus the KG bookkeeping keys) is copied onto
    // every extracted entity and relation; metadata wins on key clashes.
    const safeMetadata = { ...node.metadata };
    delete safeMetadata[KG_NODES_KEY];
    delete safeMetadata[KG_RELATIONS_KEY];
    for (const [subj, rel, obj] of triplets) {
      subj.properties = { ...subj.properties, ...safeMetadata };
      obj.properties = { ...obj.properties, ...safeMetadata };
      rel.properties = { ...rel.properties, ...safeMetadata };
      kgNodes.push(subj, obj);
      kgRelations.push(rel);
    }
    return {
      ...node,
      metadata: {
        ...node.metadata,
        [KG_NODES_KEY]: kgNodes,
        [KG_RELATIONS_KEY]: kgRelations
      }
    };
  }

  /**
   * Runs `extractFromNode` over `nodes` one at a time, preserving order.
   *
   * @param {Array<object>} nodes
   * @param {object} [options] - Accepted for interface compatibility; not
   *   read by this implementation.
   * @returns {Promise<Array<object>>}
   */
  async transform(nodes, options) {
    const results = [];
    for (const node of nodes) {
      const processed = await this.extractFromNode(node);
      results.push(processed);
    }
    return results;
  }
};
|
|
1012
|
+
export {
|
|
1013
|
+
BasePGRetriever,
|
|
1014
|
+
DEFAULT_PREAMBLE,
|
|
1015
|
+
EntityNodeSchema,
|
|
1016
|
+
HanaPropertyGraphStore,
|
|
1017
|
+
HanaSparqlStore,
|
|
1018
|
+
ImplicitPathExtractor,
|
|
1019
|
+
KG_NODES_KEY,
|
|
1020
|
+
KG_RELATIONS_KEY,
|
|
1021
|
+
KG_SOURCE_REL,
|
|
1022
|
+
PGRetriever,
|
|
1023
|
+
PropertyGraphIndex,
|
|
1024
|
+
RelationSchema,
|
|
1025
|
+
SchemaLLMPathExtractor,
|
|
1026
|
+
TRIPLET_SOURCE_KEY,
|
|
1027
|
+
VECTOR_SOURCE_KEY,
|
|
1028
|
+
VectorContextRetriever,
|
|
1029
|
+
createEntityNode,
|
|
1030
|
+
createHanaConnection,
|
|
1031
|
+
createRelation,
|
|
1032
|
+
hanaExec,
|
|
1033
|
+
loadEnv,
|
|
1034
|
+
tripletToString
|
|
1035
|
+
};
|
|
1036
|
+
//# sourceMappingURL=index.js.map
|