@framers/agentos 0.1.32 → 0.1.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102):
  1. package/README.md +5 -2
  2. package/dist/api/AgentOS.d.ts +62 -1
  3. package/dist/api/AgentOS.d.ts.map +1 -1
  4. package/dist/api/AgentOS.js +177 -2
  5. package/dist/api/AgentOS.js.map +1 -1
  6. package/dist/api/AgentOSOrchestrator.d.ts +187 -0
  7. package/dist/api/AgentOSOrchestrator.d.ts.map +1 -1
  8. package/dist/api/AgentOSOrchestrator.js +709 -16
  9. package/dist/api/AgentOSOrchestrator.js.map +1 -1
  10. package/dist/cognitive_substrate/GMI.d.ts.map +1 -1
  11. package/dist/cognitive_substrate/GMI.js +36 -1
  12. package/dist/cognitive_substrate/GMI.js.map +1 -1
  13. package/dist/cognitive_substrate/IGMI.d.ts +21 -0
  14. package/dist/cognitive_substrate/IGMI.d.ts.map +1 -1
  15. package/dist/cognitive_substrate/IGMI.js.map +1 -1
  16. package/dist/config/AgentOSConfig.d.ts.map +1 -1
  17. package/dist/config/AgentOSConfig.js +17 -0
  18. package/dist/config/AgentOSConfig.js.map +1 -1
  19. package/dist/config/VectorStoreConfiguration.d.ts +2 -1
  20. package/dist/config/VectorStoreConfiguration.d.ts.map +1 -1
  21. package/dist/config/VectorStoreConfiguration.js.map +1 -1
  22. package/dist/core/knowledge/Neo4jKnowledgeGraph.d.ts +89 -0
  23. package/dist/core/knowledge/Neo4jKnowledgeGraph.d.ts.map +1 -0
  24. package/dist/core/knowledge/Neo4jKnowledgeGraph.js +683 -0
  25. package/dist/core/knowledge/Neo4jKnowledgeGraph.js.map +1 -0
  26. package/dist/core/llm/providers/implementations/OllamaProvider.d.ts +14 -1
  27. package/dist/core/llm/providers/implementations/OllamaProvider.d.ts.map +1 -1
  28. package/dist/core/llm/providers/implementations/OllamaProvider.js +142 -37
  29. package/dist/core/llm/providers/implementations/OllamaProvider.js.map +1 -1
  30. package/dist/core/llm/providers/implementations/OpenAIProvider.js +3 -3
  31. package/dist/core/llm/providers/implementations/OpenAIProvider.js.map +1 -1
  32. package/dist/core/observability/otel.d.ts +2 -0
  33. package/dist/core/observability/otel.d.ts.map +1 -1
  34. package/dist/core/observability/otel.js +14 -0
  35. package/dist/core/observability/otel.js.map +1 -1
  36. package/dist/core/orchestration/SqlTaskOutcomeTelemetryStore.d.ts +30 -0
  37. package/dist/core/orchestration/SqlTaskOutcomeTelemetryStore.d.ts.map +1 -0
  38. package/dist/core/orchestration/SqlTaskOutcomeTelemetryStore.js +123 -0
  39. package/dist/core/orchestration/SqlTaskOutcomeTelemetryStore.js.map +1 -0
  40. package/dist/core/orchestration/TurnPlanner.d.ts +89 -0
  41. package/dist/core/orchestration/TurnPlanner.d.ts.map +1 -0
  42. package/dist/core/orchestration/TurnPlanner.js +242 -0
  43. package/dist/core/orchestration/TurnPlanner.js.map +1 -0
  44. package/dist/discovery/CapabilityDiscoveryEngine.js +4 -4
  45. package/dist/discovery/CapabilityDiscoveryEngine.js.map +1 -1
  46. package/dist/discovery/CapabilityGraph.d.ts +2 -2
  47. package/dist/discovery/CapabilityGraph.d.ts.map +1 -1
  48. package/dist/discovery/CapabilityGraph.js +46 -17
  49. package/dist/discovery/CapabilityGraph.js.map +1 -1
  50. package/dist/discovery/Neo4jCapabilityGraph.d.ts +58 -0
  51. package/dist/discovery/Neo4jCapabilityGraph.d.ts.map +1 -0
  52. package/dist/discovery/Neo4jCapabilityGraph.js +226 -0
  53. package/dist/discovery/Neo4jCapabilityGraph.js.map +1 -0
  54. package/dist/discovery/index.d.ts +1 -0
  55. package/dist/discovery/index.d.ts.map +1 -1
  56. package/dist/discovery/index.js +1 -0
  57. package/dist/discovery/index.js.map +1 -1
  58. package/dist/discovery/types.d.ts +1 -1
  59. package/dist/discovery/types.d.ts.map +1 -1
  60. package/dist/index.d.ts +2 -0
  61. package/dist/index.d.ts.map +1 -1
  62. package/dist/index.js +2 -0
  63. package/dist/index.js.map +1 -1
  64. package/dist/neo4j/Neo4jConnectionManager.d.ts +59 -0
  65. package/dist/neo4j/Neo4jConnectionManager.d.ts.map +1 -0
  66. package/dist/neo4j/Neo4jConnectionManager.js +115 -0
  67. package/dist/neo4j/Neo4jConnectionManager.js.map +1 -0
  68. package/dist/neo4j/Neo4jCypherRunner.d.ts +39 -0
  69. package/dist/neo4j/Neo4jCypherRunner.d.ts.map +1 -0
  70. package/dist/neo4j/Neo4jCypherRunner.js +74 -0
  71. package/dist/neo4j/Neo4jCypherRunner.js.map +1 -0
  72. package/dist/neo4j/index.d.ts +12 -0
  73. package/dist/neo4j/index.d.ts.map +1 -0
  74. package/dist/neo4j/index.js +11 -0
  75. package/dist/neo4j/index.js.map +1 -0
  76. package/dist/neo4j/types.d.ts +27 -0
  77. package/dist/neo4j/types.d.ts.map +1 -0
  78. package/dist/neo4j/types.js +6 -0
  79. package/dist/neo4j/types.js.map +1 -0
  80. package/dist/rag/VectorStoreManager.d.ts.map +1 -1
  81. package/dist/rag/VectorStoreManager.js +6 -7
  82. package/dist/rag/VectorStoreManager.js.map +1 -1
  83. package/dist/rag/graphrag/GraphRAGEngine.d.ts.map +1 -1
  84. package/dist/rag/graphrag/GraphRAGEngine.js +42 -10
  85. package/dist/rag/graphrag/GraphRAGEngine.js.map +1 -1
  86. package/dist/rag/graphrag/Neo4jGraphRAGEngine.d.ts +95 -0
  87. package/dist/rag/graphrag/Neo4jGraphRAGEngine.d.ts.map +1 -0
  88. package/dist/rag/graphrag/Neo4jGraphRAGEngine.js +748 -0
  89. package/dist/rag/graphrag/Neo4jGraphRAGEngine.js.map +1 -0
  90. package/dist/rag/graphrag/index.d.ts +1 -0
  91. package/dist/rag/graphrag/index.d.ts.map +1 -1
  92. package/dist/rag/graphrag/index.js +1 -0
  93. package/dist/rag/graphrag/index.js.map +1 -1
  94. package/dist/rag/implementations/vector_stores/Neo4jVectorStore.d.ts +55 -0
  95. package/dist/rag/implementations/vector_stores/Neo4jVectorStore.d.ts.map +1 -0
  96. package/dist/rag/implementations/vector_stores/Neo4jVectorStore.js +369 -0
  97. package/dist/rag/implementations/vector_stores/Neo4jVectorStore.js.map +1 -0
  98. package/dist/rag/implementations/vector_stores/index.d.ts +1 -0
  99. package/dist/rag/implementations/vector_stores/index.d.ts.map +1 -1
  100. package/dist/rag/implementations/vector_stores/index.js +2 -0
  101. package/dist/rag/implementations/vector_stores/index.js.map +1 -1
  102. package/package.json +5 -1
@@ -0,0 +1,748 @@
1
+ /**
2
+ * @fileoverview Neo4j-backed GraphRAG engine for AgentOS.
3
+ *
4
+ * Implements `IGraphRAGEngine` using Neo4j for persistent entity/relationship/community
5
+ * storage, native HNSW vector indexes for entity/community semantic search, and
6
+ * GDS Louvain for community detection (with client-side graphology fallback).
7
+ *
8
+ * The entity extraction pipeline (LLM or pattern-based) is delegated to the caller
9
+ * or reused from the existing GraphRAGEngine's extraction utilities.
10
+ *
11
+ * Features:
12
+ * - Persistent graph storage in Neo4j
13
+ * - Native vector indexes on entity/community embeddings
14
+ * - GDS Louvain community detection (falls back to graphology if GDS unavailable)
15
+ * - Document contribution tracking for safe re-ingestion
16
+ * - Global search (community summaries) and local search (entity + 1-hop expansion)
17
+ * - Shared Neo4jConnectionManager for connection pooling
18
+ *
19
+ * @module @framers/agentos/rag/graphrag/Neo4jGraphRAGEngine
20
+ * @see ./IGraphRAG.ts for the interface definition.
21
+ */
22
+ import { Neo4jCypherRunner } from '../../neo4j/Neo4jCypherRunner.js';
23
+ // ============================================================================
24
+ // Constants
25
+ // ============================================================================
26
+ const ENTITY_LABEL = 'GraphRAGEntity'; // Node label used for extracted entities in every Cypher statement below.
27
+ const COMMUNITY_LABEL = 'GraphRAGCommunity'; // Node label used for detected communities.
28
+ const DOC_LABEL = 'GraphRAGDocument'; // Node label used to track ingested documents.
29
+ const REL_TYPE = 'GRAPHRAG_REL'; // Single relationship type between entities; the semantic type is stored in r.relType.
30
+ const MEMBER_OF = 'MEMBER_OF'; // Relationship type linking an entity node to its community node.
31
+ const ENTITY_VEC_INDEX = 'graphrag_entity_embeddings'; // Name of the vector index over entity `embedding` properties.
32
+ const COMMUNITY_VEC_INDEX = 'graphrag_community_embeddings'; // Name of the vector index over community `summaryEmbedding` properties.
33
+ const DEFAULT_EMBEDDING_DIM = 1536; // Vector dimension used when config.embeddingDimension is null/undefined.
34
/**
 * Builds an identifier of the form `<epoch-millis>-<random>`, where the random
 * part is up to eight base-36 characters derived from Math.random().
 *
 * @returns {string} The generated identifier.
 */
function generateId() {
    const timestamp = Date.now();
    const randomSuffix = Math.random().toString(36).slice(2, 10);
    return `${timestamp}-${randomSuffix}`;
}
37
/**
 * Returns the current instant as an ISO-8601 string (as produced by
 * Date.prototype.toISOString).
 *
 * @returns {string} Current timestamp.
 */
function nowIso() {
    const current = new Date();
    return current.toISOString();
}
40
+ // ============================================================================
41
+ // Implementation
42
+ // ============================================================================
43
+ export class Neo4jGraphRAGEngine {
44
+ constructor(deps) {
45
+ this.deps = deps;
46
+ this._isInitialized = false;
47
+ }
48
+ async initialize(config) {
49
+ this.config = config;
50
+ this.cypher = new Neo4jCypherRunner(this.deps.connectionManager);
51
+ this.embeddingDimension = config.embeddingDimension ?? DEFAULT_EMBEDDING_DIM;
52
+ // Create constraints
53
+ await this.cypher.writeVoid(`CREATE CONSTRAINT graphrag_entity_unique IF NOT EXISTS
54
+ FOR (n:${ENTITY_LABEL}) REQUIRE n.entityId IS UNIQUE`);
55
+ await this.cypher.writeVoid(`CREATE CONSTRAINT graphrag_community_unique IF NOT EXISTS
56
+ FOR (n:${COMMUNITY_LABEL}) REQUIRE n.communityId IS UNIQUE`);
57
+ await this.cypher.writeVoid(`CREATE CONSTRAINT graphrag_doc_unique IF NOT EXISTS
58
+ FOR (n:${DOC_LABEL}) REQUIRE n.documentId IS UNIQUE`);
59
+ // Create vector indexes
60
+ await this.cypher.writeVoid(`CREATE VECTOR INDEX ${ENTITY_VEC_INDEX} IF NOT EXISTS
61
+ FOR (n:${ENTITY_LABEL}) ON (n.embedding)
62
+ OPTIONS { indexConfig: {
63
+ \`vector.dimensions\`: toInteger($dim),
64
+ \`vector.similarity_function\`: 'cosine'
65
+ }}`, { dim: this.embeddingDimension });
66
+ await this.cypher.writeVoid(`CREATE VECTOR INDEX ${COMMUNITY_VEC_INDEX} IF NOT EXISTS
67
+ FOR (n:${COMMUNITY_LABEL}) ON (n.summaryEmbedding)
68
+ OPTIONS { indexConfig: {
69
+ \`vector.dimensions\`: toInteger($dim),
70
+ \`vector.similarity_function\`: 'cosine'
71
+ }}`, { dim: this.embeddingDimension });
72
+ this._isInitialized = true;
73
+ }
74
+ async ingestDocuments(documents) {
75
+ let totalEntities = 0;
76
+ let totalRelationships = 0;
77
+ for (const doc of documents) {
78
+ // Track document
79
+ await this.cypher.writeVoid(`MERGE (d:${DOC_LABEL} { documentId: $docId })
80
+ SET d.ingestedAt = $now, d.contentHash = $hash`, { docId: doc.id, now: nowIso(), hash: this.simpleHash(doc.content) });
81
+ // Extract entities and relationships (LLM-based or pattern-based)
82
+ const extraction = await this.extractEntitiesAndRelationships(doc.content, doc.id);
83
+ // Merge entities into graph
84
+ for (const entity of extraction.entities) {
85
+ await this.cypher.writeVoid(`MERGE (e:${ENTITY_LABEL} { normalizedName: toLower(trim($name)) })
86
+ ON CREATE SET
87
+ e.entityId = $id,
88
+ e.name = $name,
89
+ e.type = $type,
90
+ e.description = $description,
91
+ e.properties_json = $props_json,
92
+ e.frequency = $frequency,
93
+ e.sourceDocumentIds = [$docId],
94
+ e.createdAt = $now,
95
+ e.updatedAt = $now
96
+ ON MATCH SET
97
+ e.frequency = e.frequency + $frequency,
98
+ e.updatedAt = $now,
99
+ e.sourceDocumentIds = CASE
100
+ WHEN NOT $docId IN e.sourceDocumentIds
101
+ THEN e.sourceDocumentIds + $docId
102
+ ELSE e.sourceDocumentIds
103
+ END,
104
+ e.type = CASE WHEN e.type = 'concept' AND $type <> 'concept' THEN $type ELSE e.type END,
105
+ e.description = CASE WHEN size($description) > size(e.description) THEN $description ELSE e.description END`, {
106
+ id: entity.id,
107
+ name: entity.name,
108
+ type: entity.type,
109
+ description: entity.description,
110
+ props_json: JSON.stringify(entity.properties),
111
+ frequency: entity.frequency,
112
+ docId: doc.id,
113
+ now: nowIso(),
114
+ });
115
+ totalEntities++;
116
+ }
117
+ // Merge relationships
118
+ for (const rel of extraction.relationships) {
119
+ await this.cypher.writeVoid(`MATCH (src:${ENTITY_LABEL} { entityId: $sourceId })
120
+ MATCH (tgt:${ENTITY_LABEL} { entityId: $targetId })
121
+ MERGE (src)-[r:${REL_TYPE} { relType: $type }]->(tgt)
122
+ ON CREATE SET
123
+ r.relId = $id,
124
+ r.description = $description,
125
+ r.weight = $weight,
126
+ r.sourceDocumentIds = [$docId],
127
+ r.createdAt = $now
128
+ ON MATCH SET
129
+ r.weight = r.weight + $weight,
130
+ r.sourceDocumentIds = CASE
131
+ WHEN NOT $docId IN r.sourceDocumentIds
132
+ THEN r.sourceDocumentIds + $docId
133
+ ELSE r.sourceDocumentIds
134
+ END`, {
135
+ id: rel.id,
136
+ sourceId: rel.sourceEntityId,
137
+ targetId: rel.targetEntityId,
138
+ type: rel.type,
139
+ description: rel.description,
140
+ weight: rel.weight,
141
+ docId: doc.id,
142
+ now: nowIso(),
143
+ });
144
+ totalRelationships++;
145
+ }
146
+ // Generate entity embeddings if configured
147
+ if (this.config.generateEntityEmbeddings !== false && this.deps.embeddingManager) {
148
+ await this.generateEntityEmbeddings(extraction.entities);
149
+ }
150
+ }
151
+ // Detect communities
152
+ const communitiesDetected = await this.detectCommunities();
153
+ return {
154
+ entitiesExtracted: totalEntities,
155
+ relationshipsExtracted: totalRelationships,
156
+ communitiesDetected,
157
+ documentsProcessed: documents.length,
158
+ };
159
+ }
160
+ async removeDocuments(documentIds) {
161
+ let removed = 0;
162
+ for (const docId of documentIds) {
163
+ // Remove document contributions from entities
164
+ await this.cypher.writeVoid(`MATCH (e:${ENTITY_LABEL})
165
+ WHERE $docId IN e.sourceDocumentIds
166
+ SET e.sourceDocumentIds = [x IN e.sourceDocumentIds WHERE x <> $docId]
167
+ WITH e WHERE size(e.sourceDocumentIds) = 0
168
+ DETACH DELETE e`, { docId });
169
+ // Remove document contributions from relationships
170
+ await this.cypher.writeVoid(`MATCH ()-[r:${REL_TYPE}]->()
171
+ WHERE $docId IN r.sourceDocumentIds
172
+ SET r.sourceDocumentIds = [x IN r.sourceDocumentIds WHERE x <> $docId]
173
+ WITH r WHERE size(r.sourceDocumentIds) = 0
174
+ DELETE r`, { docId });
175
+ // Remove document node
176
+ await this.cypher.writeVoid(`MATCH (d:${DOC_LABEL} { documentId: $docId }) DELETE d`, { docId });
177
+ removed++;
178
+ }
179
+ const communitiesDetected = await this.detectCommunities();
180
+ return { documentsRemoved: removed, communitiesDetected };
181
+ }
182
+ async globalSearch(query, options) {
183
+ const topK = options?.topK ?? 10;
184
+ const startTime = Date.now();
185
+ let communitySummaries = [];
186
+ // Try vector search on community summaries
187
+ if (this.deps.embeddingManager) {
188
+ const embStart = Date.now();
189
+ const { embeddings } = await this.deps.embeddingManager.generateEmbeddings({ texts: query });
190
+ const embTime = Date.now() - embStart;
191
+ const searchStart = Date.now();
192
+ const results = await this.cypher.read(`CALL db.index.vector.queryNodes($idx, $topK, $queryVec)
193
+ YIELD node, score
194
+ RETURN node.communityId AS communityId,
195
+ node.level AS level,
196
+ node.title AS title,
197
+ node.summary AS summary,
198
+ score
199
+ ORDER BY score DESC`, { idx: COMMUNITY_VEC_INDEX, topK, queryVec: embeddings[0] });
200
+ const searchTime = Date.now() - searchStart;
201
+ communitySummaries = results.map((r) => ({
202
+ communityId: r.communityId,
203
+ level: Number(r.level),
204
+ title: r.title,
205
+ summary: r.summary,
206
+ relevanceScore: r.score,
207
+ }));
208
+ // Synthesize answer from community summaries
209
+ let answer = communitySummaries.map((c) => c.summary).join('\n\n');
210
+ if (this.deps.llmProvider && communitySummaries.length > 0) {
211
+ const prompt = `Based on the following community summaries from a knowledge graph, answer this question: "${query}"\n\n${communitySummaries.map((c) => `## ${c.title}\n${c.summary}`).join('\n\n')}`;
212
+ try {
213
+ answer = await this.deps.llmProvider.generateText(prompt, { maxTokens: this.config.maxSummaryTokens ?? 500 });
214
+ }
215
+ catch { /* Use concatenated summaries as fallback */ }
216
+ }
217
+ const totalCommunities = await this.cypher.read(`MATCH (c:${COMMUNITY_LABEL}) RETURN count(c) AS count`);
218
+ return {
219
+ query,
220
+ answer,
221
+ communitySummaries,
222
+ totalCommunitiesSearched: Number(totalCommunities[0]?.count ?? 0),
223
+ diagnostics: {
224
+ embeddingTimeMs: embTime,
225
+ searchTimeMs: searchTime,
226
+ synthesisTimeMs: Date.now() - startTime - embTime - searchTime,
227
+ },
228
+ };
229
+ }
230
+ // Fallback: text-based community search
231
+ const results = await this.cypher.read(`MATCH (c:${COMMUNITY_LABEL})
232
+ WHERE c.summary CONTAINS $query OR c.title CONTAINS $query
233
+ RETURN c.communityId AS communityId, c.level AS level,
234
+ c.title AS title, c.summary AS summary
235
+ LIMIT $topK`, { query, topK });
236
+ return {
237
+ query,
238
+ answer: results.map((r) => r.summary).join('\n\n'),
239
+ communitySummaries: results.map((r) => ({
240
+ communityId: r.communityId,
241
+ level: Number(r.level),
242
+ title: r.title,
243
+ summary: r.summary,
244
+ relevanceScore: 0.5,
245
+ })),
246
+ totalCommunitiesSearched: results.length,
247
+ diagnostics: { searchTimeMs: Date.now() - startTime },
248
+ };
249
+ }
250
+ async localSearch(query, options) {
251
+ const topK = options?.topK ?? 10;
252
+ const startTime = Date.now();
253
+ let matchedEntities = [];
254
+ let relationships = [];
255
+ let communityContext = [];
256
+ if (this.deps.embeddingManager) {
257
+ const embStart = Date.now();
258
+ const { embeddings } = await this.deps.embeddingManager.generateEmbeddings({ texts: query });
259
+ const embTime = Date.now() - embStart;
260
+ // Entity vector search
261
+ const searchStart = Date.now();
262
+ const entityResults = await this.cypher.read(`CALL db.index.vector.queryNodes($idx, $topK, $queryVec)
263
+ YIELD node, score
264
+ RETURN node.entityId AS entityId,
265
+ node.name AS name,
266
+ node.type AS type,
267
+ node.description AS description,
268
+ node.properties_json AS properties_json,
269
+ node.frequency AS frequency,
270
+ node.sourceDocumentIds AS sourceDocumentIds,
271
+ node.createdAt AS createdAt,
272
+ node.updatedAt AS updatedAt,
273
+ score
274
+ ORDER BY score DESC`, { idx: ENTITY_VEC_INDEX, topK, queryVec: embeddings[0] });
275
+ const searchTime = Date.now() - searchStart;
276
+ matchedEntities = entityResults.map((r) => ({
277
+ id: r.entityId,
278
+ name: r.name,
279
+ type: r.type,
280
+ description: r.description,
281
+ properties: this.safeParseJson(r.properties_json, {}),
282
+ sourceDocumentIds: r.sourceDocumentIds ?? [],
283
+ frequency: Number(r.frequency ?? 0),
284
+ createdAt: r.createdAt ?? '',
285
+ updatedAt: r.updatedAt ?? '',
286
+ relevanceScore: r.score,
287
+ }));
288
+ // 1-hop expansion for matched entities
289
+ const graphStart = Date.now();
290
+ const entityIds = matchedEntities.map((e) => e.id);
291
+ if (entityIds.length > 0) {
292
+ const neighborResults = await this.cypher.read(`UNWIND $entityIds AS eid
293
+ MATCH (e:${ENTITY_LABEL} { entityId: eid })-[r:${REL_TYPE}]-(neighbor:${ENTITY_LABEL})
294
+ RETURN r.relId AS relId,
295
+ CASE WHEN startNode(r) = e THEN e.entityId ELSE neighbor.entityId END AS sourceId,
296
+ CASE WHEN endNode(r) = e THEN e.entityId ELSE neighbor.entityId END AS targetId,
297
+ r.relType AS relType,
298
+ r.description AS description,
299
+ r.weight AS weight,
300
+ r.sourceDocumentIds AS sourceDocumentIds,
301
+ r.createdAt AS createdAt`, { entityIds });
302
+ relationships = neighborResults.map((r) => ({
303
+ id: r.relId ?? '',
304
+ sourceEntityId: r.sourceId,
305
+ targetEntityId: r.targetId,
306
+ type: r.relType,
307
+ description: r.description ?? '',
308
+ weight: Number(r.weight ?? 0),
309
+ properties: {},
310
+ sourceDocumentIds: r.sourceDocumentIds ?? [],
311
+ createdAt: r.createdAt ?? '',
312
+ }));
313
+ }
314
+ const graphTime = Date.now() - graphStart;
315
+ // Get community context for matched entities
316
+ if (entityIds.length > 0) {
317
+ const comResults = await this.cypher.read(`UNWIND $entityIds AS eid
318
+ MATCH (e:${ENTITY_LABEL} { entityId: eid })-[:${MEMBER_OF}]->(c:${COMMUNITY_LABEL})
319
+ RETURN DISTINCT c.communityId AS communityId,
320
+ c.title AS title,
321
+ c.summary AS summary,
322
+ c.level AS level`, { entityIds });
323
+ communityContext = comResults.map((r) => ({
324
+ communityId: r.communityId,
325
+ title: r.title,
326
+ summary: r.summary,
327
+ level: Number(r.level),
328
+ }));
329
+ }
330
+ // Assemble augmented context
331
+ const contextParts = [];
332
+ for (const e of matchedEntities.slice(0, 5)) {
333
+ contextParts.push(`[${e.type}] ${e.name}: ${e.description}`);
334
+ }
335
+ for (const r of relationships.slice(0, 10)) {
336
+ contextParts.push(`${r.sourceEntityId} --[${r.type}]--> ${r.targetEntityId}: ${r.description}`);
337
+ }
338
+ for (const c of communityContext.slice(0, 3)) {
339
+ contextParts.push(`Community "${c.title}": ${c.summary}`);
340
+ }
341
+ return {
342
+ query,
343
+ entities: matchedEntities,
344
+ relationships,
345
+ communityContext,
346
+ augmentedContext: contextParts.join('\n'),
347
+ diagnostics: {
348
+ embeddingTimeMs: embTime,
349
+ searchTimeMs: searchTime,
350
+ graphTraversalTimeMs: graphTime,
351
+ },
352
+ };
353
+ }
354
+ // Fallback: text-based search
355
+ const textResults = await this.cypher.read(`MATCH (e:${ENTITY_LABEL})
356
+ WHERE e.name CONTAINS $query OR e.description CONTAINS $query
357
+ RETURN e LIMIT $topK`, { query, topK });
358
+ matchedEntities = textResults.map((r) => {
359
+ const props = r.e?.properties ?? r.e ?? {};
360
+ return {
361
+ id: props.entityId ?? '',
362
+ name: props.name ?? '',
363
+ type: props.type ?? '',
364
+ description: props.description ?? '',
365
+ properties: this.safeParseJson(props.properties_json, {}),
366
+ sourceDocumentIds: props.sourceDocumentIds ?? [],
367
+ frequency: Number(props.frequency ?? 0),
368
+ createdAt: props.createdAt ?? '',
369
+ updatedAt: props.updatedAt ?? '',
370
+ relevanceScore: 0.5,
371
+ };
372
+ });
373
+ return {
374
+ query,
375
+ entities: matchedEntities,
376
+ relationships: [],
377
+ communityContext: [],
378
+ augmentedContext: matchedEntities.map((e) => `[${e.type}] ${e.name}: ${e.description}`).join('\n'),
379
+ };
380
+ }
381
+ async getEntities(options) {
382
+ const conditions = [];
383
+ const params = { limit: options?.limit ?? 100 };
384
+ if (options?.type) {
385
+ conditions.push('e.type = $type');
386
+ params.type = options.type;
387
+ }
388
+ const where = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';
389
+ const results = await this.cypher.read(`MATCH (e:${ENTITY_LABEL}) ${where} RETURN e LIMIT $limit`, params);
390
+ return results.map((r) => this.nodeToGraphEntity(r.e));
391
+ }
392
+ async getRelationships(entityId) {
393
+ const results = await this.cypher.read(`MATCH (e:${ENTITY_LABEL} { entityId: $entityId })-[r:${REL_TYPE}]-(other:${ENTITY_LABEL})
394
+ RETURN r.relId AS relId, r.relType AS relType, r.description AS description,
395
+ r.weight AS weight, r.sourceDocumentIds AS sourceDocumentIds, r.createdAt AS createdAt,
396
+ CASE WHEN startNode(r) = e THEN e.entityId ELSE other.entityId END AS sourceId,
397
+ CASE WHEN endNode(r) = e THEN e.entityId ELSE other.entityId END AS targetId`, { entityId });
398
+ return results.map((r) => ({
399
+ id: r.relId ?? '',
400
+ sourceEntityId: r.sourceId,
401
+ targetEntityId: r.targetId,
402
+ type: r.relType ?? '',
403
+ description: r.description ?? '',
404
+ weight: Number(r.weight ?? 0),
405
+ properties: {},
406
+ sourceDocumentIds: r.sourceDocumentIds ?? [],
407
+ createdAt: r.createdAt ?? '',
408
+ }));
409
+ }
410
+ async getCommunities(level) {
411
+ const conditions = [];
412
+ const params = {};
413
+ if (level !== undefined) {
414
+ conditions.push('c.level = $level');
415
+ params.level = level;
416
+ }
417
+ const where = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';
418
+ const results = await this.cypher.read(`MATCH (c:${COMMUNITY_LABEL}) ${where}
419
+ RETURN c ORDER BY c.importance DESC`, params);
420
+ return results.map((r) => this.nodeToCommunity(r.c));
421
+ }
422
+ async getStats() {
423
+ const results = await this.cypher.read(`MATCH (e:${ENTITY_LABEL})
424
+ WITH count(e) AS entities
425
+ OPTIONAL MATCH ()-[r:${REL_TYPE}]->()
426
+ WITH entities, count(r) AS rels
427
+ OPTIONAL MATCH (c:${COMMUNITY_LABEL})
428
+ WITH entities, rels, count(c) AS communities,
429
+ CASE WHEN count(c) > 0 THEN max(c.level) + 1 ELSE 0 END AS levels
430
+ OPTIONAL MATCH (d:${DOC_LABEL})
431
+ RETURN entities, rels, communities, levels, count(d) AS docs`);
432
+ const row = results[0] ?? {};
433
+ return {
434
+ totalEntities: Number(row.entities ?? 0),
435
+ totalRelationships: Number(row.rels ?? 0),
436
+ totalCommunities: Number(row.communities ?? 0),
437
+ communityLevels: Number(row.levels ?? 0),
438
+ documentsIngested: Number(row.docs ?? 0),
439
+ };
440
+ }
441
+ async clear() {
442
+ await this.cypher.writeVoid(`MATCH (n:${ENTITY_LABEL}) DETACH DELETE n`);
443
+ await this.cypher.writeVoid(`MATCH (n:${COMMUNITY_LABEL}) DETACH DELETE n`);
444
+ await this.cypher.writeVoid(`MATCH (n:${DOC_LABEL}) DETACH DELETE n`);
445
+ }
446
+ async shutdown() {
447
+ // Connection manager is shared — don't close it here
448
+ this._isInitialized = false;
449
+ }
450
+ // ============ Private: Community Detection ============
451
+ async detectCommunities() {
452
+ // Clear existing communities
453
+ await this.cypher.writeVoid(`MATCH (c:${COMMUNITY_LABEL}) DETACH DELETE c`);
454
+ // Try GDS Louvain first
455
+ try {
456
+ return await this.detectCommunitiesGDS();
457
+ }
458
+ catch {
459
+ // GDS not available — fall back to client-side
460
+ return await this.detectCommunitiesClientSide();
461
+ }
462
+ }
463
+ async detectCommunitiesGDS() {
464
+ // Project the graph
465
+ await this.cypher.writeVoid(`CALL gds.graph.project('graphrag_projection', $nodeLabel, {
466
+ ${REL_TYPE}: { properties: 'weight' }
467
+ })`, { nodeLabel: ENTITY_LABEL });
468
+ try {
469
+ // Run Louvain
470
+ const results = await this.cypher.read(`CALL gds.louvain.stream('graphrag_projection', {
471
+ relationshipWeightProperty: 'weight'
472
+ })
473
+ YIELD nodeId, communityId
474
+ WITH gds.util.asNode(nodeId) AS node, communityId
475
+ RETURN node.entityId AS entityId, communityId`);
476
+ return await this.storeCommunities(results);
477
+ }
478
+ finally {
479
+ // Clean up projection
480
+ try {
481
+ await this.cypher.writeVoid(`CALL gds.graph.drop('graphrag_projection')`);
482
+ }
483
+ catch { /* ignore if already dropped */ }
484
+ }
485
+ }
486
+ async detectCommunitiesClientSide() {
487
+ // Fetch all nodes and edges, run graphology Louvain client-side
488
+ try {
489
+ const Graph = (await import('graphology')).default;
490
+ const louvain = (await import('graphology-communities-louvain')).default;
491
+ const graph = new Graph({ multi: false, type: 'undirected' });
492
+ // Fetch all entities
493
+ const entities = await this.cypher.read(`MATCH (e:${ENTITY_LABEL}) RETURN e.entityId AS entityId`);
494
+ for (const e of entities) {
495
+ graph.addNode(e.entityId);
496
+ }
497
+ // Fetch all relationships
498
+ const rels = await this.cypher.read(`MATCH (s:${ENTITY_LABEL})-[r:${REL_TYPE}]->(t:${ENTITY_LABEL})
499
+ RETURN s.entityId AS src, t.entityId AS tgt, r.weight AS weight`);
500
+ for (const r of rels) {
501
+ if (graph.hasNode(r.src) && graph.hasNode(r.tgt) && !graph.hasEdge(r.src, r.tgt)) {
502
+ graph.addEdge(r.src, r.tgt, { weight: Number(r.weight ?? 1) });
503
+ }
504
+ }
505
+ if (graph.order === 0)
506
+ return 0;
507
+ // Run Louvain
508
+ const partition = louvain(graph, {
509
+ resolution: this.config.communityResolution ?? 1.0,
510
+ getEdgeWeight: 'weight',
511
+ });
512
+ // Convert to community assignment format
513
+ const assignments = Object.entries(partition).map(([entityId, communityId]) => ({
514
+ entityId,
515
+ communityId: communityId,
516
+ }));
517
+ return await this.storeCommunities(assignments);
518
+ }
519
+ catch {
520
+ // Neither GDS nor graphology available
521
+ return 0;
522
+ }
523
+ }
524
+ async storeCommunities(assignments) {
525
+ // Group by community
526
+ const communityMap = new Map();
527
+ for (const a of assignments) {
528
+ if (!communityMap.has(a.communityId)) {
529
+ communityMap.set(a.communityId, []);
530
+ }
531
+ communityMap.get(a.communityId).push(a.entityId);
532
+ }
533
+ const minSize = this.config.minCommunitySize ?? 2;
534
+ let stored = 0;
535
+ for (const [commId, entityIds] of communityMap) {
536
+ if (entityIds.length < minSize)
537
+ continue;
538
+ const communityId = `community-${commId}`;
539
+ // Get entity descriptions for summary
540
+ const entityDescs = await this.cypher.read(`MATCH (e:${ENTITY_LABEL}) WHERE e.entityId IN $ids
541
+ RETURN e.name AS name, e.description AS description`, { ids: entityIds });
542
+ // Generate summary
543
+ let summary = entityDescs.map((e) => `${e.name}: ${e.description}`).join('; ');
544
+ let title = entityDescs.slice(0, 3).map((e) => e.name).join(', ');
545
+ if (this.deps.llmProvider && entityDescs.length > 2) {
546
+ try {
547
+ const prompt = `Summarize this group of related entities in 2-3 sentences:\n${entityDescs.map((e) => `- ${e.name}: ${e.description}`).join('\n')}`;
548
+ summary = await this.deps.llmProvider.generateText(prompt, { maxTokens: 200 });
549
+ title = `Community: ${entityDescs.slice(0, 3).map((e) => e.name).join(', ')}`;
550
+ }
551
+ catch { /* use concatenated descriptions */ }
552
+ }
553
+ // Store community node
554
+ await this.cypher.writeVoid(`CREATE (c:${COMMUNITY_LABEL} {
555
+ communityId: $communityId,
556
+ level: 0,
557
+ title: $title,
558
+ summary: $summary,
559
+ entityIds: $entityIds,
560
+ importance: $importance,
561
+ parentCommunityId: null,
562
+ childCommunityIds: [],
563
+ relationshipIds: [],
564
+ findings: [],
565
+ createdAt: $now
566
+ })`, {
567
+ communityId,
568
+ title,
569
+ summary,
570
+ entityIds,
571
+ importance: entityIds.length / assignments.length,
572
+ now: nowIso(),
573
+ });
574
+ // Create MEMBER_OF edges
575
+ await this.cypher.writeVoid(`MATCH (c:${COMMUNITY_LABEL} { communityId: $communityId })
576
+ UNWIND $entityIds AS eid
577
+ MATCH (e:${ENTITY_LABEL} { entityId: eid })
578
+ MERGE (e)-[:${MEMBER_OF}]->(c)`, { communityId, entityIds });
579
+ // Generate community embedding for global search
580
+ if (this.deps.embeddingManager) {
581
+ try {
582
+ const { embeddings } = await this.deps.embeddingManager.generateEmbeddings({
583
+ texts: `${title}: ${summary}`,
584
+ });
585
+ await this.cypher.writeVoid(`MATCH (c:${COMMUNITY_LABEL} { communityId: $communityId })
586
+ SET c.summaryEmbedding = $embedding`, { communityId, embedding: embeddings[0] });
587
+ }
588
+ catch { /* skip embedding if it fails */ }
589
+ }
590
+ stored++;
591
+ }
592
+ return stored;
593
+ }
594
+ // ============ Private: Entity Extraction ============
595
+ async extractEntitiesAndRelationships(content, documentId) {
596
+ if (this.deps.llmProvider) {
597
+ return this.extractViaLLM(content, documentId);
598
+ }
599
+ return this.extractViaPatterns(content, documentId);
600
+ }
601
+ async extractViaLLM(content, documentId) {
602
+ const entityTypes = this.config.entityTypes ?? ['person', 'organization', 'concept', 'location', 'event', 'technology'];
603
+ const prompt = `Extract entities and relationships from the following text.
604
+ Return JSON with this exact structure:
605
+ {"entities": [{"name": "...", "type": "...", "description": "..."}], "relationships": [{"source": "...", "target": "...", "type": "...", "description": "..."}]}
606
+
607
+ Entity types: ${entityTypes.join(', ')}
608
+
609
+ Text:
610
+ ${content.slice(0, 4000)}`;
611
+ try {
612
+ const response = await this.deps.llmProvider.generateText(prompt, {
613
+ maxTokens: 2000,
614
+ temperature: 0,
615
+ });
616
+ const parsed = JSON.parse(response.replace(/```json?\n?/g, '').replace(/```/g, '').trim());
617
+ const now = nowIso();
618
+ const entities = (parsed.entities ?? []).map((e) => ({
619
+ id: generateId(),
620
+ name: e.name,
621
+ type: e.type ?? 'concept',
622
+ description: e.description ?? '',
623
+ properties: {},
624
+ sourceDocumentIds: [documentId],
625
+ frequency: 1,
626
+ createdAt: now,
627
+ updatedAt: now,
628
+ }));
629
+ const entityNameToId = new Map(entities.map((e) => [e.name.toLowerCase(), e.id]));
630
+ const relationships = (parsed.relationships ?? [])
631
+ .filter((r) => entityNameToId.has(r.source?.toLowerCase()) && entityNameToId.has(r.target?.toLowerCase()))
632
+ .map((r) => ({
633
+ id: generateId(),
634
+ sourceEntityId: entityNameToId.get(r.source.toLowerCase()),
635
+ targetEntityId: entityNameToId.get(r.target.toLowerCase()),
636
+ type: r.type ?? 'related_to',
637
+ description: r.description ?? '',
638
+ weight: 1,
639
+ properties: {},
640
+ sourceDocumentIds: [documentId],
641
+ createdAt: now,
642
+ }));
643
+ return { entities, relationships };
644
+ }
645
+ catch {
646
+ return this.extractViaPatterns(content, documentId);
647
+ }
648
+ }
649
+ extractViaPatterns(content, documentId) {
650
+ // Simple NER-like extraction via capitalized phrases
651
+ const now = nowIso();
652
+ const entityMap = new Map();
653
+ const capitalizedPattern = /\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\b/g;
654
+ let match;
655
+ while ((match = capitalizedPattern.exec(content)) !== null) {
656
+ const name = match[1];
657
+ const normalized = name.toLowerCase();
658
+ if (!entityMap.has(normalized)) {
659
+ entityMap.set(normalized, {
660
+ id: generateId(),
661
+ name,
662
+ type: 'concept',
663
+ description: `Entity "${name}" extracted from text`,
664
+ properties: {},
665
+ sourceDocumentIds: [documentId],
666
+ frequency: 1,
667
+ createdAt: now,
668
+ updatedAt: now,
669
+ });
670
+ }
671
+ else {
672
+ entityMap.get(normalized).frequency++;
673
+ }
674
+ }
675
+ return { entities: Array.from(entityMap.values()), relationships: [] };
676
+ }
677
+ // ============ Private: Embedding Generation ============
678
+ async generateEntityEmbeddings(entities) {
679
+ if (!this.deps.embeddingManager || entities.length === 0)
680
+ return;
681
+ const texts = entities.map((e) => `${e.name} (${e.type}): ${e.description}`);
682
+ const batchSize = 32;
683
+ for (let i = 0; i < texts.length; i += batchSize) {
684
+ const batch = texts.slice(i, i + batchSize);
685
+ const batchEntities = entities.slice(i, i + batchSize);
686
+ try {
687
+ const { embeddings } = await this.deps.embeddingManager.generateEmbeddings({ texts: batch });
688
+ for (let j = 0; j < batchEntities.length; j++) {
689
+ await this.cypher.writeVoid(`MATCH (e:${ENTITY_LABEL} { entityId: $entityId })
690
+ SET e.embedding = $embedding`, { entityId: batchEntities[j].id, embedding: embeddings[j] });
691
+ }
692
+ }
693
+ catch { /* skip embedding batch on error */ }
694
+ }
695
+ }
696
+ // ============ Private Helpers ============
697
+ nodeToGraphEntity(node) {
698
+ const props = node?.properties ?? node ?? {};
699
+ return {
700
+ id: props.entityId ?? '',
701
+ name: props.name ?? '',
702
+ type: props.type ?? '',
703
+ description: props.description ?? '',
704
+ properties: this.safeParseJson(props.properties_json, {}),
705
+ embedding: props.embedding ?? undefined,
706
+ sourceDocumentIds: props.sourceDocumentIds ?? [],
707
+ frequency: Number(props.frequency ?? 0),
708
+ createdAt: props.createdAt ?? '',
709
+ updatedAt: props.updatedAt ?? '',
710
+ };
711
+ }
712
+ nodeToCommunity(node) {
713
+ const props = node?.properties ?? node ?? {};
714
+ return {
715
+ id: props.communityId ?? '',
716
+ level: Number(props.level ?? 0),
717
+ parentCommunityId: props.parentCommunityId ?? null,
718
+ childCommunityIds: props.childCommunityIds ?? [],
719
+ entityIds: props.entityIds ?? [],
720
+ relationshipIds: props.relationshipIds ?? [],
721
+ summary: props.summary ?? '',
722
+ findings: props.findings ?? [],
723
+ importance: Number(props.importance ?? 0),
724
+ title: props.title ?? '',
725
+ createdAt: props.createdAt ?? '',
726
+ };
727
+ }
728
+ safeParseJson(json, fallback) {
729
+ if (!json)
730
+ return fallback;
731
+ try {
732
+ return JSON.parse(json);
733
+ }
734
+ catch {
735
+ return fallback;
736
+ }
737
+ }
738
+ simpleHash(text) {
739
+ let hash = 0;
740
+ for (let i = 0; i < text.length; i++) {
741
+ const char = text.charCodeAt(i);
742
+ hash = ((hash << 5) - hash) + char;
743
+ hash = hash & hash;
744
+ }
745
+ return hash.toString(36);
746
+ }
747
+ }
748
+ //# sourceMappingURL=Neo4jGraphRAGEngine.js.map