@framers/agentos 0.1.32 → 0.1.34
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -2
- package/dist/api/AgentOS.d.ts +62 -1
- package/dist/api/AgentOS.d.ts.map +1 -1
- package/dist/api/AgentOS.js +177 -2
- package/dist/api/AgentOS.js.map +1 -1
- package/dist/api/AgentOSOrchestrator.d.ts +187 -0
- package/dist/api/AgentOSOrchestrator.d.ts.map +1 -1
- package/dist/api/AgentOSOrchestrator.js +709 -16
- package/dist/api/AgentOSOrchestrator.js.map +1 -1
- package/dist/cognitive_substrate/GMI.d.ts.map +1 -1
- package/dist/cognitive_substrate/GMI.js +36 -1
- package/dist/cognitive_substrate/GMI.js.map +1 -1
- package/dist/cognitive_substrate/IGMI.d.ts +21 -0
- package/dist/cognitive_substrate/IGMI.d.ts.map +1 -1
- package/dist/cognitive_substrate/IGMI.js.map +1 -1
- package/dist/config/AgentOSConfig.d.ts.map +1 -1
- package/dist/config/AgentOSConfig.js +17 -0
- package/dist/config/AgentOSConfig.js.map +1 -1
- package/dist/config/VectorStoreConfiguration.d.ts +2 -1
- package/dist/config/VectorStoreConfiguration.d.ts.map +1 -1
- package/dist/config/VectorStoreConfiguration.js.map +1 -1
- package/dist/core/knowledge/Neo4jKnowledgeGraph.d.ts +89 -0
- package/dist/core/knowledge/Neo4jKnowledgeGraph.d.ts.map +1 -0
- package/dist/core/knowledge/Neo4jKnowledgeGraph.js +683 -0
- package/dist/core/knowledge/Neo4jKnowledgeGraph.js.map +1 -0
- package/dist/core/llm/providers/implementations/OllamaProvider.d.ts +14 -1
- package/dist/core/llm/providers/implementations/OllamaProvider.d.ts.map +1 -1
- package/dist/core/llm/providers/implementations/OllamaProvider.js +142 -37
- package/dist/core/llm/providers/implementations/OllamaProvider.js.map +1 -1
- package/dist/core/llm/providers/implementations/OpenAIProvider.js +3 -3
- package/dist/core/llm/providers/implementations/OpenAIProvider.js.map +1 -1
- package/dist/core/observability/otel.d.ts +2 -0
- package/dist/core/observability/otel.d.ts.map +1 -1
- package/dist/core/observability/otel.js +14 -0
- package/dist/core/observability/otel.js.map +1 -1
- package/dist/core/orchestration/SqlTaskOutcomeTelemetryStore.d.ts +30 -0
- package/dist/core/orchestration/SqlTaskOutcomeTelemetryStore.d.ts.map +1 -0
- package/dist/core/orchestration/SqlTaskOutcomeTelemetryStore.js +123 -0
- package/dist/core/orchestration/SqlTaskOutcomeTelemetryStore.js.map +1 -0
- package/dist/core/orchestration/TurnPlanner.d.ts +89 -0
- package/dist/core/orchestration/TurnPlanner.d.ts.map +1 -0
- package/dist/core/orchestration/TurnPlanner.js +242 -0
- package/dist/core/orchestration/TurnPlanner.js.map +1 -0
- package/dist/discovery/CapabilityDiscoveryEngine.js +4 -4
- package/dist/discovery/CapabilityDiscoveryEngine.js.map +1 -1
- package/dist/discovery/CapabilityGraph.d.ts +2 -2
- package/dist/discovery/CapabilityGraph.d.ts.map +1 -1
- package/dist/discovery/CapabilityGraph.js +46 -17
- package/dist/discovery/CapabilityGraph.js.map +1 -1
- package/dist/discovery/Neo4jCapabilityGraph.d.ts +58 -0
- package/dist/discovery/Neo4jCapabilityGraph.d.ts.map +1 -0
- package/dist/discovery/Neo4jCapabilityGraph.js +226 -0
- package/dist/discovery/Neo4jCapabilityGraph.js.map +1 -0
- package/dist/discovery/index.d.ts +1 -0
- package/dist/discovery/index.d.ts.map +1 -1
- package/dist/discovery/index.js +1 -0
- package/dist/discovery/index.js.map +1 -1
- package/dist/discovery/types.d.ts +1 -1
- package/dist/discovery/types.d.ts.map +1 -1
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/neo4j/Neo4jConnectionManager.d.ts +59 -0
- package/dist/neo4j/Neo4jConnectionManager.d.ts.map +1 -0
- package/dist/neo4j/Neo4jConnectionManager.js +115 -0
- package/dist/neo4j/Neo4jConnectionManager.js.map +1 -0
- package/dist/neo4j/Neo4jCypherRunner.d.ts +39 -0
- package/dist/neo4j/Neo4jCypherRunner.d.ts.map +1 -0
- package/dist/neo4j/Neo4jCypherRunner.js +74 -0
- package/dist/neo4j/Neo4jCypherRunner.js.map +1 -0
- package/dist/neo4j/index.d.ts +12 -0
- package/dist/neo4j/index.d.ts.map +1 -0
- package/dist/neo4j/index.js +11 -0
- package/dist/neo4j/index.js.map +1 -0
- package/dist/neo4j/types.d.ts +27 -0
- package/dist/neo4j/types.d.ts.map +1 -0
- package/dist/neo4j/types.js +6 -0
- package/dist/neo4j/types.js.map +1 -0
- package/dist/rag/VectorStoreManager.d.ts.map +1 -1
- package/dist/rag/VectorStoreManager.js +6 -7
- package/dist/rag/VectorStoreManager.js.map +1 -1
- package/dist/rag/graphrag/GraphRAGEngine.d.ts.map +1 -1
- package/dist/rag/graphrag/GraphRAGEngine.js +42 -10
- package/dist/rag/graphrag/GraphRAGEngine.js.map +1 -1
- package/dist/rag/graphrag/Neo4jGraphRAGEngine.d.ts +95 -0
- package/dist/rag/graphrag/Neo4jGraphRAGEngine.d.ts.map +1 -0
- package/dist/rag/graphrag/Neo4jGraphRAGEngine.js +748 -0
- package/dist/rag/graphrag/Neo4jGraphRAGEngine.js.map +1 -0
- package/dist/rag/graphrag/index.d.ts +1 -0
- package/dist/rag/graphrag/index.d.ts.map +1 -1
- package/dist/rag/graphrag/index.js +1 -0
- package/dist/rag/graphrag/index.js.map +1 -1
- package/dist/rag/implementations/vector_stores/Neo4jVectorStore.d.ts +55 -0
- package/dist/rag/implementations/vector_stores/Neo4jVectorStore.d.ts.map +1 -0
- package/dist/rag/implementations/vector_stores/Neo4jVectorStore.js +369 -0
- package/dist/rag/implementations/vector_stores/Neo4jVectorStore.js.map +1 -0
- package/dist/rag/implementations/vector_stores/index.d.ts +1 -0
- package/dist/rag/implementations/vector_stores/index.d.ts.map +1 -1
- package/dist/rag/implementations/vector_stores/index.js +2 -0
- package/dist/rag/implementations/vector_stores/index.js.map +1 -1
- package/package.json +5 -1
|
@@ -0,0 +1,748 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Neo4j-backed GraphRAG engine for AgentOS.
|
|
3
|
+
*
|
|
4
|
+
* Implements `IGraphRAGEngine` using Neo4j for persistent entity/relationship/community
|
|
5
|
+
* storage, native HNSW vector indexes for entity/community semantic search, and
|
|
6
|
+
* GDS Louvain for community detection (with client-side graphology fallback).
|
|
7
|
+
*
|
|
8
|
+
* The entity extraction pipeline (LLM or pattern-based) is delegated to the caller
|
|
9
|
+
* or reused from the existing GraphRAGEngine's extraction utilities.
|
|
10
|
+
*
|
|
11
|
+
* Features:
|
|
12
|
+
* - Persistent graph storage in Neo4j
|
|
13
|
+
* - Native vector indexes on entity/community embeddings
|
|
14
|
+
* - GDS Louvain community detection (falls back to graphology if GDS unavailable)
|
|
15
|
+
* - Document contribution tracking for safe re-ingestion
|
|
16
|
+
* - Global search (community summaries) and local search (entity + 1-hop expansion)
|
|
17
|
+
* - Shared Neo4jConnectionManager for connection pooling
|
|
18
|
+
*
|
|
19
|
+
* @module @framers/agentos/rag/graphrag/Neo4jGraphRAGEngine
|
|
20
|
+
* @see ./IGraphRAG.ts for the interface definition.
|
|
21
|
+
*/
|
|
22
|
+
import { Neo4jCypherRunner } from '../../neo4j/Neo4jCypherRunner.js';
|
|
23
|
+
// ============================================================================
|
|
24
|
+
// Constants
|
|
25
|
+
// ============================================================================
|
|
26
|
+
/** Node label used for entity nodes in every Cypher statement of this module. */
const ENTITY_LABEL = 'GraphRAGEntity';

/** Node label used for community nodes. */
const COMMUNITY_LABEL = 'GraphRAGCommunity';

/** Node label used for the per-document tracking nodes. */
const DOC_LABEL = 'GraphRAGDocument';

/** Relationship type connecting two entity nodes. */
const REL_TYPE = 'GRAPHRAG_REL';

/** Relationship type pointing from an entity node to its community node. */
const MEMBER_OF = 'MEMBER_OF';

/** Name of the vector index created over the entity `embedding` property. */
const ENTITY_VEC_INDEX = 'graphrag_entity_embeddings';

/** Name of the vector index created over the community `summaryEmbedding` property. */
const COMMUNITY_VEC_INDEX = 'graphrag_community_embeddings';

/** Vector dimension used when the config does not provide `embeddingDimension`. */
const DEFAULT_EMBEDDING_DIM = 1536;
|
|
34
|
+
/**
 * Builds an identifier of the form `<epoch millis>-<base-36 random suffix>`.
 *
 * The suffix comes from `Math.random()`, so it is at most 8 characters long
 * and is not suitable for security-sensitive use.
 *
 * @returns {string} The generated identifier.
 */
function generateId() {
    const timestamp = Date.now();
    const suffix = Math.random().toString(36).slice(2, 10);
    return `${timestamp}-${suffix}`;
}
|
|
37
|
+
/**
 * Returns the current instant as an ISO-8601 string (as produced by
 * `Date.prototype.toISOString`).
 *
 * @returns {string} Current timestamp, e.g. `2024-01-01T00:00:00.000Z`.
 */
function nowIso() {
    const current = new Date();
    return current.toISOString();
}
|
|
40
|
+
// ============================================================================
|
|
41
|
+
// Implementation
|
|
42
|
+
// ============================================================================
|
|
43
|
+
export class Neo4jGraphRAGEngine {
|
|
44
|
+
constructor(deps) {
|
|
45
|
+
this.deps = deps;
|
|
46
|
+
this._isInitialized = false;
|
|
47
|
+
}
|
|
48
|
+
async initialize(config) {
|
|
49
|
+
this.config = config;
|
|
50
|
+
this.cypher = new Neo4jCypherRunner(this.deps.connectionManager);
|
|
51
|
+
this.embeddingDimension = config.embeddingDimension ?? DEFAULT_EMBEDDING_DIM;
|
|
52
|
+
// Create constraints
|
|
53
|
+
await this.cypher.writeVoid(`CREATE CONSTRAINT graphrag_entity_unique IF NOT EXISTS
|
|
54
|
+
FOR (n:${ENTITY_LABEL}) REQUIRE n.entityId IS UNIQUE`);
|
|
55
|
+
await this.cypher.writeVoid(`CREATE CONSTRAINT graphrag_community_unique IF NOT EXISTS
|
|
56
|
+
FOR (n:${COMMUNITY_LABEL}) REQUIRE n.communityId IS UNIQUE`);
|
|
57
|
+
await this.cypher.writeVoid(`CREATE CONSTRAINT graphrag_doc_unique IF NOT EXISTS
|
|
58
|
+
FOR (n:${DOC_LABEL}) REQUIRE n.documentId IS UNIQUE`);
|
|
59
|
+
// Create vector indexes
|
|
60
|
+
await this.cypher.writeVoid(`CREATE VECTOR INDEX ${ENTITY_VEC_INDEX} IF NOT EXISTS
|
|
61
|
+
FOR (n:${ENTITY_LABEL}) ON (n.embedding)
|
|
62
|
+
OPTIONS { indexConfig: {
|
|
63
|
+
\`vector.dimensions\`: toInteger($dim),
|
|
64
|
+
\`vector.similarity_function\`: 'cosine'
|
|
65
|
+
}}`, { dim: this.embeddingDimension });
|
|
66
|
+
await this.cypher.writeVoid(`CREATE VECTOR INDEX ${COMMUNITY_VEC_INDEX} IF NOT EXISTS
|
|
67
|
+
FOR (n:${COMMUNITY_LABEL}) ON (n.summaryEmbedding)
|
|
68
|
+
OPTIONS { indexConfig: {
|
|
69
|
+
\`vector.dimensions\`: toInteger($dim),
|
|
70
|
+
\`vector.similarity_function\`: 'cosine'
|
|
71
|
+
}}`, { dim: this.embeddingDimension });
|
|
72
|
+
this._isInitialized = true;
|
|
73
|
+
}
|
|
74
|
+
async ingestDocuments(documents) {
|
|
75
|
+
let totalEntities = 0;
|
|
76
|
+
let totalRelationships = 0;
|
|
77
|
+
for (const doc of documents) {
|
|
78
|
+
// Track document
|
|
79
|
+
await this.cypher.writeVoid(`MERGE (d:${DOC_LABEL} { documentId: $docId })
|
|
80
|
+
SET d.ingestedAt = $now, d.contentHash = $hash`, { docId: doc.id, now: nowIso(), hash: this.simpleHash(doc.content) });
|
|
81
|
+
// Extract entities and relationships (LLM-based or pattern-based)
|
|
82
|
+
const extraction = await this.extractEntitiesAndRelationships(doc.content, doc.id);
|
|
83
|
+
// Merge entities into graph
|
|
84
|
+
for (const entity of extraction.entities) {
|
|
85
|
+
await this.cypher.writeVoid(`MERGE (e:${ENTITY_LABEL} { normalizedName: toLower(trim($name)) })
|
|
86
|
+
ON CREATE SET
|
|
87
|
+
e.entityId = $id,
|
|
88
|
+
e.name = $name,
|
|
89
|
+
e.type = $type,
|
|
90
|
+
e.description = $description,
|
|
91
|
+
e.properties_json = $props_json,
|
|
92
|
+
e.frequency = $frequency,
|
|
93
|
+
e.sourceDocumentIds = [$docId],
|
|
94
|
+
e.createdAt = $now,
|
|
95
|
+
e.updatedAt = $now
|
|
96
|
+
ON MATCH SET
|
|
97
|
+
e.frequency = e.frequency + $frequency,
|
|
98
|
+
e.updatedAt = $now,
|
|
99
|
+
e.sourceDocumentIds = CASE
|
|
100
|
+
WHEN NOT $docId IN e.sourceDocumentIds
|
|
101
|
+
THEN e.sourceDocumentIds + $docId
|
|
102
|
+
ELSE e.sourceDocumentIds
|
|
103
|
+
END,
|
|
104
|
+
e.type = CASE WHEN e.type = 'concept' AND $type <> 'concept' THEN $type ELSE e.type END,
|
|
105
|
+
e.description = CASE WHEN size($description) > size(e.description) THEN $description ELSE e.description END`, {
|
|
106
|
+
id: entity.id,
|
|
107
|
+
name: entity.name,
|
|
108
|
+
type: entity.type,
|
|
109
|
+
description: entity.description,
|
|
110
|
+
props_json: JSON.stringify(entity.properties),
|
|
111
|
+
frequency: entity.frequency,
|
|
112
|
+
docId: doc.id,
|
|
113
|
+
now: nowIso(),
|
|
114
|
+
});
|
|
115
|
+
totalEntities++;
|
|
116
|
+
}
|
|
117
|
+
// Merge relationships
|
|
118
|
+
for (const rel of extraction.relationships) {
|
|
119
|
+
await this.cypher.writeVoid(`MATCH (src:${ENTITY_LABEL} { entityId: $sourceId })
|
|
120
|
+
MATCH (tgt:${ENTITY_LABEL} { entityId: $targetId })
|
|
121
|
+
MERGE (src)-[r:${REL_TYPE} { relType: $type }]->(tgt)
|
|
122
|
+
ON CREATE SET
|
|
123
|
+
r.relId = $id,
|
|
124
|
+
r.description = $description,
|
|
125
|
+
r.weight = $weight,
|
|
126
|
+
r.sourceDocumentIds = [$docId],
|
|
127
|
+
r.createdAt = $now
|
|
128
|
+
ON MATCH SET
|
|
129
|
+
r.weight = r.weight + $weight,
|
|
130
|
+
r.sourceDocumentIds = CASE
|
|
131
|
+
WHEN NOT $docId IN r.sourceDocumentIds
|
|
132
|
+
THEN r.sourceDocumentIds + $docId
|
|
133
|
+
ELSE r.sourceDocumentIds
|
|
134
|
+
END`, {
|
|
135
|
+
id: rel.id,
|
|
136
|
+
sourceId: rel.sourceEntityId,
|
|
137
|
+
targetId: rel.targetEntityId,
|
|
138
|
+
type: rel.type,
|
|
139
|
+
description: rel.description,
|
|
140
|
+
weight: rel.weight,
|
|
141
|
+
docId: doc.id,
|
|
142
|
+
now: nowIso(),
|
|
143
|
+
});
|
|
144
|
+
totalRelationships++;
|
|
145
|
+
}
|
|
146
|
+
// Generate entity embeddings if configured
|
|
147
|
+
if (this.config.generateEntityEmbeddings !== false && this.deps.embeddingManager) {
|
|
148
|
+
await this.generateEntityEmbeddings(extraction.entities);
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
// Detect communities
|
|
152
|
+
const communitiesDetected = await this.detectCommunities();
|
|
153
|
+
return {
|
|
154
|
+
entitiesExtracted: totalEntities,
|
|
155
|
+
relationshipsExtracted: totalRelationships,
|
|
156
|
+
communitiesDetected,
|
|
157
|
+
documentsProcessed: documents.length,
|
|
158
|
+
};
|
|
159
|
+
}
|
|
160
|
+
async removeDocuments(documentIds) {
|
|
161
|
+
let removed = 0;
|
|
162
|
+
for (const docId of documentIds) {
|
|
163
|
+
// Remove document contributions from entities
|
|
164
|
+
await this.cypher.writeVoid(`MATCH (e:${ENTITY_LABEL})
|
|
165
|
+
WHERE $docId IN e.sourceDocumentIds
|
|
166
|
+
SET e.sourceDocumentIds = [x IN e.sourceDocumentIds WHERE x <> $docId]
|
|
167
|
+
WITH e WHERE size(e.sourceDocumentIds) = 0
|
|
168
|
+
DETACH DELETE e`, { docId });
|
|
169
|
+
// Remove document contributions from relationships
|
|
170
|
+
await this.cypher.writeVoid(`MATCH ()-[r:${REL_TYPE}]->()
|
|
171
|
+
WHERE $docId IN r.sourceDocumentIds
|
|
172
|
+
SET r.sourceDocumentIds = [x IN r.sourceDocumentIds WHERE x <> $docId]
|
|
173
|
+
WITH r WHERE size(r.sourceDocumentIds) = 0
|
|
174
|
+
DELETE r`, { docId });
|
|
175
|
+
// Remove document node
|
|
176
|
+
await this.cypher.writeVoid(`MATCH (d:${DOC_LABEL} { documentId: $docId }) DELETE d`, { docId });
|
|
177
|
+
removed++;
|
|
178
|
+
}
|
|
179
|
+
const communitiesDetected = await this.detectCommunities();
|
|
180
|
+
return { documentsRemoved: removed, communitiesDetected };
|
|
181
|
+
}
|
|
182
|
+
async globalSearch(query, options) {
|
|
183
|
+
const topK = options?.topK ?? 10;
|
|
184
|
+
const startTime = Date.now();
|
|
185
|
+
let communitySummaries = [];
|
|
186
|
+
// Try vector search on community summaries
|
|
187
|
+
if (this.deps.embeddingManager) {
|
|
188
|
+
const embStart = Date.now();
|
|
189
|
+
const { embeddings } = await this.deps.embeddingManager.generateEmbeddings({ texts: query });
|
|
190
|
+
const embTime = Date.now() - embStart;
|
|
191
|
+
const searchStart = Date.now();
|
|
192
|
+
const results = await this.cypher.read(`CALL db.index.vector.queryNodes($idx, $topK, $queryVec)
|
|
193
|
+
YIELD node, score
|
|
194
|
+
RETURN node.communityId AS communityId,
|
|
195
|
+
node.level AS level,
|
|
196
|
+
node.title AS title,
|
|
197
|
+
node.summary AS summary,
|
|
198
|
+
score
|
|
199
|
+
ORDER BY score DESC`, { idx: COMMUNITY_VEC_INDEX, topK, queryVec: embeddings[0] });
|
|
200
|
+
const searchTime = Date.now() - searchStart;
|
|
201
|
+
communitySummaries = results.map((r) => ({
|
|
202
|
+
communityId: r.communityId,
|
|
203
|
+
level: Number(r.level),
|
|
204
|
+
title: r.title,
|
|
205
|
+
summary: r.summary,
|
|
206
|
+
relevanceScore: r.score,
|
|
207
|
+
}));
|
|
208
|
+
// Synthesize answer from community summaries
|
|
209
|
+
let answer = communitySummaries.map((c) => c.summary).join('\n\n');
|
|
210
|
+
if (this.deps.llmProvider && communitySummaries.length > 0) {
|
|
211
|
+
const prompt = `Based on the following community summaries from a knowledge graph, answer this question: "${query}"\n\n${communitySummaries.map((c) => `## ${c.title}\n${c.summary}`).join('\n\n')}`;
|
|
212
|
+
try {
|
|
213
|
+
answer = await this.deps.llmProvider.generateText(prompt, { maxTokens: this.config.maxSummaryTokens ?? 500 });
|
|
214
|
+
}
|
|
215
|
+
catch { /* Use concatenated summaries as fallback */ }
|
|
216
|
+
}
|
|
217
|
+
const totalCommunities = await this.cypher.read(`MATCH (c:${COMMUNITY_LABEL}) RETURN count(c) AS count`);
|
|
218
|
+
return {
|
|
219
|
+
query,
|
|
220
|
+
answer,
|
|
221
|
+
communitySummaries,
|
|
222
|
+
totalCommunitiesSearched: Number(totalCommunities[0]?.count ?? 0),
|
|
223
|
+
diagnostics: {
|
|
224
|
+
embeddingTimeMs: embTime,
|
|
225
|
+
searchTimeMs: searchTime,
|
|
226
|
+
synthesisTimeMs: Date.now() - startTime - embTime - searchTime,
|
|
227
|
+
},
|
|
228
|
+
};
|
|
229
|
+
}
|
|
230
|
+
// Fallback: text-based community search
|
|
231
|
+
const results = await this.cypher.read(`MATCH (c:${COMMUNITY_LABEL})
|
|
232
|
+
WHERE c.summary CONTAINS $query OR c.title CONTAINS $query
|
|
233
|
+
RETURN c.communityId AS communityId, c.level AS level,
|
|
234
|
+
c.title AS title, c.summary AS summary
|
|
235
|
+
LIMIT $topK`, { query, topK });
|
|
236
|
+
return {
|
|
237
|
+
query,
|
|
238
|
+
answer: results.map((r) => r.summary).join('\n\n'),
|
|
239
|
+
communitySummaries: results.map((r) => ({
|
|
240
|
+
communityId: r.communityId,
|
|
241
|
+
level: Number(r.level),
|
|
242
|
+
title: r.title,
|
|
243
|
+
summary: r.summary,
|
|
244
|
+
relevanceScore: 0.5,
|
|
245
|
+
})),
|
|
246
|
+
totalCommunitiesSearched: results.length,
|
|
247
|
+
diagnostics: { searchTimeMs: Date.now() - startTime },
|
|
248
|
+
};
|
|
249
|
+
}
|
|
250
|
+
async localSearch(query, options) {
|
|
251
|
+
const topK = options?.topK ?? 10;
|
|
252
|
+
const startTime = Date.now();
|
|
253
|
+
let matchedEntities = [];
|
|
254
|
+
let relationships = [];
|
|
255
|
+
let communityContext = [];
|
|
256
|
+
if (this.deps.embeddingManager) {
|
|
257
|
+
const embStart = Date.now();
|
|
258
|
+
const { embeddings } = await this.deps.embeddingManager.generateEmbeddings({ texts: query });
|
|
259
|
+
const embTime = Date.now() - embStart;
|
|
260
|
+
// Entity vector search
|
|
261
|
+
const searchStart = Date.now();
|
|
262
|
+
const entityResults = await this.cypher.read(`CALL db.index.vector.queryNodes($idx, $topK, $queryVec)
|
|
263
|
+
YIELD node, score
|
|
264
|
+
RETURN node.entityId AS entityId,
|
|
265
|
+
node.name AS name,
|
|
266
|
+
node.type AS type,
|
|
267
|
+
node.description AS description,
|
|
268
|
+
node.properties_json AS properties_json,
|
|
269
|
+
node.frequency AS frequency,
|
|
270
|
+
node.sourceDocumentIds AS sourceDocumentIds,
|
|
271
|
+
node.createdAt AS createdAt,
|
|
272
|
+
node.updatedAt AS updatedAt,
|
|
273
|
+
score
|
|
274
|
+
ORDER BY score DESC`, { idx: ENTITY_VEC_INDEX, topK, queryVec: embeddings[0] });
|
|
275
|
+
const searchTime = Date.now() - searchStart;
|
|
276
|
+
matchedEntities = entityResults.map((r) => ({
|
|
277
|
+
id: r.entityId,
|
|
278
|
+
name: r.name,
|
|
279
|
+
type: r.type,
|
|
280
|
+
description: r.description,
|
|
281
|
+
properties: this.safeParseJson(r.properties_json, {}),
|
|
282
|
+
sourceDocumentIds: r.sourceDocumentIds ?? [],
|
|
283
|
+
frequency: Number(r.frequency ?? 0),
|
|
284
|
+
createdAt: r.createdAt ?? '',
|
|
285
|
+
updatedAt: r.updatedAt ?? '',
|
|
286
|
+
relevanceScore: r.score,
|
|
287
|
+
}));
|
|
288
|
+
// 1-hop expansion for matched entities
|
|
289
|
+
const graphStart = Date.now();
|
|
290
|
+
const entityIds = matchedEntities.map((e) => e.id);
|
|
291
|
+
if (entityIds.length > 0) {
|
|
292
|
+
const neighborResults = await this.cypher.read(`UNWIND $entityIds AS eid
|
|
293
|
+
MATCH (e:${ENTITY_LABEL} { entityId: eid })-[r:${REL_TYPE}]-(neighbor:${ENTITY_LABEL})
|
|
294
|
+
RETURN r.relId AS relId,
|
|
295
|
+
CASE WHEN startNode(r) = e THEN e.entityId ELSE neighbor.entityId END AS sourceId,
|
|
296
|
+
CASE WHEN endNode(r) = e THEN e.entityId ELSE neighbor.entityId END AS targetId,
|
|
297
|
+
r.relType AS relType,
|
|
298
|
+
r.description AS description,
|
|
299
|
+
r.weight AS weight,
|
|
300
|
+
r.sourceDocumentIds AS sourceDocumentIds,
|
|
301
|
+
r.createdAt AS createdAt`, { entityIds });
|
|
302
|
+
relationships = neighborResults.map((r) => ({
|
|
303
|
+
id: r.relId ?? '',
|
|
304
|
+
sourceEntityId: r.sourceId,
|
|
305
|
+
targetEntityId: r.targetId,
|
|
306
|
+
type: r.relType,
|
|
307
|
+
description: r.description ?? '',
|
|
308
|
+
weight: Number(r.weight ?? 0),
|
|
309
|
+
properties: {},
|
|
310
|
+
sourceDocumentIds: r.sourceDocumentIds ?? [],
|
|
311
|
+
createdAt: r.createdAt ?? '',
|
|
312
|
+
}));
|
|
313
|
+
}
|
|
314
|
+
const graphTime = Date.now() - graphStart;
|
|
315
|
+
// Get community context for matched entities
|
|
316
|
+
if (entityIds.length > 0) {
|
|
317
|
+
const comResults = await this.cypher.read(`UNWIND $entityIds AS eid
|
|
318
|
+
MATCH (e:${ENTITY_LABEL} { entityId: eid })-[:${MEMBER_OF}]->(c:${COMMUNITY_LABEL})
|
|
319
|
+
RETURN DISTINCT c.communityId AS communityId,
|
|
320
|
+
c.title AS title,
|
|
321
|
+
c.summary AS summary,
|
|
322
|
+
c.level AS level`, { entityIds });
|
|
323
|
+
communityContext = comResults.map((r) => ({
|
|
324
|
+
communityId: r.communityId,
|
|
325
|
+
title: r.title,
|
|
326
|
+
summary: r.summary,
|
|
327
|
+
level: Number(r.level),
|
|
328
|
+
}));
|
|
329
|
+
}
|
|
330
|
+
// Assemble augmented context
|
|
331
|
+
const contextParts = [];
|
|
332
|
+
for (const e of matchedEntities.slice(0, 5)) {
|
|
333
|
+
contextParts.push(`[${e.type}] ${e.name}: ${e.description}`);
|
|
334
|
+
}
|
|
335
|
+
for (const r of relationships.slice(0, 10)) {
|
|
336
|
+
contextParts.push(`${r.sourceEntityId} --[${r.type}]--> ${r.targetEntityId}: ${r.description}`);
|
|
337
|
+
}
|
|
338
|
+
for (const c of communityContext.slice(0, 3)) {
|
|
339
|
+
contextParts.push(`Community "${c.title}": ${c.summary}`);
|
|
340
|
+
}
|
|
341
|
+
return {
|
|
342
|
+
query,
|
|
343
|
+
entities: matchedEntities,
|
|
344
|
+
relationships,
|
|
345
|
+
communityContext,
|
|
346
|
+
augmentedContext: contextParts.join('\n'),
|
|
347
|
+
diagnostics: {
|
|
348
|
+
embeddingTimeMs: embTime,
|
|
349
|
+
searchTimeMs: searchTime,
|
|
350
|
+
graphTraversalTimeMs: graphTime,
|
|
351
|
+
},
|
|
352
|
+
};
|
|
353
|
+
}
|
|
354
|
+
// Fallback: text-based search
|
|
355
|
+
const textResults = await this.cypher.read(`MATCH (e:${ENTITY_LABEL})
|
|
356
|
+
WHERE e.name CONTAINS $query OR e.description CONTAINS $query
|
|
357
|
+
RETURN e LIMIT $topK`, { query, topK });
|
|
358
|
+
matchedEntities = textResults.map((r) => {
|
|
359
|
+
const props = r.e?.properties ?? r.e ?? {};
|
|
360
|
+
return {
|
|
361
|
+
id: props.entityId ?? '',
|
|
362
|
+
name: props.name ?? '',
|
|
363
|
+
type: props.type ?? '',
|
|
364
|
+
description: props.description ?? '',
|
|
365
|
+
properties: this.safeParseJson(props.properties_json, {}),
|
|
366
|
+
sourceDocumentIds: props.sourceDocumentIds ?? [],
|
|
367
|
+
frequency: Number(props.frequency ?? 0),
|
|
368
|
+
createdAt: props.createdAt ?? '',
|
|
369
|
+
updatedAt: props.updatedAt ?? '',
|
|
370
|
+
relevanceScore: 0.5,
|
|
371
|
+
};
|
|
372
|
+
});
|
|
373
|
+
return {
|
|
374
|
+
query,
|
|
375
|
+
entities: matchedEntities,
|
|
376
|
+
relationships: [],
|
|
377
|
+
communityContext: [],
|
|
378
|
+
augmentedContext: matchedEntities.map((e) => `[${e.type}] ${e.name}: ${e.description}`).join('\n'),
|
|
379
|
+
};
|
|
380
|
+
}
|
|
381
|
+
async getEntities(options) {
|
|
382
|
+
const conditions = [];
|
|
383
|
+
const params = { limit: options?.limit ?? 100 };
|
|
384
|
+
if (options?.type) {
|
|
385
|
+
conditions.push('e.type = $type');
|
|
386
|
+
params.type = options.type;
|
|
387
|
+
}
|
|
388
|
+
const where = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';
|
|
389
|
+
const results = await this.cypher.read(`MATCH (e:${ENTITY_LABEL}) ${where} RETURN e LIMIT $limit`, params);
|
|
390
|
+
return results.map((r) => this.nodeToGraphEntity(r.e));
|
|
391
|
+
}
|
|
392
|
+
async getRelationships(entityId) {
|
|
393
|
+
const results = await this.cypher.read(`MATCH (e:${ENTITY_LABEL} { entityId: $entityId })-[r:${REL_TYPE}]-(other:${ENTITY_LABEL})
|
|
394
|
+
RETURN r.relId AS relId, r.relType AS relType, r.description AS description,
|
|
395
|
+
r.weight AS weight, r.sourceDocumentIds AS sourceDocumentIds, r.createdAt AS createdAt,
|
|
396
|
+
CASE WHEN startNode(r) = e THEN e.entityId ELSE other.entityId END AS sourceId,
|
|
397
|
+
CASE WHEN endNode(r) = e THEN e.entityId ELSE other.entityId END AS targetId`, { entityId });
|
|
398
|
+
return results.map((r) => ({
|
|
399
|
+
id: r.relId ?? '',
|
|
400
|
+
sourceEntityId: r.sourceId,
|
|
401
|
+
targetEntityId: r.targetId,
|
|
402
|
+
type: r.relType ?? '',
|
|
403
|
+
description: r.description ?? '',
|
|
404
|
+
weight: Number(r.weight ?? 0),
|
|
405
|
+
properties: {},
|
|
406
|
+
sourceDocumentIds: r.sourceDocumentIds ?? [],
|
|
407
|
+
createdAt: r.createdAt ?? '',
|
|
408
|
+
}));
|
|
409
|
+
}
|
|
410
|
+
async getCommunities(level) {
|
|
411
|
+
const conditions = [];
|
|
412
|
+
const params = {};
|
|
413
|
+
if (level !== undefined) {
|
|
414
|
+
conditions.push('c.level = $level');
|
|
415
|
+
params.level = level;
|
|
416
|
+
}
|
|
417
|
+
const where = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';
|
|
418
|
+
const results = await this.cypher.read(`MATCH (c:${COMMUNITY_LABEL}) ${where}
|
|
419
|
+
RETURN c ORDER BY c.importance DESC`, params);
|
|
420
|
+
return results.map((r) => this.nodeToCommunity(r.c));
|
|
421
|
+
}
|
|
422
|
+
async getStats() {
|
|
423
|
+
const results = await this.cypher.read(`MATCH (e:${ENTITY_LABEL})
|
|
424
|
+
WITH count(e) AS entities
|
|
425
|
+
OPTIONAL MATCH ()-[r:${REL_TYPE}]->()
|
|
426
|
+
WITH entities, count(r) AS rels
|
|
427
|
+
OPTIONAL MATCH (c:${COMMUNITY_LABEL})
|
|
428
|
+
WITH entities, rels, count(c) AS communities,
|
|
429
|
+
CASE WHEN count(c) > 0 THEN max(c.level) + 1 ELSE 0 END AS levels
|
|
430
|
+
OPTIONAL MATCH (d:${DOC_LABEL})
|
|
431
|
+
RETURN entities, rels, communities, levels, count(d) AS docs`);
|
|
432
|
+
const row = results[0] ?? {};
|
|
433
|
+
return {
|
|
434
|
+
totalEntities: Number(row.entities ?? 0),
|
|
435
|
+
totalRelationships: Number(row.rels ?? 0),
|
|
436
|
+
totalCommunities: Number(row.communities ?? 0),
|
|
437
|
+
communityLevels: Number(row.levels ?? 0),
|
|
438
|
+
documentsIngested: Number(row.docs ?? 0),
|
|
439
|
+
};
|
|
440
|
+
}
|
|
441
|
+
async clear() {
|
|
442
|
+
await this.cypher.writeVoid(`MATCH (n:${ENTITY_LABEL}) DETACH DELETE n`);
|
|
443
|
+
await this.cypher.writeVoid(`MATCH (n:${COMMUNITY_LABEL}) DETACH DELETE n`);
|
|
444
|
+
await this.cypher.writeVoid(`MATCH (n:${DOC_LABEL}) DETACH DELETE n`);
|
|
445
|
+
}
|
|
446
|
+
async shutdown() {
|
|
447
|
+
// Connection manager is shared — don't close it here
|
|
448
|
+
this._isInitialized = false;
|
|
449
|
+
}
|
|
450
|
+
// ============ Private: Community Detection ============
|
|
451
|
+
async detectCommunities() {
|
|
452
|
+
// Clear existing communities
|
|
453
|
+
await this.cypher.writeVoid(`MATCH (c:${COMMUNITY_LABEL}) DETACH DELETE c`);
|
|
454
|
+
// Try GDS Louvain first
|
|
455
|
+
try {
|
|
456
|
+
return await this.detectCommunitiesGDS();
|
|
457
|
+
}
|
|
458
|
+
catch {
|
|
459
|
+
// GDS not available — fall back to client-side
|
|
460
|
+
return await this.detectCommunitiesClientSide();
|
|
461
|
+
}
|
|
462
|
+
}
|
|
463
|
+
async detectCommunitiesGDS() {
|
|
464
|
+
// Project the graph
|
|
465
|
+
await this.cypher.writeVoid(`CALL gds.graph.project('graphrag_projection', $nodeLabel, {
|
|
466
|
+
${REL_TYPE}: { properties: 'weight' }
|
|
467
|
+
})`, { nodeLabel: ENTITY_LABEL });
|
|
468
|
+
try {
|
|
469
|
+
// Run Louvain
|
|
470
|
+
const results = await this.cypher.read(`CALL gds.louvain.stream('graphrag_projection', {
|
|
471
|
+
relationshipWeightProperty: 'weight'
|
|
472
|
+
})
|
|
473
|
+
YIELD nodeId, communityId
|
|
474
|
+
WITH gds.util.asNode(nodeId) AS node, communityId
|
|
475
|
+
RETURN node.entityId AS entityId, communityId`);
|
|
476
|
+
return await this.storeCommunities(results);
|
|
477
|
+
}
|
|
478
|
+
finally {
|
|
479
|
+
// Clean up projection
|
|
480
|
+
try {
|
|
481
|
+
await this.cypher.writeVoid(`CALL gds.graph.drop('graphrag_projection')`);
|
|
482
|
+
}
|
|
483
|
+
catch { /* ignore if already dropped */ }
|
|
484
|
+
}
|
|
485
|
+
}
|
|
486
|
+
async detectCommunitiesClientSide() {
|
|
487
|
+
// Fetch all nodes and edges, run graphology Louvain client-side
|
|
488
|
+
try {
|
|
489
|
+
const Graph = (await import('graphology')).default;
|
|
490
|
+
const louvain = (await import('graphology-communities-louvain')).default;
|
|
491
|
+
const graph = new Graph({ multi: false, type: 'undirected' });
|
|
492
|
+
// Fetch all entities
|
|
493
|
+
const entities = await this.cypher.read(`MATCH (e:${ENTITY_LABEL}) RETURN e.entityId AS entityId`);
|
|
494
|
+
for (const e of entities) {
|
|
495
|
+
graph.addNode(e.entityId);
|
|
496
|
+
}
|
|
497
|
+
// Fetch all relationships
|
|
498
|
+
const rels = await this.cypher.read(`MATCH (s:${ENTITY_LABEL})-[r:${REL_TYPE}]->(t:${ENTITY_LABEL})
|
|
499
|
+
RETURN s.entityId AS src, t.entityId AS tgt, r.weight AS weight`);
|
|
500
|
+
for (const r of rels) {
|
|
501
|
+
if (graph.hasNode(r.src) && graph.hasNode(r.tgt) && !graph.hasEdge(r.src, r.tgt)) {
|
|
502
|
+
graph.addEdge(r.src, r.tgt, { weight: Number(r.weight ?? 1) });
|
|
503
|
+
}
|
|
504
|
+
}
|
|
505
|
+
if (graph.order === 0)
|
|
506
|
+
return 0;
|
|
507
|
+
// Run Louvain
|
|
508
|
+
const partition = louvain(graph, {
|
|
509
|
+
resolution: this.config.communityResolution ?? 1.0,
|
|
510
|
+
getEdgeWeight: 'weight',
|
|
511
|
+
});
|
|
512
|
+
// Convert to community assignment format
|
|
513
|
+
const assignments = Object.entries(partition).map(([entityId, communityId]) => ({
|
|
514
|
+
entityId,
|
|
515
|
+
communityId: communityId,
|
|
516
|
+
}));
|
|
517
|
+
return await this.storeCommunities(assignments);
|
|
518
|
+
}
|
|
519
|
+
catch {
|
|
520
|
+
// Neither GDS nor graphology available
|
|
521
|
+
return 0;
|
|
522
|
+
}
|
|
523
|
+
}
|
|
524
|
+
async storeCommunities(assignments) {
|
|
525
|
+
// Group by community
|
|
526
|
+
const communityMap = new Map();
|
|
527
|
+
for (const a of assignments) {
|
|
528
|
+
if (!communityMap.has(a.communityId)) {
|
|
529
|
+
communityMap.set(a.communityId, []);
|
|
530
|
+
}
|
|
531
|
+
communityMap.get(a.communityId).push(a.entityId);
|
|
532
|
+
}
|
|
533
|
+
const minSize = this.config.minCommunitySize ?? 2;
|
|
534
|
+
let stored = 0;
|
|
535
|
+
for (const [commId, entityIds] of communityMap) {
|
|
536
|
+
if (entityIds.length < minSize)
|
|
537
|
+
continue;
|
|
538
|
+
const communityId = `community-${commId}`;
|
|
539
|
+
// Get entity descriptions for summary
|
|
540
|
+
const entityDescs = await this.cypher.read(`MATCH (e:${ENTITY_LABEL}) WHERE e.entityId IN $ids
|
|
541
|
+
RETURN e.name AS name, e.description AS description`, { ids: entityIds });
|
|
542
|
+
// Generate summary
|
|
543
|
+
let summary = entityDescs.map((e) => `${e.name}: ${e.description}`).join('; ');
|
|
544
|
+
let title = entityDescs.slice(0, 3).map((e) => e.name).join(', ');
|
|
545
|
+
if (this.deps.llmProvider && entityDescs.length > 2) {
|
|
546
|
+
try {
|
|
547
|
+
const prompt = `Summarize this group of related entities in 2-3 sentences:\n${entityDescs.map((e) => `- ${e.name}: ${e.description}`).join('\n')}`;
|
|
548
|
+
summary = await this.deps.llmProvider.generateText(prompt, { maxTokens: 200 });
|
|
549
|
+
title = `Community: ${entityDescs.slice(0, 3).map((e) => e.name).join(', ')}`;
|
|
550
|
+
}
|
|
551
|
+
catch { /* use concatenated descriptions */ }
|
|
552
|
+
}
|
|
553
|
+
// Store community node
|
|
554
|
+
await this.cypher.writeVoid(`CREATE (c:${COMMUNITY_LABEL} {
|
|
555
|
+
communityId: $communityId,
|
|
556
|
+
level: 0,
|
|
557
|
+
title: $title,
|
|
558
|
+
summary: $summary,
|
|
559
|
+
entityIds: $entityIds,
|
|
560
|
+
importance: $importance,
|
|
561
|
+
parentCommunityId: null,
|
|
562
|
+
childCommunityIds: [],
|
|
563
|
+
relationshipIds: [],
|
|
564
|
+
findings: [],
|
|
565
|
+
createdAt: $now
|
|
566
|
+
})`, {
|
|
567
|
+
communityId,
|
|
568
|
+
title,
|
|
569
|
+
summary,
|
|
570
|
+
entityIds,
|
|
571
|
+
importance: entityIds.length / assignments.length,
|
|
572
|
+
now: nowIso(),
|
|
573
|
+
});
|
|
574
|
+
// Create MEMBER_OF edges
|
|
575
|
+
await this.cypher.writeVoid(`MATCH (c:${COMMUNITY_LABEL} { communityId: $communityId })
|
|
576
|
+
UNWIND $entityIds AS eid
|
|
577
|
+
MATCH (e:${ENTITY_LABEL} { entityId: eid })
|
|
578
|
+
MERGE (e)-[:${MEMBER_OF}]->(c)`, { communityId, entityIds });
|
|
579
|
+
// Generate community embedding for global search
|
|
580
|
+
if (this.deps.embeddingManager) {
|
|
581
|
+
try {
|
|
582
|
+
const { embeddings } = await this.deps.embeddingManager.generateEmbeddings({
|
|
583
|
+
texts: `${title}: ${summary}`,
|
|
584
|
+
});
|
|
585
|
+
await this.cypher.writeVoid(`MATCH (c:${COMMUNITY_LABEL} { communityId: $communityId })
|
|
586
|
+
SET c.summaryEmbedding = $embedding`, { communityId, embedding: embeddings[0] });
|
|
587
|
+
}
|
|
588
|
+
catch { /* skip embedding if it fails */ }
|
|
589
|
+
}
|
|
590
|
+
stored++;
|
|
591
|
+
}
|
|
592
|
+
return stored;
|
|
593
|
+
}
|
|
594
|
+
// ============ Private: Entity Extraction ============
|
|
595
|
+
async extractEntitiesAndRelationships(content, documentId) {
|
|
596
|
+
if (this.deps.llmProvider) {
|
|
597
|
+
return this.extractViaLLM(content, documentId);
|
|
598
|
+
}
|
|
599
|
+
return this.extractViaPatterns(content, documentId);
|
|
600
|
+
}
|
|
601
|
+
async extractViaLLM(content, documentId) {
|
|
602
|
+
const entityTypes = this.config.entityTypes ?? ['person', 'organization', 'concept', 'location', 'event', 'technology'];
|
|
603
|
+
const prompt = `Extract entities and relationships from the following text.
|
|
604
|
+
Return JSON with this exact structure:
|
|
605
|
+
{"entities": [{"name": "...", "type": "...", "description": "..."}], "relationships": [{"source": "...", "target": "...", "type": "...", "description": "..."}]}
|
|
606
|
+
|
|
607
|
+
Entity types: ${entityTypes.join(', ')}
|
|
608
|
+
|
|
609
|
+
Text:
|
|
610
|
+
${content.slice(0, 4000)}`;
|
|
611
|
+
try {
|
|
612
|
+
const response = await this.deps.llmProvider.generateText(prompt, {
|
|
613
|
+
maxTokens: 2000,
|
|
614
|
+
temperature: 0,
|
|
615
|
+
});
|
|
616
|
+
const parsed = JSON.parse(response.replace(/```json?\n?/g, '').replace(/```/g, '').trim());
|
|
617
|
+
const now = nowIso();
|
|
618
|
+
const entities = (parsed.entities ?? []).map((e) => ({
|
|
619
|
+
id: generateId(),
|
|
620
|
+
name: e.name,
|
|
621
|
+
type: e.type ?? 'concept',
|
|
622
|
+
description: e.description ?? '',
|
|
623
|
+
properties: {},
|
|
624
|
+
sourceDocumentIds: [documentId],
|
|
625
|
+
frequency: 1,
|
|
626
|
+
createdAt: now,
|
|
627
|
+
updatedAt: now,
|
|
628
|
+
}));
|
|
629
|
+
const entityNameToId = new Map(entities.map((e) => [e.name.toLowerCase(), e.id]));
|
|
630
|
+
const relationships = (parsed.relationships ?? [])
|
|
631
|
+
.filter((r) => entityNameToId.has(r.source?.toLowerCase()) && entityNameToId.has(r.target?.toLowerCase()))
|
|
632
|
+
.map((r) => ({
|
|
633
|
+
id: generateId(),
|
|
634
|
+
sourceEntityId: entityNameToId.get(r.source.toLowerCase()),
|
|
635
|
+
targetEntityId: entityNameToId.get(r.target.toLowerCase()),
|
|
636
|
+
type: r.type ?? 'related_to',
|
|
637
|
+
description: r.description ?? '',
|
|
638
|
+
weight: 1,
|
|
639
|
+
properties: {},
|
|
640
|
+
sourceDocumentIds: [documentId],
|
|
641
|
+
createdAt: now,
|
|
642
|
+
}));
|
|
643
|
+
return { entities, relationships };
|
|
644
|
+
}
|
|
645
|
+
catch {
|
|
646
|
+
return this.extractViaPatterns(content, documentId);
|
|
647
|
+
}
|
|
648
|
+
}
|
|
649
|
+
extractViaPatterns(content, documentId) {
|
|
650
|
+
// Simple NER-like extraction via capitalized phrases
|
|
651
|
+
const now = nowIso();
|
|
652
|
+
const entityMap = new Map();
|
|
653
|
+
const capitalizedPattern = /\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\b/g;
|
|
654
|
+
let match;
|
|
655
|
+
while ((match = capitalizedPattern.exec(content)) !== null) {
|
|
656
|
+
const name = match[1];
|
|
657
|
+
const normalized = name.toLowerCase();
|
|
658
|
+
if (!entityMap.has(normalized)) {
|
|
659
|
+
entityMap.set(normalized, {
|
|
660
|
+
id: generateId(),
|
|
661
|
+
name,
|
|
662
|
+
type: 'concept',
|
|
663
|
+
description: `Entity "${name}" extracted from text`,
|
|
664
|
+
properties: {},
|
|
665
|
+
sourceDocumentIds: [documentId],
|
|
666
|
+
frequency: 1,
|
|
667
|
+
createdAt: now,
|
|
668
|
+
updatedAt: now,
|
|
669
|
+
});
|
|
670
|
+
}
|
|
671
|
+
else {
|
|
672
|
+
entityMap.get(normalized).frequency++;
|
|
673
|
+
}
|
|
674
|
+
}
|
|
675
|
+
return { entities: Array.from(entityMap.values()), relationships: [] };
|
|
676
|
+
}
|
|
677
|
+
// ============ Private: Embedding Generation ============
|
|
678
|
+
async generateEntityEmbeddings(entities) {
|
|
679
|
+
if (!this.deps.embeddingManager || entities.length === 0)
|
|
680
|
+
return;
|
|
681
|
+
const texts = entities.map((e) => `${e.name} (${e.type}): ${e.description}`);
|
|
682
|
+
const batchSize = 32;
|
|
683
|
+
for (let i = 0; i < texts.length; i += batchSize) {
|
|
684
|
+
const batch = texts.slice(i, i + batchSize);
|
|
685
|
+
const batchEntities = entities.slice(i, i + batchSize);
|
|
686
|
+
try {
|
|
687
|
+
const { embeddings } = await this.deps.embeddingManager.generateEmbeddings({ texts: batch });
|
|
688
|
+
for (let j = 0; j < batchEntities.length; j++) {
|
|
689
|
+
await this.cypher.writeVoid(`MATCH (e:${ENTITY_LABEL} { entityId: $entityId })
|
|
690
|
+
SET e.embedding = $embedding`, { entityId: batchEntities[j].id, embedding: embeddings[j] });
|
|
691
|
+
}
|
|
692
|
+
}
|
|
693
|
+
catch { /* skip embedding batch on error */ }
|
|
694
|
+
}
|
|
695
|
+
}
|
|
696
|
+
// ============ Private Helpers ============
|
|
697
|
+
nodeToGraphEntity(node) {
|
|
698
|
+
const props = node?.properties ?? node ?? {};
|
|
699
|
+
return {
|
|
700
|
+
id: props.entityId ?? '',
|
|
701
|
+
name: props.name ?? '',
|
|
702
|
+
type: props.type ?? '',
|
|
703
|
+
description: props.description ?? '',
|
|
704
|
+
properties: this.safeParseJson(props.properties_json, {}),
|
|
705
|
+
embedding: props.embedding ?? undefined,
|
|
706
|
+
sourceDocumentIds: props.sourceDocumentIds ?? [],
|
|
707
|
+
frequency: Number(props.frequency ?? 0),
|
|
708
|
+
createdAt: props.createdAt ?? '',
|
|
709
|
+
updatedAt: props.updatedAt ?? '',
|
|
710
|
+
};
|
|
711
|
+
}
|
|
712
|
+
nodeToCommunity(node) {
|
|
713
|
+
const props = node?.properties ?? node ?? {};
|
|
714
|
+
return {
|
|
715
|
+
id: props.communityId ?? '',
|
|
716
|
+
level: Number(props.level ?? 0),
|
|
717
|
+
parentCommunityId: props.parentCommunityId ?? null,
|
|
718
|
+
childCommunityIds: props.childCommunityIds ?? [],
|
|
719
|
+
entityIds: props.entityIds ?? [],
|
|
720
|
+
relationshipIds: props.relationshipIds ?? [],
|
|
721
|
+
summary: props.summary ?? '',
|
|
722
|
+
findings: props.findings ?? [],
|
|
723
|
+
importance: Number(props.importance ?? 0),
|
|
724
|
+
title: props.title ?? '',
|
|
725
|
+
createdAt: props.createdAt ?? '',
|
|
726
|
+
};
|
|
727
|
+
}
|
|
728
|
+
safeParseJson(json, fallback) {
|
|
729
|
+
if (!json)
|
|
730
|
+
return fallback;
|
|
731
|
+
try {
|
|
732
|
+
return JSON.parse(json);
|
|
733
|
+
}
|
|
734
|
+
catch {
|
|
735
|
+
return fallback;
|
|
736
|
+
}
|
|
737
|
+
}
|
|
738
|
+
simpleHash(text) {
|
|
739
|
+
let hash = 0;
|
|
740
|
+
for (let i = 0; i < text.length; i++) {
|
|
741
|
+
const char = text.charCodeAt(i);
|
|
742
|
+
hash = ((hash << 5) - hash) + char;
|
|
743
|
+
hash = hash & hash;
|
|
744
|
+
}
|
|
745
|
+
return hash.toString(36);
|
|
746
|
+
}
|
|
747
|
+
}
|
|
748
|
+
//# sourceMappingURL=Neo4jGraphRAGEngine.js.map
|