kailash 0.3.2__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +33 -1
- kailash/access_control/__init__.py +129 -0
- kailash/access_control/managers.py +461 -0
- kailash/access_control/rule_evaluators.py +467 -0
- kailash/access_control_abac.py +825 -0
- kailash/config/__init__.py +27 -0
- kailash/config/database_config.py +359 -0
- kailash/database/__init__.py +28 -0
- kailash/database/execution_pipeline.py +499 -0
- kailash/middleware/__init__.py +306 -0
- kailash/middleware/auth/__init__.py +33 -0
- kailash/middleware/auth/access_control.py +436 -0
- kailash/middleware/auth/auth_manager.py +422 -0
- kailash/middleware/auth/jwt_auth.py +477 -0
- kailash/middleware/auth/kailash_jwt_auth.py +616 -0
- kailash/middleware/communication/__init__.py +37 -0
- kailash/middleware/communication/ai_chat.py +989 -0
- kailash/middleware/communication/api_gateway.py +802 -0
- kailash/middleware/communication/events.py +470 -0
- kailash/middleware/communication/realtime.py +710 -0
- kailash/middleware/core/__init__.py +21 -0
- kailash/middleware/core/agent_ui.py +890 -0
- kailash/middleware/core/schema.py +643 -0
- kailash/middleware/core/workflows.py +396 -0
- kailash/middleware/database/__init__.py +63 -0
- kailash/middleware/database/base.py +113 -0
- kailash/middleware/database/base_models.py +525 -0
- kailash/middleware/database/enums.py +106 -0
- kailash/middleware/database/migrations.py +12 -0
- kailash/{api/database.py → middleware/database/models.py} +183 -291
- kailash/middleware/database/repositories.py +685 -0
- kailash/middleware/database/session_manager.py +19 -0
- kailash/middleware/mcp/__init__.py +38 -0
- kailash/middleware/mcp/client_integration.py +585 -0
- kailash/middleware/mcp/enhanced_server.py +576 -0
- kailash/nodes/__init__.py +27 -3
- kailash/nodes/admin/__init__.py +42 -0
- kailash/nodes/admin/audit_log.py +794 -0
- kailash/nodes/admin/permission_check.py +864 -0
- kailash/nodes/admin/role_management.py +823 -0
- kailash/nodes/admin/security_event.py +1523 -0
- kailash/nodes/admin/user_management.py +944 -0
- kailash/nodes/ai/a2a.py +24 -7
- kailash/nodes/ai/ai_providers.py +248 -40
- kailash/nodes/ai/embedding_generator.py +11 -11
- kailash/nodes/ai/intelligent_agent_orchestrator.py +99 -11
- kailash/nodes/ai/llm_agent.py +436 -5
- kailash/nodes/ai/self_organizing.py +85 -10
- kailash/nodes/ai/vision_utils.py +148 -0
- kailash/nodes/alerts/__init__.py +26 -0
- kailash/nodes/alerts/base.py +234 -0
- kailash/nodes/alerts/discord.py +499 -0
- kailash/nodes/api/auth.py +287 -6
- kailash/nodes/api/rest.py +151 -0
- kailash/nodes/auth/__init__.py +17 -0
- kailash/nodes/auth/directory_integration.py +1228 -0
- kailash/nodes/auth/enterprise_auth_provider.py +1328 -0
- kailash/nodes/auth/mfa.py +2338 -0
- kailash/nodes/auth/risk_assessment.py +872 -0
- kailash/nodes/auth/session_management.py +1093 -0
- kailash/nodes/auth/sso.py +1040 -0
- kailash/nodes/base.py +344 -13
- kailash/nodes/base_cycle_aware.py +4 -2
- kailash/nodes/base_with_acl.py +1 -1
- kailash/nodes/code/python.py +283 -10
- kailash/nodes/compliance/__init__.py +9 -0
- kailash/nodes/compliance/data_retention.py +1888 -0
- kailash/nodes/compliance/gdpr.py +2004 -0
- kailash/nodes/data/__init__.py +22 -2
- kailash/nodes/data/async_connection.py +469 -0
- kailash/nodes/data/async_sql.py +757 -0
- kailash/nodes/data/async_vector.py +598 -0
- kailash/nodes/data/readers.py +767 -0
- kailash/nodes/data/retrieval.py +360 -1
- kailash/nodes/data/sharepoint_graph.py +397 -21
- kailash/nodes/data/sql.py +94 -5
- kailash/nodes/data/streaming.py +68 -8
- kailash/nodes/data/vector_db.py +54 -4
- kailash/nodes/enterprise/__init__.py +13 -0
- kailash/nodes/enterprise/batch_processor.py +741 -0
- kailash/nodes/enterprise/data_lineage.py +497 -0
- kailash/nodes/logic/convergence.py +31 -9
- kailash/nodes/logic/operations.py +14 -3
- kailash/nodes/mixins/__init__.py +8 -0
- kailash/nodes/mixins/event_emitter.py +201 -0
- kailash/nodes/mixins/mcp.py +9 -4
- kailash/nodes/mixins/security.py +165 -0
- kailash/nodes/monitoring/__init__.py +7 -0
- kailash/nodes/monitoring/performance_benchmark.py +2497 -0
- kailash/nodes/rag/__init__.py +284 -0
- kailash/nodes/rag/advanced.py +1615 -0
- kailash/nodes/rag/agentic.py +773 -0
- kailash/nodes/rag/conversational.py +999 -0
- kailash/nodes/rag/evaluation.py +875 -0
- kailash/nodes/rag/federated.py +1188 -0
- kailash/nodes/rag/graph.py +721 -0
- kailash/nodes/rag/multimodal.py +671 -0
- kailash/nodes/rag/optimized.py +933 -0
- kailash/nodes/rag/privacy.py +1059 -0
- kailash/nodes/rag/query_processing.py +1335 -0
- kailash/nodes/rag/realtime.py +764 -0
- kailash/nodes/rag/registry.py +547 -0
- kailash/nodes/rag/router.py +837 -0
- kailash/nodes/rag/similarity.py +1854 -0
- kailash/nodes/rag/strategies.py +566 -0
- kailash/nodes/rag/workflows.py +575 -0
- kailash/nodes/security/__init__.py +19 -0
- kailash/nodes/security/abac_evaluator.py +1411 -0
- kailash/nodes/security/audit_log.py +103 -0
- kailash/nodes/security/behavior_analysis.py +1893 -0
- kailash/nodes/security/credential_manager.py +401 -0
- kailash/nodes/security/rotating_credentials.py +760 -0
- kailash/nodes/security/security_event.py +133 -0
- kailash/nodes/security/threat_detection.py +1103 -0
- kailash/nodes/testing/__init__.py +9 -0
- kailash/nodes/testing/credential_testing.py +499 -0
- kailash/nodes/transform/__init__.py +10 -2
- kailash/nodes/transform/chunkers.py +592 -1
- kailash/nodes/transform/processors.py +484 -14
- kailash/nodes/validation.py +321 -0
- kailash/runtime/access_controlled.py +1 -1
- kailash/runtime/async_local.py +41 -7
- kailash/runtime/docker.py +1 -1
- kailash/runtime/local.py +474 -55
- kailash/runtime/parallel.py +1 -1
- kailash/runtime/parallel_cyclic.py +1 -1
- kailash/runtime/testing.py +210 -2
- kailash/security.py +1 -1
- kailash/utils/migrations/__init__.py +25 -0
- kailash/utils/migrations/generator.py +433 -0
- kailash/utils/migrations/models.py +231 -0
- kailash/utils/migrations/runner.py +489 -0
- kailash/utils/secure_logging.py +342 -0
- kailash/workflow/__init__.py +16 -0
- kailash/workflow/cyclic_runner.py +3 -4
- kailash/workflow/graph.py +70 -2
- kailash/workflow/resilience.py +249 -0
- kailash/workflow/templates.py +726 -0
- {kailash-0.3.2.dist-info → kailash-0.4.1.dist-info}/METADATA +256 -20
- kailash-0.4.1.dist-info/RECORD +227 -0
- kailash/api/__init__.py +0 -17
- kailash/api/__main__.py +0 -6
- kailash/api/studio_secure.py +0 -893
- kailash/mcp/__main__.py +0 -13
- kailash/mcp/server_new.py +0 -336
- kailash/mcp/servers/__init__.py +0 -12
- kailash-0.3.2.dist-info/RECORD +0 -136
- {kailash-0.3.2.dist-info → kailash-0.4.1.dist-info}/WHEEL +0 -0
- {kailash-0.3.2.dist-info → kailash-0.4.1.dist-info}/entry_points.txt +0 -0
- {kailash-0.3.2.dist-info → kailash-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.3.2.dist-info → kailash-0.4.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,721 @@
|
|
1
|
+
"""
|
2
|
+
Graph-Enhanced RAG Implementation
|
3
|
+
|
4
|
+
Implements knowledge graph-based retrieval for complex reasoning:
|
5
|
+
- Entity and relationship extraction
|
6
|
+
- Community detection and summarization
|
7
|
+
- Multi-hop graph traversal
|
8
|
+
- Local and global context integration
|
9
|
+
|
10
|
+
Based on Microsoft GraphRAG (2024) and knowledge graph research.
|
11
|
+
"""
|
12
|
+
|
13
|
+
import json
|
14
|
+
import logging
|
15
|
+
from collections import defaultdict
|
16
|
+
from typing import Any, Dict, List, Optional, Set, Tuple
|
17
|
+
|
18
|
+
import networkx as nx
|
19
|
+
|
20
|
+
from ...workflow.builder import WorkflowBuilder
|
21
|
+
from ..ai.llm_agent import LLMAgentNode
|
22
|
+
from ..base import Node, NodeParameter, register_node
|
23
|
+
from ..code.python import PythonCodeNode
|
24
|
+
from ..logic.workflow import WorkflowNode
|
25
|
+
|
26
|
+
logger = logging.getLogger(__name__)
|
27
|
+
|
28
|
+
|
29
|
+
@register_node()
class GraphRAGNode(WorkflowNode):
    """
    Knowledge Graph-Enhanced RAG

    Builds and queries knowledge graphs from documents for superior reasoning
    capabilities. Combines entity-centric retrieval with relationship traversal.

    When to use:
    - Best for: Complex multi-hop questions, relationship queries, analytical tasks
    - Not ideal for: Simple factual lookups, real-time requirements
    - Performance: 2-5 seconds (includes graph building)
    - Quality improvement: 40-60% for complex reasoning tasks

    Key features:
    - Automatic entity and relationship extraction
    - Community detection for topic clustering
    - Multi-hop reasoning across connections
    - Hierarchical summarization at multiple levels
    - Combines local entity context with global graph understanding

    Example:
        graph_rag = GraphRAGNode(
            entity_types=["person", "organization", "technology", "concept"],
            max_hops=3
        )

        # Query: "How did key researchers influence the development of transformers?"
        # GraphRAG will:
        # 1. Extract entities (researchers, transformer, papers)
        # 2. Find relationships (authored, influenced, cited)
        # 3. Traverse graph to find influence paths
        # 4. Synthesize multi-hop connections

        result = await graph_rag.run(
            documents=research_papers,
            query="How did key researchers influence the development of transformers?"
        )

    Parameters:
        entity_types: Types of entities to extract
        relationship_types: Types of relationships to identify
        max_hops: Maximum graph traversal depth
        community_algorithm: Method for detecting topic communities
            ("louvain" uses the ``community`` package; any other value
            falls back to weakly connected components)
        use_global_summary: Include high-level graph summaries

    Returns:
        The result synthesizer emits a ``graph_rag_results`` dict containing
        ``query``, ``retrieved_entities``, ``retrieved_relationships``,
        ``graph_context``, ``reasoning_path``, ``subgraph_size``,
        ``community_info`` and ``global_graph_stats``.
    """

    def __init__(
        self,
        name: str = "graph_rag",
        entity_types: Optional[List[str]] = None,
        relationship_types: Optional[List[str]] = None,
        max_hops: int = 2,
        community_algorithm: str = "louvain",
        use_global_summary: bool = True,
    ):
        # Configuration must be stored before super().__init__ because
        # _create_workflow() reads it while the workflow is being assembled.
        self.entity_types = entity_types or [
            "person",
            "organization",
            "concept",
            "technology",
        ]
        self.relationship_types = relationship_types or [
            "relates_to",
            "influences",
            "uses",
            "created_by",
        ]
        self.max_hops = max_hops
        self.community_algorithm = community_algorithm
        self.use_global_summary = use_global_summary
        super().__init__(name, self._create_workflow())

    def _create_workflow(self) -> WorkflowNode:
        """Create knowledge graph RAG workflow.

        Wires five (optionally six) nodes: entity extraction, graph building,
        query analysis, graph retrieval, optional global summary, and result
        synthesis. Returns whatever ``WorkflowBuilder.build`` produces.
        """
        # NOTE(review): no connection in this method feeds documents to the
        # entity extractor or `query` to the query processor/synthesizer;
        # presumably they are supplied as workflow inputs - confirm.
        builder = WorkflowBuilder()

        # Entity extraction
        entity_extractor_id = builder.add_node(
            "LLMAgentNode",
            node_id="entity_extractor",
            config={
                "system_prompt": f"""Extract entities and relationships from text.

Entity types: {', '.join(self.entity_types)}
Relationship types: {', '.join(self.relationship_types)}

Return JSON:
{{
    "entities": [
        {{"name": "...", "type": "...", "description": "..."}}
    ],
    "relationships": [
        {{"source": "...", "target": "...", "type": "...", "description": "..."}}
    ]
}}""",
                "model": "gpt-4",
            },
        )

        # Graph builder. The algorithm name is embedded with !r so that the
        # generated source stays valid whatever characters the name contains.
        graph_builder_id = builder.add_node(
            "PythonCodeNode",
            node_id="graph_builder",
            config={
                "code": f"""
import networkx as nx
from collections import defaultdict

def build_knowledge_graph(extraction_results):
    '''Build NetworkX graph from extracted entities and relationships'''
    G = nx.MultiDiGraph()

    # Add all entities as nodes
    all_entities = []
    all_relationships = []

    for doc_extraction in extraction_results:
        entities = doc_extraction.get("entities", [])
        relationships = doc_extraction.get("relationships", [])

        # Add entities
        for entity in entities:
            node_id = entity["name"].lower()
            # documents is a list (not a set) so the node-link payload
            # stays JSON-serializable
            G.add_node(node_id,
                       name=entity["name"],
                       type=entity["type"],
                       description=entity.get("description", ""),
                       documents=[])
            all_entities.append(entity)

        # Add relationships
        for rel in relationships:
            source = rel["source"].lower()
            target = rel["target"].lower()
            G.add_edge(source, target,
                       type=rel["type"],
                       description=rel.get("description", ""))
            all_relationships.append(rel)

    # Detect communities
    if len(G) > 0:
        if {self.community_algorithm!r} == "louvain":
            import community
            communities = community.best_partition(G.to_undirected())
        else:
            # Simple connected components
            communities = {{}}
            for i, comp in enumerate(nx.weakly_connected_components(G)):
                for node in comp:
                    communities[node] = i
    else:
        communities = {{}}

    # Build community summaries
    community_nodes = defaultdict(list)
    for node, comm_id in communities.items():
        community_nodes[comm_id].append(node)

    # Return the assembled payload; without this the node output is None
    return {{
        "graph": nx.node_link_data(G),
        "entities": all_entities,
        "relationships": all_relationships,
        "communities": communities,
        "community_nodes": dict(community_nodes),
        "stats": {{
            "num_entities": len(G),
            "num_relationships": len(G.edges()),
            "num_communities": len(set(communities.values())) if communities else 0
        }}
    }}

result = {{"graph_data": build_knowledge_graph(extraction_results)}}
"""
            },
        )

        # Query processor for graph
        query_processor_id = builder.add_node(
            "LLMAgentNode",
            node_id="query_processor",
            config={
                "system_prompt": """Analyze the query to identify:
1. Key entities mentioned or implied
2. Types of relationships being asked about
3. Whether multi-hop reasoning is needed
4. The depth of analysis required

Return JSON:
{
    "entities": ["entity1", "entity2"],
    "relationship_types": ["type1", "type2"],
    "requires_multi_hop": true/false,
    "reasoning_type": "causal/comparative/analytical"
}""",
                "model": "gpt-4",
            },
        )

        # Graph traversal and retrieval
        graph_retriever_id = builder.add_node(
            "PythonCodeNode",
            node_id="graph_retriever",
            config={
                "code": f"""
import networkx as nx
from collections import deque

def retrieve_from_graph(graph_data, query_analysis):
    '''Retrieve relevant subgraph based on query analysis'''
    # Reconstruct graph
    G = nx.node_link_graph(graph_data["graph"])

    query_entities = [e.lower() for e in query_analysis.get("entities", [])]
    relationship_types = query_analysis.get("relationship_types", [])
    requires_multi_hop = query_analysis.get("requires_multi_hop", False)

    # Find relevant nodes
    relevant_nodes = set()
    for entity in query_entities:
        # Fuzzy match entities
        for node in G.nodes():
            if entity in node or node in entity:
                relevant_nodes.add(node)

    # Multi-hop expansion if needed
    if requires_multi_hop and relevant_nodes:
        expanded_nodes = set(relevant_nodes)
        for start_node in relevant_nodes:
            # BFS up to max_hops
            visited = {{start_node}}
            queue = deque([(start_node, 0)])

            while queue:
                node, depth = queue.popleft()
                if depth >= {self.max_hops}:
                    continue

                # Check neighbors
                for neighbor in G.neighbors(node):
                    if neighbor not in visited:
                        visited.add(neighbor)
                        expanded_nodes.add(neighbor)
                        queue.append((neighbor, depth + 1))

        relevant_nodes = expanded_nodes

    # Extract subgraph
    if relevant_nodes:
        subgraph = G.subgraph(relevant_nodes).copy()

        # Get relevant relationships
        relevant_edges = []
        for u, v, data in subgraph.edges(data=True):
            if not relationship_types or data.get("type") in relationship_types:
                relevant_edges.append({{
                    "source": u,
                    "target": v,
                    "type": data.get("type"),
                    "description": data.get("description")
                }})

        # Centrality is computed once for the whole subgraph, not per node
        centrality = nx.degree_centrality(subgraph)

        # Get node details
        relevant_entities = []
        for node in relevant_nodes:
            node_data = G.nodes[node]
            relevant_entities.append({{
                "name": node_data.get("name", node),
                "type": node_data.get("type"),
                "description": node_data.get("description"),
                "centrality": centrality.get(node, 0)
            }})

        # Sort by centrality
        relevant_entities.sort(key=lambda x: x["centrality"], reverse=True)

    else:
        relevant_entities = []
        relevant_edges = []

    # Get community context if available
    communities = graph_data.get("communities", {{}})
    community_members = graph_data.get("community_nodes", {{}})
    community_context = {{}}
    for node in relevant_nodes:
        comm_id = communities.get(node)
        if comm_id is not None:
            # Community keys are ints in-process but turn into strings
            # after a JSON round-trip, so try both forms
            members = community_members.get(comm_id)
            if members is None:
                members = community_members.get(str(comm_id), [])
            community_context[comm_id] = members

    return {{
        "entities": relevant_entities[:20],  # Top 20 by centrality
        "relationships": relevant_edges[:30],  # Top 30 relationships
        "subgraph_stats": {{
            "nodes": len(relevant_nodes),
            "edges": len(relevant_edges)
        }},
        "community_context": community_context,
        "query_entities_found": len([e for e in query_entities if any(e in n for n in relevant_nodes)])
    }}

result = {{"graph_retrieval": retrieve_from_graph(graph_data, query_analysis)}}
"""
            },
        )

        # Global summary generator (if enabled)
        if self.use_global_summary:
            summary_generator_id = builder.add_node(
                "LLMAgentNode",
                node_id="summary_generator",
                config={
                    "system_prompt": """Generate high-level summaries of document communities.
Focus on main themes, key entities, and important relationships.
Be concise but comprehensive.""",
                    "model": "gpt-4",
                },
            )

        # Result synthesizer
        # NOTE(review): the "global_summaries" input wired below is not read
        # by this code, so summaries never reach the final result - confirm
        # whether that is intended.
        result_synthesizer_id = builder.add_node(
            "PythonCodeNode",
            node_id="result_synthesizer",
            config={
                "code": """
# Combine all graph information
graph_retrieval = graph_retrieval
query = query
graph_data = graph_data

# Build context from retrieved subgraph
context_parts = []

# Add entity information
if graph_retrieval["entities"]:
    context_parts.append("Key Entities:")
    for entity in graph_retrieval["entities"][:10]:
        context_parts.append(f"- {entity['name']} ({entity['type']}): {entity['description']}")

# Add relationship information
if graph_retrieval["relationships"]:
    context_parts.append("\\nKey Relationships:")
    for rel in graph_retrieval["relationships"][:10]:
        context_parts.append(f"- {rel['source']} {rel['type']} {rel['target']}")

# Add community context
if graph_retrieval["community_context"]:
    context_parts.append("\\nRelated Topic Clusters:")
    for comm_id, nodes in list(graph_retrieval["community_context"].items())[:3]:
        context_parts.append(f"- Cluster {comm_id}: {', '.join(nodes[:5])}")

context = "\\n".join(context_parts)

# Create reasoning path visualization
reasoning_path = []
entities = graph_retrieval["entities"]
if len(entities) > 1:
    # Simple path representation
    for i in range(min(3, len(entities)-1)):
        reasoning_path.append({
            "hop": i + 1,
            "from": entities[i]["name"],
            "to": entities[i+1]["name"],
            "connection": "related through graph structure"
        })

result = {
    "graph_rag_results": {
        "query": query,
        "retrieved_entities": graph_retrieval["entities"],
        "retrieved_relationships": graph_retrieval["relationships"],
        "graph_context": context,
        "reasoning_path": reasoning_path,
        "subgraph_size": graph_retrieval["subgraph_stats"],
        "community_info": {
            "num_communities": len(graph_retrieval["community_context"]),
            "communities_accessed": list(graph_retrieval["community_context"].keys())
        },
        "global_graph_stats": graph_data["stats"]
    }
}
"""
            },
        )

        # Connect workflow
        builder.add_connection(
            entity_extractor_id, "response", graph_builder_id, "extraction_results"
        )
        builder.add_connection(
            query_processor_id, "response", graph_retriever_id, "query_analysis"
        )
        builder.add_connection(
            graph_builder_id, "graph_data", graph_retriever_id, "graph_data"
        )
        builder.add_connection(
            graph_retriever_id,
            "graph_retrieval",
            result_synthesizer_id,
            "graph_retrieval",
        )
        builder.add_connection(
            graph_builder_id, "graph_data", result_synthesizer_id, "graph_data"
        )

        if self.use_global_summary:
            builder.add_connection(
                graph_builder_id, "graph_data", summary_generator_id, "graph_data"
            )
            builder.add_connection(
                summary_generator_id,
                "response",
                result_synthesizer_id,
                "global_summaries",
            )

        return builder.build(name="graph_rag_workflow")
|
452
|
+
|
453
|
+
|
454
|
+
@register_node()
class GraphBuilderNode(Node):
    """
    Dedicated Graph Construction Node

    Builds (or incrementally updates) a knowledge graph from documents and
    returns it in NetworkX node-link form.

    The extraction step in this class is a placeholder: it only recognises the
    word "transformer" and adds a fixed transformer -> attention edge. The
    ``merge_similar_entities``, ``similarity_threshold``, ``track_temporal``
    and ``confidence_scoring`` options are stored and reported in the build
    metadata but are not otherwise applied by the code in this class.

    When to use:
    - Best for: Pre-building graphs for repeated queries
    - Not ideal for: One-time queries, small document sets
    - Performance: 100-500ms per document
    - Graph quality: Depends on entity extraction quality

    Example:
        builder = GraphBuilderNode(
            merge_similar_entities=True,
            similarity_threshold=0.85
        )

        graph = builder.run(
            documents=documents,
            existing_graph=previous_graph  # Optional: update existing
        )

    Parameters:
        merge_similar_entities: Consolidate similar entity names
        similarity_threshold: Threshold for entity merging
        track_temporal: Add timestamps to relationships
        confidence_scoring: Calculate relationship confidence

    Returns:
        graph: Node-link dict of the graph (``nx.node_link_data`` output)
        entity_map: Mapping of entities to canonical forms (currently empty)
        statistics: Graph construction statistics
        build_metadata: Documents processed and the option flags in effect
    """

    def __init__(
        self,
        name: str = "graph_builder",
        merge_similar_entities: bool = True,
        similarity_threshold: float = 0.85,
        track_temporal: bool = False,
        confidence_scoring: bool = True,
    ):
        self.merge_similar_entities = merge_similar_entities
        self.similarity_threshold = similarity_threshold
        self.track_temporal = track_temporal
        self.confidence_scoring = confidence_scoring
        super().__init__(name)

    def get_parameters(self) -> Dict[str, NodeParameter]:
        """Declare the inputs accepted by ``run``."""
        return {
            "documents": NodeParameter(
                name="documents",
                type=list,
                required=True,
                description="Documents to build graph from",
            ),
            "existing_graph": NodeParameter(
                name="existing_graph",
                type=dict,
                required=False,
                description="Existing graph to update",
            ),
            "entity_types": NodeParameter(
                name="entity_types",
                type=list,
                required=False,
                description="Types of entities to extract",
            ),
        }

    @staticmethod
    def _add_entity(G, node_id: str, node_type: str, doc_id: Any) -> None:
        """Add or refresh an entity node, accumulating its source documents."""
        # add_node() replaces attribute values, so previous provenance has to
        # be carried over explicitly. A list keeps the payload JSON-friendly
        # and matches what a graph reloaded from JSON already holds.
        previous = G.nodes[node_id].get("documents") if node_id in G else None
        documents = list(previous) if previous else []
        if doc_id not in documents:
            documents.append(doc_id)
        G.add_node(node_id, type=node_type, documents=documents)

    @staticmethod
    def _has_typed_edge(G, source: str, target: str, edge_type: str) -> bool:
        """Return True if a source -> target edge of ``edge_type`` exists."""
        if not G.has_edge(source, target):
            return False
        data = G.get_edge_data(source, target)
        if G.is_multigraph():
            # Multigraphs map edge keys to attribute dicts.
            return any(attrs.get("type") == edge_type for attrs in data.values())
        return data.get("type") == edge_type

    def run(self, **kwargs) -> Dict[str, Any]:
        """Build or update knowledge graph.

        Args:
            documents: List of dicts; ``content`` is scanned for entities and
                ``id`` (if present) is recorded as provenance.
            existing_graph: Optional node-link dict to extend.

        Returns:
            Dict with ``graph``, ``entity_map``, ``statistics`` and
            ``build_metadata`` keys.
        """
        documents = kwargs.get("documents") or []
        existing_graph = kwargs.get("existing_graph")

        # Initialize or load graph
        if existing_graph:
            G = nx.node_link_graph(existing_graph)
        else:
            G = nx.MultiDiGraph()

        # Entity extraction would happen here (simplified for example)
        # In production, would use LLM or NER model

        # Build entity map for deduplication
        entity_map = {}

        # Add sample graph building logic
        for doc in documents:
            content = doc.get("content", "")
            # NOTE(review): hash() of a str is salted per process, so the
            # fallback id is not stable across runs - prefer explicit ids.
            doc_id = doc.get("id", hash(content))

            # Simplified entity extraction
            # In production, would use proper NER
            if "transformer" in content.lower():
                self._add_entity(G, "transformer", "technology", doc_id)
                self._add_entity(G, "attention", "concept", doc_id)
                # Avoid stacking one parallel edge per matching document,
                # which would inflate edge counts and density.
                if not self._has_typed_edge(G, "transformer", "attention", "uses"):
                    G.add_edge("transformer", "attention", type="uses", confidence=0.9)

        # Calculate graph statistics
        stats = {
            "total_nodes": len(G),
            "total_edges": len(G.edges()),
            "density": nx.density(G) if len(G) > 0 else 0,
            "components": nx.number_weakly_connected_components(G) if len(G) > 0 else 0,
        }

        return {
            "graph": nx.node_link_data(G),
            "entity_map": entity_map,
            "statistics": stats,
            "build_metadata": {
                "documents_processed": len(documents),
                "merge_applied": self.merge_similar_entities,
                "temporal_tracking": self.track_temporal,
            },
        }
|
579
|
+
|
580
|
+
|
581
|
+
@register_node()
class GraphQueryNode(Node):
    """
    Advanced Graph Query Execution

    Executes queries on a knowledge graph supplied in node-link form.
    Three query types are implemented by ``run``:
    - "path": simple paths between two entities
    - "pattern": nodes whose ``type`` attribute matches (simplified)
    - "aggregate": whole-graph statistics

    When to use:
    - Best for: Complex analytical queries, relationship exploration
    - Not ideal for: Simple lookups, keyword search
    - Performance: 50-500ms depending on graph size

    Example:
        querier = GraphQueryNode()

        # Find influence paths
        result = querier.run(
            graph=knowledge_graph,
            query_type="path",
            query_params={
                "source_entity": "BERT",
                "target_entity": "GPT",
                "max_length": 4,
            },
        )

    Parameters:
        graph: Node-link dict of the graph to query
        query_type: Type of query (path, pattern, aggregate)
        query_params: Query-specific parameters
            path: source_entity, target_entity, max_length (default 3)
            pattern: pattern = {"node_type": ...}
            aggregate: none

    Returns:
        query_type / query_params: Echo of the request
        matches: Up to 20 nodes matching a pattern query
        paths: Up to 10 distinct paths for a path query
        aggregations: Statistics for an aggregate query
    """

    def __init__(self, name: str = "graph_query"):
        super().__init__(name)

    def get_parameters(self) -> Dict[str, NodeParameter]:
        """Declare the inputs accepted by ``run``."""
        return {
            "graph": NodeParameter(
                name="graph",
                type=dict,
                required=True,
                description="Knowledge graph to query",
            ),
            "query_type": NodeParameter(
                name="query_type",
                type=str,
                required=True,
                description="Type of query: path, pattern, aggregate",
            ),
            "query_params": NodeParameter(
                name="query_params",
                type=dict,
                required=True,
                description="Query-specific parameters",
            ),
        }

    def run(self, **kwargs) -> Dict[str, Any]:
        """Execute graph query.

        Unknown ``query_type`` values return the empty result skeleton.
        """
        graph_data = kwargs.get("graph") or {}
        query_type = kwargs.get("query_type", "path")
        query_params = kwargs.get("query_params") or {}

        # Reconstruct graph; an empty payload has no "nodes"/"links" keys for
        # node_link_graph to read, so treat it as an empty graph instead.
        G = nx.node_link_graph(graph_data) if graph_data else nx.MultiDiGraph()

        results = {
            "query_type": query_type,
            "query_params": query_params,
            "matches": [],
            "paths": [],
            "aggregations": {},
        }

        if query_type == "path":
            # Find paths between entities (node ids are matched lowercased)
            source = query_params.get("source_entity", "").lower()
            target = query_params.get("target_entity", "").lower()
            max_length = query_params.get("max_length", 3)

            if source in G and target in G:
                try:
                    # Consume the generator lazily and stop at 10 paths:
                    # the full set of simple paths can be exponential.
                    # Multigraphs repeat a node path once per parallel edge,
                    # so duplicates are skipped.
                    found = []
                    seen = set()
                    for path in nx.all_simple_paths(
                        G, source, target, cutoff=max_length
                    ):
                        key = tuple(path)
                        if key in seen:
                            continue
                        seen.add(key)
                        found.append(path)
                        if len(found) >= 10:  # Limit to 10 paths
                            break
                    results["paths"] = [
                        {
                            "path": path,
                            "length": len(path) - 1,
                            "edges": list(zip(path, path[1:])),
                        }
                        for path in found
                    ]
                except nx.NetworkXNoPath:
                    results["paths"] = []

        elif query_type == "pattern":
            # Pattern matching (simplified)
            pattern = query_params.get("pattern", {})
            node_type = pattern.get("node_type")

            matches = []
            for node, data in G.nodes(data=True):
                if not node_type or data.get("type") == node_type:
                    matches.append(
                        {"entity": node, "attributes": data, "degree": G.degree(node)}
                    )
            results["matches"] = matches[:20]

        elif query_type == "aggregate":
            # Graph statistics
            node_count = len(G)
            # Clustering is not implemented for multigraphs in NetworkX, so
            # collapse to a simple undirected graph first.
            simple_undirected = nx.Graph(G)
            results["aggregations"] = {
                "node_count": node_count,
                "edge_count": len(G.edges()),
                "density": nx.density(G),
                "avg_degree": (
                    sum(dict(G.degree()).values()) / node_count
                    if node_count > 0
                    else 0
                ),
                "clustering_coefficient": (
                    nx.average_clustering(simple_undirected) if node_count > 0 else 0
                ),
            }

        return results
|
718
|
+
|
719
|
+
|
720
|
+
# Export all graph nodes
|
721
|
+
__all__ = ["GraphRAGNode", "GraphBuilderNode", "GraphQueryNode"]
|