kailash 0.3.2__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. kailash/__init__.py +33 -1
  2. kailash/access_control/__init__.py +129 -0
  3. kailash/access_control/managers.py +461 -0
  4. kailash/access_control/rule_evaluators.py +467 -0
  5. kailash/access_control_abac.py +825 -0
  6. kailash/config/__init__.py +27 -0
  7. kailash/config/database_config.py +359 -0
  8. kailash/database/__init__.py +28 -0
  9. kailash/database/execution_pipeline.py +499 -0
  10. kailash/middleware/__init__.py +306 -0
  11. kailash/middleware/auth/__init__.py +33 -0
  12. kailash/middleware/auth/access_control.py +436 -0
  13. kailash/middleware/auth/auth_manager.py +422 -0
  14. kailash/middleware/auth/jwt_auth.py +477 -0
  15. kailash/middleware/auth/kailash_jwt_auth.py +616 -0
  16. kailash/middleware/communication/__init__.py +37 -0
  17. kailash/middleware/communication/ai_chat.py +989 -0
  18. kailash/middleware/communication/api_gateway.py +802 -0
  19. kailash/middleware/communication/events.py +470 -0
  20. kailash/middleware/communication/realtime.py +710 -0
  21. kailash/middleware/core/__init__.py +21 -0
  22. kailash/middleware/core/agent_ui.py +890 -0
  23. kailash/middleware/core/schema.py +643 -0
  24. kailash/middleware/core/workflows.py +396 -0
  25. kailash/middleware/database/__init__.py +63 -0
  26. kailash/middleware/database/base.py +113 -0
  27. kailash/middleware/database/base_models.py +525 -0
  28. kailash/middleware/database/enums.py +106 -0
  29. kailash/middleware/database/migrations.py +12 -0
  30. kailash/{api/database.py → middleware/database/models.py} +183 -291
  31. kailash/middleware/database/repositories.py +685 -0
  32. kailash/middleware/database/session_manager.py +19 -0
  33. kailash/middleware/mcp/__init__.py +38 -0
  34. kailash/middleware/mcp/client_integration.py +585 -0
  35. kailash/middleware/mcp/enhanced_server.py +576 -0
  36. kailash/nodes/__init__.py +27 -3
  37. kailash/nodes/admin/__init__.py +42 -0
  38. kailash/nodes/admin/audit_log.py +794 -0
  39. kailash/nodes/admin/permission_check.py +864 -0
  40. kailash/nodes/admin/role_management.py +823 -0
  41. kailash/nodes/admin/security_event.py +1523 -0
  42. kailash/nodes/admin/user_management.py +944 -0
  43. kailash/nodes/ai/a2a.py +24 -7
  44. kailash/nodes/ai/ai_providers.py +248 -40
  45. kailash/nodes/ai/embedding_generator.py +11 -11
  46. kailash/nodes/ai/intelligent_agent_orchestrator.py +99 -11
  47. kailash/nodes/ai/llm_agent.py +436 -5
  48. kailash/nodes/ai/self_organizing.py +85 -10
  49. kailash/nodes/ai/vision_utils.py +148 -0
  50. kailash/nodes/alerts/__init__.py +26 -0
  51. kailash/nodes/alerts/base.py +234 -0
  52. kailash/nodes/alerts/discord.py +499 -0
  53. kailash/nodes/api/auth.py +287 -6
  54. kailash/nodes/api/rest.py +151 -0
  55. kailash/nodes/auth/__init__.py +17 -0
  56. kailash/nodes/auth/directory_integration.py +1228 -0
  57. kailash/nodes/auth/enterprise_auth_provider.py +1328 -0
  58. kailash/nodes/auth/mfa.py +2338 -0
  59. kailash/nodes/auth/risk_assessment.py +872 -0
  60. kailash/nodes/auth/session_management.py +1093 -0
  61. kailash/nodes/auth/sso.py +1040 -0
  62. kailash/nodes/base.py +344 -13
  63. kailash/nodes/base_cycle_aware.py +4 -2
  64. kailash/nodes/base_with_acl.py +1 -1
  65. kailash/nodes/code/python.py +283 -10
  66. kailash/nodes/compliance/__init__.py +9 -0
  67. kailash/nodes/compliance/data_retention.py +1888 -0
  68. kailash/nodes/compliance/gdpr.py +2004 -0
  69. kailash/nodes/data/__init__.py +22 -2
  70. kailash/nodes/data/async_connection.py +469 -0
  71. kailash/nodes/data/async_sql.py +757 -0
  72. kailash/nodes/data/async_vector.py +598 -0
  73. kailash/nodes/data/readers.py +767 -0
  74. kailash/nodes/data/retrieval.py +360 -1
  75. kailash/nodes/data/sharepoint_graph.py +397 -21
  76. kailash/nodes/data/sql.py +94 -5
  77. kailash/nodes/data/streaming.py +68 -8
  78. kailash/nodes/data/vector_db.py +54 -4
  79. kailash/nodes/enterprise/__init__.py +13 -0
  80. kailash/nodes/enterprise/batch_processor.py +741 -0
  81. kailash/nodes/enterprise/data_lineage.py +497 -0
  82. kailash/nodes/logic/convergence.py +31 -9
  83. kailash/nodes/logic/operations.py +14 -3
  84. kailash/nodes/mixins/__init__.py +8 -0
  85. kailash/nodes/mixins/event_emitter.py +201 -0
  86. kailash/nodes/mixins/mcp.py +9 -4
  87. kailash/nodes/mixins/security.py +165 -0
  88. kailash/nodes/monitoring/__init__.py +7 -0
  89. kailash/nodes/monitoring/performance_benchmark.py +2497 -0
  90. kailash/nodes/rag/__init__.py +284 -0
  91. kailash/nodes/rag/advanced.py +1615 -0
  92. kailash/nodes/rag/agentic.py +773 -0
  93. kailash/nodes/rag/conversational.py +999 -0
  94. kailash/nodes/rag/evaluation.py +875 -0
  95. kailash/nodes/rag/federated.py +1188 -0
  96. kailash/nodes/rag/graph.py +721 -0
  97. kailash/nodes/rag/multimodal.py +671 -0
  98. kailash/nodes/rag/optimized.py +933 -0
  99. kailash/nodes/rag/privacy.py +1059 -0
  100. kailash/nodes/rag/query_processing.py +1335 -0
  101. kailash/nodes/rag/realtime.py +764 -0
  102. kailash/nodes/rag/registry.py +547 -0
  103. kailash/nodes/rag/router.py +837 -0
  104. kailash/nodes/rag/similarity.py +1854 -0
  105. kailash/nodes/rag/strategies.py +566 -0
  106. kailash/nodes/rag/workflows.py +575 -0
  107. kailash/nodes/security/__init__.py +19 -0
  108. kailash/nodes/security/abac_evaluator.py +1411 -0
  109. kailash/nodes/security/audit_log.py +103 -0
  110. kailash/nodes/security/behavior_analysis.py +1893 -0
  111. kailash/nodes/security/credential_manager.py +401 -0
  112. kailash/nodes/security/rotating_credentials.py +760 -0
  113. kailash/nodes/security/security_event.py +133 -0
  114. kailash/nodes/security/threat_detection.py +1103 -0
  115. kailash/nodes/testing/__init__.py +9 -0
  116. kailash/nodes/testing/credential_testing.py +499 -0
  117. kailash/nodes/transform/__init__.py +10 -2
  118. kailash/nodes/transform/chunkers.py +592 -1
  119. kailash/nodes/transform/processors.py +484 -14
  120. kailash/nodes/validation.py +321 -0
  121. kailash/runtime/access_controlled.py +1 -1
  122. kailash/runtime/async_local.py +41 -7
  123. kailash/runtime/docker.py +1 -1
  124. kailash/runtime/local.py +474 -55
  125. kailash/runtime/parallel.py +1 -1
  126. kailash/runtime/parallel_cyclic.py +1 -1
  127. kailash/runtime/testing.py +210 -2
  128. kailash/security.py +1 -1
  129. kailash/utils/migrations/__init__.py +25 -0
  130. kailash/utils/migrations/generator.py +433 -0
  131. kailash/utils/migrations/models.py +231 -0
  132. kailash/utils/migrations/runner.py +489 -0
  133. kailash/utils/secure_logging.py +342 -0
  134. kailash/workflow/__init__.py +16 -0
  135. kailash/workflow/cyclic_runner.py +3 -4
  136. kailash/workflow/graph.py +70 -2
  137. kailash/workflow/resilience.py +249 -0
  138. kailash/workflow/templates.py +726 -0
  139. {kailash-0.3.2.dist-info → kailash-0.4.1.dist-info}/METADATA +256 -20
  140. kailash-0.4.1.dist-info/RECORD +227 -0
  141. kailash/api/__init__.py +0 -17
  142. kailash/api/__main__.py +0 -6
  143. kailash/api/studio_secure.py +0 -893
  144. kailash/mcp/__main__.py +0 -13
  145. kailash/mcp/server_new.py +0 -336
  146. kailash/mcp/servers/__init__.py +0 -12
  147. kailash-0.3.2.dist-info/RECORD +0 -136
  148. {kailash-0.3.2.dist-info → kailash-0.4.1.dist-info}/WHEEL +0 -0
  149. {kailash-0.3.2.dist-info → kailash-0.4.1.dist-info}/entry_points.txt +0 -0
  150. {kailash-0.3.2.dist-info → kailash-0.4.1.dist-info}/licenses/LICENSE +0 -0
  151. {kailash-0.3.2.dist-info → kailash-0.4.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,721 @@
1
+ """
2
+ Graph-Enhanced RAG Implementation
3
+
4
+ Implements knowledge graph-based retrieval for complex reasoning:
5
+ - Entity and relationship extraction
6
+ - Community detection and summarization
7
+ - Multi-hop graph traversal
8
+ - Local and global context integration
9
+
10
+ Based on Microsoft GraphRAG (2024) and knowledge graph research.
11
+ """
12
+
13
+ import json
14
+ import logging
15
+ from collections import defaultdict
16
+ from typing import Any, Dict, List, Optional, Set, Tuple
17
+
18
+ import networkx as nx
19
+
20
+ from ...workflow.builder import WorkflowBuilder
21
+ from ..ai.llm_agent import LLMAgentNode
22
+ from ..base import Node, NodeParameter, register_node
23
+ from ..code.python import PythonCodeNode
24
+ from ..logic.workflow import WorkflowNode
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
+ @register_node()
30
+ class GraphRAGNode(WorkflowNode):
31
+ """
32
+ Knowledge Graph-Enhanced RAG
33
+
34
+ Builds and queries knowledge graphs from documents for superior reasoning
35
+ capabilities. Combines entity-centric retrieval with relationship traversal.
36
+
37
+ When to use:
38
+ - Best for: Complex multi-hop questions, relationship queries, analytical tasks
39
+ - Not ideal for: Simple factual lookups, real-time requirements
40
+ - Performance: 2-5 seconds (includes graph building)
41
+ - Quality improvement: 40-60% for complex reasoning tasks
42
+
43
+ Key features:
44
+ - Automatic entity and relationship extraction
45
+ - Community detection for topic clustering
46
+ - Multi-hop reasoning across connections
47
+ - Hierarchical summarization at multiple levels
48
+ - Combines local entity context with global graph understanding
49
+
50
+ Example:
51
+ graph_rag = GraphRAGNode(
52
+ entity_types=["person", "organization", "technology", "concept"],
53
+ max_hops=3
54
+ )
55
+
56
+ # Query: "How did key researchers influence the development of transformers?"
57
+ # GraphRAG will:
58
+ # 1. Extract entities (researchers, transformer, papers)
59
+ # 2. Find relationships (authored, influenced, cited)
60
+ # 3. Traverse graph to find influence paths
61
+ # 4. Synthesize multi-hop connections
62
+
63
+ result = await graph_rag.run(
64
+ documents=research_papers,
65
+ query="How did key researchers influence the development of transformers?"
66
+ )
67
+
68
+ Parameters:
69
+ entity_types: Types of entities to extract
70
+ relationship_types: Types of relationships to identify
71
+ max_hops: Maximum graph traversal depth
72
+ community_algorithm: Method for detecting topic communities
73
+ use_global_summary: Include high-level graph summaries
74
+
75
+ Returns:
76
+ results: Retrieved entities and relationships
77
+ graph_context: Local and global graph information
78
+ reasoning_path: Multi-hop connections found
79
+ community_summaries: High-level topic summaries
80
+ """
81
+
82
+ def __init__(
83
+ self,
84
+ name: str = "graph_rag",
85
+ entity_types: List[str] = None,
86
+ relationship_types: List[str] = None,
87
+ max_hops: int = 2,
88
+ community_algorithm: str = "louvain",
89
+ use_global_summary: bool = True,
90
+ ):
91
+ self.entity_types = entity_types or [
92
+ "person",
93
+ "organization",
94
+ "concept",
95
+ "technology",
96
+ ]
97
+ self.relationship_types = relationship_types or [
98
+ "relates_to",
99
+ "influences",
100
+ "uses",
101
+ "created_by",
102
+ ]
103
+ self.max_hops = max_hops
104
+ self.community_algorithm = community_algorithm
105
+ self.use_global_summary = use_global_summary
106
+ super().__init__(name, self._create_workflow())
107
+
108
+ def _create_workflow(self) -> WorkflowNode:
109
+ """Create knowledge graph RAG workflow"""
110
+ builder = WorkflowBuilder()
111
+
112
+ # Entity extraction
113
+ entity_extractor_id = builder.add_node(
114
+ "LLMAgentNode",
115
+ node_id="entity_extractor",
116
+ config={
117
+ "system_prompt": f"""Extract entities and relationships from text.
118
+
119
+ Entity types: {', '.join(self.entity_types)}
120
+ Relationship types: {', '.join(self.relationship_types)}
121
+
122
+ Return JSON:
123
+ {{
124
+ "entities": [
125
+ {{"name": "...", "type": "...", "description": "..."}}
126
+ ],
127
+ "relationships": [
128
+ {{"source": "...", "target": "...", "type": "...", "description": "..."}}
129
+ ]
130
+ }}""",
131
+ "model": "gpt-4",
132
+ },
133
+ )
134
+
135
+ # Graph builder
136
+ graph_builder_id = builder.add_node(
137
+ "PythonCodeNode",
138
+ node_id="graph_builder",
139
+ config={
140
+ "code": f"""
141
+ import networkx as nx
142
+ from collections import defaultdict
143
+
144
+ def build_knowledge_graph(extraction_results):
145
+ '''Build NetworkX graph from extracted entities and relationships'''
146
+ G = nx.MultiDiGraph()
147
+
148
+ # Add all entities as nodes
149
+ all_entities = []
150
+ all_relationships = []
151
+
152
+ for doc_extraction in extraction_results:
153
+ entities = doc_extraction.get("entities", [])
154
+ relationships = doc_extraction.get("relationships", [])
155
+
156
+ # Add entities
157
+ for entity in entities:
158
+ node_id = entity["name"].lower()
159
+ G.add_node(node_id,
160
+ name=entity["name"],
161
+ type=entity["type"],
162
+ description=entity.get("description", ""),
163
+ documents=set())
164
+ all_entities.append(entity)
165
+
166
+ # Add relationships
167
+ for rel in relationships:
168
+ source = rel["source"].lower()
169
+ target = rel["target"].lower()
170
+ G.add_edge(source, target,
171
+ type=rel["type"],
172
+ description=rel.get("description", ""))
173
+ all_relationships.append(rel)
174
+
175
+ # Detect communities
176
+ if len(G) > 0:
177
+ if "{self.community_algorithm}" == "louvain":
178
+ import community
179
+ communities = community.best_partition(G.to_undirected())
180
+ else:
181
+ # Simple connected components
182
+ communities = {{}}
183
+ for i, comp in enumerate(nx.weakly_connected_components(G)):
184
+ for node in comp:
185
+ communities[node] = i
186
+ else:
187
+ communities = {{}}
188
+
189
+ # Build community summaries
190
+ community_nodes = defaultdict(list)
191
+ for node, comm_id in communities.items():
192
+ community_nodes[comm_id].append(node)
193
+
194
+ graph_data = {{
195
+ "graph": nx.node_link_data(G),
196
+ "entities": all_entities,
197
+ "relationships": all_relationships,
198
+ "communities": communities,
199
+ "community_nodes": dict(community_nodes),
200
+ "stats": {{
201
+ "num_entities": len(G),
202
+ "num_relationships": len(G.edges()),
203
+ "num_communities": len(set(communities.values())) if communities else 0
204
+ }}
205
+ }}
206
+
207
+ result = {{"graph_data": build_knowledge_graph(extraction_results)}}
208
+ """
209
+ },
210
+ )
211
+
212
+ # Query processor for graph
213
+ query_processor_id = builder.add_node(
214
+ "LLMAgentNode",
215
+ node_id="query_processor",
216
+ config={
217
+ "system_prompt": """Analyze the query to identify:
218
+ 1. Key entities mentioned or implied
219
+ 2. Types of relationships being asked about
220
+ 3. Whether multi-hop reasoning is needed
221
+ 4. The depth of analysis required
222
+
223
+ Return JSON:
224
+ {
225
+ "entities": ["entity1", "entity2"],
226
+ "relationship_types": ["type1", "type2"],
227
+ "requires_multi_hop": true/false,
228
+ "reasoning_type": "causal/comparative/analytical"
229
+ }""",
230
+ "model": "gpt-4",
231
+ },
232
+ )
233
+
234
+ # Graph traversal and retrieval
235
+ graph_retriever_id = builder.add_node(
236
+ "PythonCodeNode",
237
+ node_id="graph_retriever",
238
+ config={
239
+ "code": f"""
240
+ import networkx as nx
241
+ from collections import deque
242
+
243
+ def retrieve_from_graph(graph_data, query_analysis):
244
+ '''Retrieve relevant subgraph based on query analysis'''
245
+ # Reconstruct graph
246
+ G = nx.node_link_graph(graph_data["graph"])
247
+
248
+ query_entities = [e.lower() for e in query_analysis.get("entities", [])]
249
+ relationship_types = query_analysis.get("relationship_types", [])
250
+ requires_multi_hop = query_analysis.get("requires_multi_hop", False)
251
+
252
+ # Find relevant nodes
253
+ relevant_nodes = set()
254
+ for entity in query_entities:
255
+ # Fuzzy match entities
256
+ for node in G.nodes():
257
+ if entity in node or node in entity:
258
+ relevant_nodes.add(node)
259
+
260
+ # Multi-hop expansion if needed
261
+ if requires_multi_hop and relevant_nodes:
262
+ expanded_nodes = set(relevant_nodes)
263
+ for start_node in relevant_nodes:
264
+ # BFS up to max_hops
265
+ visited = {{start_node}}
266
+ queue = deque([(start_node, 0)])
267
+
268
+ while queue:
269
+ node, depth = queue.popleft()
270
+ if depth >= {self.max_hops}:
271
+ continue
272
+
273
+ # Check neighbors
274
+ for neighbor in G.neighbors(node):
275
+ if neighbor not in visited:
276
+ visited.add(neighbor)
277
+ expanded_nodes.add(neighbor)
278
+ queue.append((neighbor, depth + 1))
279
+
280
+ relevant_nodes = expanded_nodes
281
+
282
+ # Extract subgraph
283
+ if relevant_nodes:
284
+ subgraph = G.subgraph(relevant_nodes).copy()
285
+
286
+ # Get relevant relationships
287
+ relevant_edges = []
288
+ for u, v, data in subgraph.edges(data=True):
289
+ if not relationship_types or data.get("type") in relationship_types:
290
+ relevant_edges.append({{
291
+ "source": u,
292
+ "target": v,
293
+ "type": data.get("type"),
294
+ "description": data.get("description")
295
+ }})
296
+
297
+ # Get node details
298
+ relevant_entities = []
299
+ for node in relevant_nodes:
300
+ node_data = G.nodes[node]
301
+ relevant_entities.append({{
302
+ "name": node_data.get("name", node),
303
+ "type": node_data.get("type"),
304
+ "description": node_data.get("description"),
305
+ "centrality": nx.degree_centrality(subgraph).get(node, 0)
306
+ }})
307
+
308
+ # Sort by centrality
309
+ relevant_entities.sort(key=lambda x: x["centrality"], reverse=True)
310
+
311
+ else:
312
+ relevant_entities = []
313
+ relevant_edges = []
314
+ subgraph = nx.DiGraph()
315
+
316
+ # Get community context if available
317
+ communities = graph_data.get("communities", {{}})
318
+ community_context = {{}}
319
+ for node in relevant_nodes:
320
+ comm_id = communities.get(node)
321
+ if comm_id is not None:
322
+ community_nodes = graph_data.get("community_nodes", {{}}).get(str(comm_id), [])
323
+ community_context[comm_id] = community_nodes
324
+
325
+ retrieval_result = {{
326
+ "entities": relevant_entities[:20], # Top 20 by centrality
327
+ "relationships": relevant_edges[:30], # Top 30 relationships
328
+ "subgraph_stats": {{
329
+ "nodes": len(relevant_nodes),
330
+ "edges": len(relevant_edges)
331
+ }},
332
+ "community_context": community_context,
333
+ "query_entities_found": len([e for e in query_entities if any(e in n for n in relevant_nodes)])
334
+ }}
335
+
336
+ result = {{"graph_retrieval": retrieval_result}}
337
+ """
338
+ },
339
+ )
340
+
341
+ # Global summary generator (if enabled)
342
+ if self.use_global_summary:
343
+ summary_generator_id = builder.add_node(
344
+ "LLMAgentNode",
345
+ node_id="summary_generator",
346
+ config={
347
+ "system_prompt": """Generate high-level summaries of document communities.
348
+ Focus on main themes, key entities, and important relationships.
349
+ Be concise but comprehensive.""",
350
+ "model": "gpt-4",
351
+ },
352
+ )
353
+
354
+ # Result synthesizer
355
+ result_synthesizer_id = builder.add_node(
356
+ "PythonCodeNode",
357
+ node_id="result_synthesizer",
358
+ config={
359
+ "code": """
360
+ # Combine all graph information
361
+ graph_retrieval = graph_retrieval
362
+ query = query
363
+ graph_data = graph_data
364
+
365
+ # Build context from retrieved subgraph
366
+ context_parts = []
367
+
368
+ # Add entity information
369
+ if graph_retrieval["entities"]:
370
+ context_parts.append("Key Entities:")
371
+ for entity in graph_retrieval["entities"][:10]:
372
+ context_parts.append(f"- {entity['name']} ({entity['type']}): {entity['description']}")
373
+
374
+ # Add relationship information
375
+ if graph_retrieval["relationships"]:
376
+ context_parts.append("\\nKey Relationships:")
377
+ for rel in graph_retrieval["relationships"][:10]:
378
+ context_parts.append(f"- {rel['source']} {rel['type']} {rel['target']}")
379
+
380
+ # Add community context
381
+ if graph_retrieval["community_context"]:
382
+ context_parts.append("\\nRelated Topic Clusters:")
383
+ for comm_id, nodes in list(graph_retrieval["community_context"].items())[:3]:
384
+ context_parts.append(f"- Cluster {comm_id}: {', '.join(nodes[:5])}")
385
+
386
+ context = "\\n".join(context_parts)
387
+
388
+ # Create reasoning path visualization
389
+ reasoning_path = []
390
+ entities = graph_retrieval["entities"]
391
+ if len(entities) > 1:
392
+ # Simple path representation
393
+ for i in range(min(3, len(entities)-1)):
394
+ reasoning_path.append({
395
+ "hop": i + 1,
396
+ "from": entities[i]["name"],
397
+ "to": entities[i+1]["name"],
398
+ "connection": "related through graph structure"
399
+ })
400
+
401
+ result = {
402
+ "graph_rag_results": {
403
+ "query": query,
404
+ "retrieved_entities": graph_retrieval["entities"],
405
+ "retrieved_relationships": graph_retrieval["relationships"],
406
+ "graph_context": context,
407
+ "reasoning_path": reasoning_path,
408
+ "subgraph_size": graph_retrieval["subgraph_stats"],
409
+ "community_info": {
410
+ "num_communities": len(graph_retrieval["community_context"]),
411
+ "communities_accessed": list(graph_retrieval["community_context"].keys())
412
+ },
413
+ "global_graph_stats": graph_data["stats"]
414
+ }
415
+ }
416
+ """
417
+ },
418
+ )
419
+
420
+ # Connect workflow
421
+ builder.add_connection(
422
+ entity_extractor_id, "response", graph_builder_id, "extraction_results"
423
+ )
424
+ builder.add_connection(
425
+ query_processor_id, "response", graph_retriever_id, "query_analysis"
426
+ )
427
+ builder.add_connection(
428
+ graph_builder_id, "graph_data", graph_retriever_id, "graph_data"
429
+ )
430
+ builder.add_connection(
431
+ graph_retriever_id,
432
+ "graph_retrieval",
433
+ result_synthesizer_id,
434
+ "graph_retrieval",
435
+ )
436
+ builder.add_connection(
437
+ graph_builder_id, "graph_data", result_synthesizer_id, "graph_data"
438
+ )
439
+
440
+ if self.use_global_summary:
441
+ builder.add_connection(
442
+ graph_builder_id, "graph_data", summary_generator_id, "graph_data"
443
+ )
444
+ builder.add_connection(
445
+ summary_generator_id,
446
+ "response",
447
+ result_synthesizer_id,
448
+ "global_summaries",
449
+ )
450
+
451
+ return builder.build(name="graph_rag_workflow")
452
+
453
+
454
+ @register_node()
455
+ class GraphBuilderNode(Node):
456
+ """
457
+ Dedicated Graph Construction Node
458
+
459
+ Builds knowledge graphs from documents with advanced features:
460
+ - Coreference resolution for entity consolidation
461
+ - Temporal relationship tracking
462
+ - Confidence scoring for relationships
463
+ - Incremental graph updates
464
+
465
+ When to use:
466
+ - Best for: Pre-building graphs for repeated queries
467
+ - Not ideal for: One-time queries, small document sets
468
+ - Performance: 100-500ms per document
469
+ - Graph quality: Depends on entity extraction quality
470
+
471
+ Example:
472
+ builder = GraphBuilderNode(
473
+ merge_similar_entities=True,
474
+ similarity_threshold=0.85
475
+ )
476
+
477
+ graph = await builder.run(
478
+ documents=documents,
479
+ existing_graph=previous_graph # Optional: update existing
480
+ )
481
+
482
+ Parameters:
483
+ merge_similar_entities: Consolidate similar entity names
484
+ similarity_threshold: Threshold for entity merging
485
+ track_temporal: Add timestamps to relationships
486
+ confidence_scoring: Calculate relationship confidence
487
+
488
+ Returns:
489
+ graph: NetworkX graph object
490
+ entity_map: Mapping of entities to canonical forms
491
+ statistics: Graph construction statistics
492
+ """
493
+
494
+ def __init__(
495
+ self,
496
+ name: str = "graph_builder",
497
+ merge_similar_entities: bool = True,
498
+ similarity_threshold: float = 0.85,
499
+ track_temporal: bool = False,
500
+ confidence_scoring: bool = True,
501
+ ):
502
+ self.merge_similar_entities = merge_similar_entities
503
+ self.similarity_threshold = similarity_threshold
504
+ self.track_temporal = track_temporal
505
+ self.confidence_scoring = confidence_scoring
506
+ super().__init__(name)
507
+
508
+ def get_parameters(self) -> Dict[str, NodeParameter]:
509
+ return {
510
+ "documents": NodeParameter(
511
+ name="documents",
512
+ type=list,
513
+ required=True,
514
+ description="Documents to build graph from",
515
+ ),
516
+ "existing_graph": NodeParameter(
517
+ name="existing_graph",
518
+ type=dict,
519
+ required=False,
520
+ description="Existing graph to update",
521
+ ),
522
+ "entity_types": NodeParameter(
523
+ name="entity_types",
524
+ type=list,
525
+ required=False,
526
+ description="Types of entities to extract",
527
+ ),
528
+ }
529
+
530
+ def run(self, **kwargs) -> Dict[str, Any]:
531
+ """Build or update knowledge graph"""
532
+ documents = kwargs.get("documents", [])
533
+ existing_graph = kwargs.get("existing_graph")
534
+
535
+ # Initialize or load graph
536
+ if existing_graph:
537
+ G = nx.node_link_graph(existing_graph)
538
+ else:
539
+ G = nx.MultiDiGraph()
540
+
541
+ # Entity extraction would happen here (simplified for example)
542
+ # In production, would use LLM or NER model
543
+
544
+ # Build entity map for deduplication
545
+ entity_map = {}
546
+
547
+ # Add sample graph building logic
548
+ for doc in documents:
549
+ doc_id = doc.get("id", hash(doc.get("content", "")))
550
+
551
+ # Simplified entity extraction
552
+ # In production, would use proper NER
553
+ words = doc.get("content", "").split()
554
+
555
+ # Add some sample entities
556
+ if "transformer" in doc.get("content", "").lower():
557
+ G.add_node("transformer", type="technology", documents={doc_id})
558
+ G.add_node("attention", type="concept", documents={doc_id})
559
+ G.add_edge("transformer", "attention", type="uses", confidence=0.9)
560
+
561
+ # Calculate graph statistics
562
+ stats = {
563
+ "total_nodes": len(G),
564
+ "total_edges": len(G.edges()),
565
+ "density": nx.density(G) if len(G) > 0 else 0,
566
+ "components": nx.number_weakly_connected_components(G) if len(G) > 0 else 0,
567
+ }
568
+
569
+ return {
570
+ "graph": nx.node_link_data(G),
571
+ "entity_map": entity_map,
572
+ "statistics": stats,
573
+ "build_metadata": {
574
+ "documents_processed": len(documents),
575
+ "merge_applied": self.merge_similar_entities,
576
+ "temporal_tracking": self.track_temporal,
577
+ },
578
+ }
579
+
580
+
581
+ @register_node()
582
+ class GraphQueryNode(Node):
583
+ """
584
+ Advanced Graph Query Execution
585
+
586
+ Executes complex queries on knowledge graphs with support for:
587
+ - Path queries (find connections between entities)
588
+ - Pattern matching (find subgraphs matching criteria)
589
+ - Aggregation queries (community statistics)
590
+ - Temporal queries (time-based filtering)
591
+
592
+ When to use:
593
+ - Best for: Complex analytical queries, relationship exploration
594
+ - Not ideal for: Simple lookups, keyword search
595
+ - Performance: 50-500ms depending on graph size
596
+ - Flexibility: Supports Cypher-like query patterns
597
+
598
+ Example:
599
+ querier = GraphQueryNode()
600
+
601
+ # Find influence paths
602
+ result = await querier.run(
603
+ graph=knowledge_graph,
604
+ query_type="path",
605
+ source_entity="BERT",
606
+ target_entity="GPT",
607
+ max_length=4
608
+ )
609
+
610
+ Parameters:
611
+ query_type: Type of query (path, pattern, aggregate)
612
+ filters: Attribute filters for nodes/edges
613
+ aggregations: Statistical operations to perform
614
+ return_subgraph: Return matching subgraph
615
+
616
+ Returns:
617
+ matches: Entities/relationships matching query
618
+ paths: Connection paths found
619
+ aggregations: Statistical results
620
+ subgraph: Matching subgraph if requested
621
+ """
622
+
623
+ def __init__(self, name: str = "graph_query"):
624
+ super().__init__(name)
625
+
626
+ def get_parameters(self) -> Dict[str, NodeParameter]:
627
+ return {
628
+ "graph": NodeParameter(
629
+ name="graph",
630
+ type=dict,
631
+ required=True,
632
+ description="Knowledge graph to query",
633
+ ),
634
+ "query_type": NodeParameter(
635
+ name="query_type",
636
+ type=str,
637
+ required=True,
638
+ description="Type of query: path, pattern, aggregate",
639
+ ),
640
+ "query_params": NodeParameter(
641
+ name="query_params",
642
+ type=dict,
643
+ required=True,
644
+ description="Query-specific parameters",
645
+ ),
646
+ }
647
+
648
+ def run(self, **kwargs) -> Dict[str, Any]:
649
+ """Execute graph query"""
650
+ graph_data = kwargs.get("graph", {})
651
+ query_type = kwargs.get("query_type", "path")
652
+ query_params = kwargs.get("query_params", {})
653
+
654
+ # Reconstruct graph
655
+ G = nx.node_link_graph(graph_data)
656
+
657
+ results = {
658
+ "query_type": query_type,
659
+ "query_params": query_params,
660
+ "matches": [],
661
+ "paths": [],
662
+ "aggregations": {},
663
+ }
664
+
665
+ if query_type == "path":
666
+ # Find paths between entities
667
+ source = query_params.get("source_entity", "").lower()
668
+ target = query_params.get("target_entity", "").lower()
669
+ max_length = query_params.get("max_length", 3)
670
+
671
+ if source in G and target in G:
672
+ try:
673
+ # Find all simple paths
674
+ paths = list(
675
+ nx.all_simple_paths(G, source, target, cutoff=max_length)
676
+ )
677
+ results["paths"] = [
678
+ {
679
+ "path": path,
680
+ "length": len(path) - 1,
681
+ "edges": [
682
+ (path[i], path[i + 1]) for i in range(len(path) - 1)
683
+ ],
684
+ }
685
+ for path in paths[:10] # Limit to 10 paths
686
+ ]
687
+ except nx.NetworkXNoPath:
688
+ results["paths"] = []
689
+
690
+ elif query_type == "pattern":
691
+ # Pattern matching (simplified)
692
+ pattern = query_params.get("pattern", {})
693
+ node_type = pattern.get("node_type")
694
+
695
+ matches = []
696
+ for node, data in G.nodes(data=True):
697
+ if not node_type or data.get("type") == node_type:
698
+ matches.append(
699
+ {"entity": node, "attributes": data, "degree": G.degree(node)}
700
+ )
701
+ results["matches"] = matches[:20]
702
+
703
+ elif query_type == "aggregate":
704
+ # Graph statistics
705
+ results["aggregations"] = {
706
+ "node_count": len(G),
707
+ "edge_count": len(G.edges()),
708
+ "density": nx.density(G),
709
+ "avg_degree": (
710
+ sum(dict(G.degree()).values()) / len(G) if len(G) > 0 else 0
711
+ ),
712
+ "clustering_coefficient": (
713
+ nx.average_clustering(G.to_undirected()) if len(G) > 0 else 0
714
+ ),
715
+ }
716
+
717
+ return results
718
+
719
+
720
+ # Export all graph nodes
721
+ __all__ = ["GraphRAGNode", "GraphBuilderNode", "GraphQueryNode"]