kite-agent 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kite/__init__.py +46 -0
- kite/ab_testing.py +384 -0
- kite/agent.py +556 -0
- kite/agents/__init__.py +3 -0
- kite/agents/plan_execute.py +191 -0
- kite/agents/react_agent.py +509 -0
- kite/agents/reflective_agent.py +90 -0
- kite/agents/rewoo.py +119 -0
- kite/agents/tot.py +151 -0
- kite/conversation.py +125 -0
- kite/core.py +974 -0
- kite/data_loaders.py +111 -0
- kite/embedding_providers.py +372 -0
- kite/llm_providers.py +1278 -0
- kite/memory/__init__.py +6 -0
- kite/memory/advanced_rag.py +333 -0
- kite/memory/graph_rag.py +719 -0
- kite/memory/session_memory.py +423 -0
- kite/memory/vector_memory.py +579 -0
- kite/monitoring.py +611 -0
- kite/observers.py +107 -0
- kite/optimization/__init__.py +9 -0
- kite/optimization/resource_router.py +80 -0
- kite/persistence.py +42 -0
- kite/pipeline/__init__.py +5 -0
- kite/pipeline/deterministic_pipeline.py +323 -0
- kite/pipeline/reactive_pipeline.py +171 -0
- kite/pipeline_manager.py +15 -0
- kite/routing/__init__.py +6 -0
- kite/routing/aggregator_router.py +325 -0
- kite/routing/llm_router.py +149 -0
- kite/routing/semantic_router.py +228 -0
- kite/safety/__init__.py +6 -0
- kite/safety/circuit_breaker.py +360 -0
- kite/safety/guardrails.py +82 -0
- kite/safety/idempotency_manager.py +304 -0
- kite/safety/kill_switch.py +75 -0
- kite/tool.py +183 -0
- kite/tool_registry.py +87 -0
- kite/tools/__init__.py +21 -0
- kite/tools/code_execution.py +53 -0
- kite/tools/contrib/__init__.py +19 -0
- kite/tools/contrib/calculator.py +26 -0
- kite/tools/contrib/datetime_utils.py +20 -0
- kite/tools/contrib/linkedin.py +428 -0
- kite/tools/contrib/web_search.py +30 -0
- kite/tools/mcp/__init__.py +31 -0
- kite/tools/mcp/database_mcp.py +267 -0
- kite/tools/mcp/gdrive_mcp_server.py +503 -0
- kite/tools/mcp/gmail_mcp_server.py +601 -0
- kite/tools/mcp/postgres_mcp_server.py +490 -0
- kite/tools/mcp/slack_mcp_server.py +538 -0
- kite/tools/mcp/stripe_mcp_server.py +219 -0
- kite/tools/search.py +90 -0
- kite/tools/system_tools.py +54 -0
- kite/tools_manager.py +27 -0
- kite_agent-0.1.0.dist-info/METADATA +621 -0
- kite_agent-0.1.0.dist-info/RECORD +61 -0
- kite_agent-0.1.0.dist-info/WHEEL +5 -0
- kite_agent-0.1.0.dist-info/licenses/LICENSE +21 -0
- kite_agent-0.1.0.dist-info/top_level.txt +1 -0
kite/memory/graph_rag.py
ADDED
|
@@ -0,0 +1,719 @@
|
|
|
1
|
+
"""
|
|
2
|
+
GraphRAG Implementation
|
|
3
|
+
Based on Chapter 3.3: When Vector Search Isn't Enough
|
|
4
|
+
|
|
5
|
+
Relationship-aware knowledge retrieval using knowledge graphs.
|
|
6
|
+
|
|
7
|
+
From book - Vector DB Failure Mode:
|
|
8
|
+
Query: "Who approved the AlphaCorp contract?"
|
|
9
|
+
|
|
10
|
+
Vector search finds 3 separate documents but can't connect:
|
|
11
|
+
- "Project Zeus budget approved by Sarah"
|
|
12
|
+
- "David leads Project Zeus"
|
|
13
|
+
- "AlphaCorp partnership with David"
|
|
14
|
+
|
|
15
|
+
GraphRAG connects: Sarah Project Zeus David AlphaCorp [OK]
|
|
16
|
+
|
|
17
|
+
Run: python graph_rag.py
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import os
|
|
21
|
+
import json
|
|
22
|
+
from typing import Dict, List, Optional, Set, Tuple, Any
|
|
23
|
+
from dataclasses import dataclass, field
|
|
24
|
+
from collections import defaultdict
|
|
25
|
+
import networkx as nx
|
|
26
|
+
from openai import OpenAI
|
|
27
|
+
from dotenv import load_dotenv
|
|
28
|
+
|
|
29
|
+
load_dotenv()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# ============================================================================
# DATA MODELS
# ============================================================================

@dataclass
class Entity:
    """A node in the knowledge graph."""
    # Unique identifier used as the graph node key (e.g. "e1").
    id: str
    # Entity category: person, project, company, etc.
    type: str
    # Name exactly as it appears in the source text.
    name: str
    # Arbitrary extra attributes attached to the node.
    properties: Dict[str, Any] = field(default_factory=dict)


@dataclass
class Relationship:
    """An edge in the knowledge graph."""
    # Source entity id.
    source: str
    # Target entity id.
    target: str
    # Relationship category: manages, approves, partners_with, etc.
    type: str
    # Arbitrary extra attributes attached to the edge.
    properties: Dict[str, Any] = field(default_factory=dict)


@dataclass
class Document:
    """A document with extracted entities and relationships."""
    id: str
    text: str
    entities: List[Entity] = field(default_factory=list)
    relationships: List[Relationship] = field(default_factory=list)


# ============================================================================
# ENTITY & RELATIONSHIP EXTRACTION
# ============================================================================

class EntityExtractor:
    """
    Extract entities and relationships from text using an LLM.

    This is the core of GraphRAG - converting unstructured text
    into structured graph data.
    """

    def __init__(self, llm=None):
        # LLM client exposing complete(prompt, temperature=...) -> str.
        # When None, extract() is a no-op that returns empty results.
        self.llm = llm
        self.entity_types = [
            "person", "company", "project", "product",
            "location", "department", "role"
        ]

        self.relationship_types = [
            "manages", "works_on", "approves", "reports_to",
            "partners_with", "owns", "leads", "member_of"
        ]

    def extract(self, text: str) -> Tuple[List[Entity], List[Relationship]]:
        """
        Extract entities and relationships from text.

        Args:
            text: Input text

        Returns:
            (entities, relationships); both empty when no LLM is configured
            or when the LLM response cannot be parsed.
        """
        print(" Extracting entities and relationships...")

        if self.llm is None:
            # No LLM configured - nothing to extract.
            return [], []

        # Create extraction prompt
        prompt = f"""Extract entities and relationships from this text.

Text: {text}

Entity types: {', '.join(self.entity_types)}
Relationship types: {', '.join(self.relationship_types)}

Output ONLY valid JSON with this structure:
{{
  "entities": [
    {{"id": "e1", "type": "person", "name": "Sarah Johnson"}},
    {{"id": "e2", "type": "project", "name": "Project Zeus"}}
  ],
  "relationships": [
    {{"source": "e1", "target": "e2", "type": "approves"}}
  ]
}}

Important:
- Use consistent IDs (e1, e2, e3...)
- Include all mentioned entities
- Capture all relationships
- Keep names as they appear in text"""

        content = self.llm.complete(prompt, temperature=0.1).strip()
        content = self._strip_markdown_fence(content)

        try:
            data = json.loads(content)

            entities = [
                Entity(
                    id=e["id"],
                    type=e["type"],
                    name=e["name"],
                    properties=e.get("properties", {})
                )
                for e in data.get("entities", [])
            ]

            relationships = [
                Relationship(
                    source=r["source"],
                    target=r["target"],
                    type=r["type"],
                    properties=r.get("properties", {})
                )
                for r in data.get("relationships", [])
            ]

            print(f" [OK] Found {len(entities)} entities, {len(relationships)} relationships")

            return entities, relationships

        except (json.JSONDecodeError, KeyError, TypeError) as e:
            # JSONDecodeError: response was not valid JSON.
            # KeyError/TypeError: JSON parsed but an entry was missing a
            # required field or had the wrong shape (previously uncaught).
            print(f" Failed to parse: {e}")
            return [], []

    @staticmethod
    def _strip_markdown_fence(content: str) -> str:
        """Remove a surrounding ``` / ```json markdown fence, if present."""
        if content.startswith("```"):
            parts = content.split("```")
            # parts[1] is the fenced body; guard against a lone fence marker
            # (the old code indexed [1] unconditionally).
            content = parts[1] if len(parts) > 1 else ""
            if content.startswith("json"):
                content = content[4:]
        return content.strip()
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
# ============================================================================
# KNOWLEDGE GRAPH
# ============================================================================

class KnowledgeGraph:
    """
    Knowledge graph for storing and querying entity relationships.

    Uses NetworkX for graph operations and path finding.

    NOTE(review): the underlying DiGraph stores at most one edge per
    (source, target) pair, so a second relationship between the same two
    entities overwrites the first - confirm this is acceptable.
    """

    def __init__(self):
        self.graph = nx.DiGraph()
        # entity id -> Entity object (attributes mirrored onto graph nodes).
        self.entities: Dict[str, Entity] = {}
        # lower-cased entity name -> entity id, for case-insensitive lookup.
        self.entity_name_to_id: Dict[str, str] = {}

        print("[OK] Knowledge Graph initialized")

    def save_to_file(self, path: str):
        """Save graph to JSON file (NetworkX node-link format)."""
        data = nx.node_link_data(self.graph)
        try:
            with open(path, 'w') as f:
                json.dump(data, f, indent=2)
            print(f"[OK] Graph saved to {path}")
        except Exception as e:
            # Best-effort persistence: report the failure and continue.
            print(f"[ERROR] Failed to save graph: {e}")

    def load_from_file(self, path: str):
        """Load graph from JSON file; silently no-op if the file is missing."""
        if not os.path.exists(path):
            return

        try:
            with open(path, 'r') as f:
                data = json.load(f)

            self.graph = nx.node_link_graph(data)

            # Rebuild entities dict from graph nodes
            self.entities = {}
            self.entity_name_to_id = {}
            for node_id, attrs in self.graph.nodes(data=True):
                entity = Entity(
                    id=node_id,
                    type=attrs.get('type', 'unknown'),
                    name=attrs.get('name', 'Unknown'),
                    # Everything except the reserved keys is a custom property.
                    properties={k: v for k, v in attrs.items() if k not in ['type', 'name']}
                )
                self.entities[node_id] = entity
                self.entity_name_to_id[entity.name.lower()] = entity.id

            print(f"[OK] Graph loaded from {path} ({len(self.entities)} entities)")
        except Exception as e:
            print(f"[ERROR] Failed to load graph: {e}")

    def add_entity(self, entity: Entity):
        """Add entity to graph (re-adding the same id updates it)."""
        self.entities[entity.id] = entity
        self.entity_name_to_id[entity.name.lower()] = entity.id

        # Mirror the entity onto the graph node so persistence round-trips.
        self.graph.add_node(
            entity.id,
            type=entity.type,
            name=entity.name,
            **entity.properties
        )

    def add_relationship(self, relationship: Relationship):
        """Add relationship to graph; both endpoints must already exist."""
        if relationship.source not in self.entities:
            print(f" [WARN] Warning: Source entity {relationship.source} not found")
            return

        if relationship.target not in self.entities:
            print(f" [WARN] Warning: Target entity {relationship.target} not found")
            return

        # Add edge with attributes
        self.graph.add_edge(
            relationship.source,
            relationship.target,
            type=relationship.type,
            **relationship.properties
        )

    def add_document(self, document: Document):
        """Add all entities and relationships from document."""
        print(f"\n Adding document: {document.id}")

        # Entities first, so the relationship endpoint checks succeed.
        for entity in document.entities:
            self.add_entity(entity)

        for rel in document.relationships:
            self.add_relationship(rel)

        print(f" [OK] Added {len(document.entities)} entities, {len(document.relationships)} relationships")

    def find_entity(self, name: str) -> Optional[str]:
        """Find entity ID by name (case-insensitive)."""
        return self.entity_name_to_id.get(name.lower())

    def find_path(self, source_name: str, target_name: str) -> Optional[List[str]]:
        """
        Find shortest path between two entities.

        This is the key feature that vector search can't do!

        Args:
            source_name: Source entity name
            target_name: Target entity name

        Returns:
            List of entity IDs forming path, or None
        """
        source_id = self.find_entity(source_name)
        target_id = self.find_entity(target_name)

        if not source_id:
            print(f" Entity not found: {source_name}")
            return None

        if not target_id:
            print(f" Entity not found: {target_name}")
            return None

        try:
            # Ignore edge direction: a path through reversed edges still
            # represents a meaningful connection.
            undirected = self.graph.to_undirected()
            return nx.shortest_path(undirected, source_id, target_id)
        except nx.NetworkXNoPath:
            print(f" No path between {source_name} and {target_name}")
            return None

    def get_neighbors(self, entity_name: str, max_hops: int = 1) -> List[Entity]:
        """
        Get neighboring entities within N hops (edge direction ignored).

        Args:
            entity_name: Entity to start from
            max_hops: Maximum distance

        Returns:
            List of neighboring entities (the start entity is excluded)
        """
        entity_id = self.find_entity(entity_name)

        if not entity_id:
            return []

        # BFS frontier expansion, one level per hop.
        neighbors: Set[str] = set()
        current_level = {entity_id}

        for _hop in range(max_hops):
            next_level = set()

            for node in current_level:
                # Follow edges in both directions.
                next_level.update(self.graph.successors(node))
                next_level.update(self.graph.predecessors(node))

            # BUG FIX: compute the new frontier BEFORE merging into
            # `neighbors`. The old code did neighbors.update(next_level)
            # first and then subtracted `neighbors`, which always yielded an
            # empty frontier and capped traversal at one hop regardless of
            # max_hops.
            current_level = next_level - neighbors - {entity_id}
            neighbors.update(next_level)

        # Don't report the start node as its own neighbor (reachable via
        # a round trip when max_hops > 1).
        neighbors.discard(entity_id)
        return [self.entities[eid] for eid in neighbors if eid in self.entities]

    def query_relationship(
        self,
        entity_name: str,
        relationship_type: Optional[str] = None
    ) -> List[Tuple[Entity, str, Entity]]:
        """
        Query relationships touching an entity (incoming and outgoing).

        Args:
            entity_name: Starting entity
            relationship_type: Filter by relationship type (optional)

        Returns:
            List of (source, relationship, target) tuples
        """
        entity_id = self.find_entity(entity_name)

        if not entity_id:
            return []

        results = []

        # Outgoing relationships
        for target_id in self.graph.successors(entity_id):
            edge_data = self.graph[entity_id][target_id]
            rel_type = edge_data.get("type", "unknown")

            if relationship_type is None or rel_type == relationship_type:
                results.append((
                    self.entities[entity_id],
                    rel_type,
                    self.entities[target_id]
                ))

        # Incoming relationships
        for source_id in self.graph.predecessors(entity_id):
            edge_data = self.graph[source_id][entity_id]
            rel_type = edge_data.get("type", "unknown")

            if relationship_type is None or rel_type == relationship_type:
                results.append((
                    self.entities[source_id],
                    rel_type,
                    self.entities[entity_id]
                ))

        return results

    def get_stats(self) -> Dict:
        """Get graph statistics."""
        return {
            "total_entities": len(self.entities),
            "total_relationships": self.graph.number_of_edges(),
            "entity_types": len(set(e.type for e in self.entities.values())),
            "connected_components": nx.number_weakly_connected_components(self.graph)
        }
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
# ============================================================================
# GRAPH RAG SYSTEM
# ============================================================================

class GraphRAG:
    """
    Complete GraphRAG system combining graph and vector search.

    From Chapter 3.3:
    - Vector search for fuzzy matching
    - Graph search for relationships
    - Hybrid approach for best results
    """

    def __init__(self, llm=None, persist_path: Optional[str] = None):
        """
        Args:
            llm: Optional LLM client, forwarded to EntityExtractor.
            persist_path: Optional JSON file path; when given, the graph is
                loaded from it on startup and re-saved after every document.
        """
        self.graph = KnowledgeGraph()
        self.extractor = EntityExtractor(llm=llm)
        # doc_id -> raw text of every ingested document.
        self.documents: Dict[str, str] = {}
        self.persist_path = persist_path

        if self.persist_path and os.path.exists(self.persist_path):
            self.graph.load_from_file(self.persist_path)

        print("[OK] GraphRAG system initialized")

    def add_document(self, doc_id: str, text: str):
        """
        Add document and extract graph structure.

        Args:
            doc_id: Document ID
            text: Document text
        """
        print(f"\n Processing document: {doc_id}")

        # Store document
        self.documents[doc_id] = text

        # Extract entities and relationships
        entities, relationships = self.extractor.extract(text)

        document = Document(
            id=doc_id,
            text=text,
            entities=entities,
            relationships=relationships
        )

        # Add to graph
        self.graph.add_document(document)

        # Auto-save so the graph survives restarts.
        if self.persist_path:
            self.graph.save_to_file(self.persist_path)

    def hybrid_search(self, query: str, top_k: int = 3) -> Dict[str, Any]:
        """
        Hybrid search combining graph and relationship reasoning.

        NOTE(review): relevance is a heuristic substring check against the
        generated answer, and top_k is currently unused - confirm intended.
        """
        answer = self.answer_relationship_query(query)

        is_relevant = "Connection found" in answer or "approved" in answer or "involved with" in answer

        return {
            "answer": answer,
            "success": True,
            "source": "graph_rag",
            "is_relevant": is_relevant,
            "documents": [{"content": answer, "id": "graph_result"}] if is_relevant else []
        }

    def query(self, query_str: str) -> Dict[str, Any]:
        """Query for entities and relationships."""
        answer = self.answer_relationship_query(query_str)

        # Collect every known entity mentioned verbatim (case-insensitive)
        # in the query.
        entities = [
            entity.name
            for entity in self.graph.entities.values()
            if entity.name.lower() in query_str.lower()
        ]

        return {
            "answer": answer,
            "entities": entities,
            "success": True
        }

    def answer_relationship_query(self, query: str) -> str:
        """
        Answer queries about relationships.

        This is what vector search fails at!

        Args:
            query: Natural language query

        Returns:
            Answer based on graph analysis
        """
        print(f"\n Query: {query}")

        # Simple pattern matching for demo
        # In production, use LLM to understand query intent
        query_lower = query.lower()

        # Pattern: "Who approved X?"
        if "who approved" in query_lower or "who signed off" in query_lower:
            return self._handle_approval_query(query)

        # Pattern: "How is X related to Y?"
        elif "related to" in query_lower or "connection between" in query_lower:
            return self._handle_connection_query(query)

        # Pattern: "What does X work on?"
        elif "work on" in query_lower or "working on" in query_lower:
            return self._handle_works_on_query(query)

        else:
            return "I can answer questions about relationships, approvals, and connections. Try: 'Who approved the AlphaCorp contract?'"

    def _handle_approval_query(self, query: str) -> str:
        """Handle 'who approved X' queries."""
        # Look for entity names mentioned in the query.
        for entity_id, entity in self.graph.entities.items():
            if entity.name.lower() in query.lower():
                # Find who has an "approves" edge pointing at this entity.
                approvers = []
                for source_id in self.graph.graph.predecessors(entity_id):
                    edge = self.graph.graph[source_id][entity_id]
                    if edge.get("type") == "approves":
                        approver = self.graph.entities[source_id]
                        approvers.append(approver.name)

                if approvers:
                    return f"{', '.join(approvers)} approved {entity.name}."

        return "I couldn't find approval information for that."

    def _handle_connection_query(self, query: str) -> str:
        """Handle 'how is X related to Y' queries."""
        # Collect every known entity name mentioned in the query.
        entity_names = []
        for entity in self.graph.entities.values():
            if entity.name.lower() in query.lower():
                entity_names.append(entity.name)

        if len(entity_names) >= 2:
            path = self.graph.find_path(entity_names[0], entity_names[1])

            if path:
                # Build a readable description of each hop along the path.
                path_desc = []
                for i in range(len(path) - 1):
                    source = self.graph.entities[path[i]]
                    target = self.graph.entities[path[i + 1]]
                    edge = self.graph.graph.get_edge_data(path[i], path[i + 1])

                    # Path finding ignores direction, so the edge may run the
                    # other way in the underlying directed graph.
                    if not edge:
                        edge = self.graph.graph.get_edge_data(path[i + 1], path[i])

                    rel_type = edge.get("type", "connected to") if edge else "connected to"
                    path_desc.append(f"{source.name} {rel_type} {target.name}")

                return f"Connection found: {' '.join(path_desc)}"

        return "I couldn't find a connection between those entities."

    def _handle_works_on_query(self, query: str) -> str:
        """Handle 'what does X work on' queries."""
        for entity_id, entity in self.graph.entities.items():
            if entity.name.lower() in query.lower() and entity.type == "person":
                # Find projects they work on or lead.
                projects = []
                for target_id in self.graph.graph.successors(entity_id):
                    edge = self.graph.graph[entity_id][target_id]
                    if edge.get("type") in ["works_on", "leads"]:
                        target = self.graph.entities[target_id]
                        projects.append(f"{target.name} ({edge.get('type')})")

                if projects:
                    return f"{entity.name} is involved with: {', '.join(projects)}"

        return "I couldn't find work information for that person."
|
|
589
|
+
|
|
590
|
+
|
|
591
|
+
# ============================================================================
# DEMO
# ============================================================================

def demo():
    """Run an end-to-end GraphRAG walkthrough.

    Ingests four sample documents, prints graph statistics, answers the
    book's relationship queries, and shows explicit path finding between
    two entities.

    NOTE(review): GraphRAG() is constructed without an LLM here, so
    EntityExtractor.extract() returns no entities and the graph stays empty
    unless an LLM client is wired in - confirm this is intended for the demo.
    """
    print("=" * 70)
    print("GRAPHRAG DEMO")
    print("=" * 70)
    print("\nBased on Chapter 3.3: When Vector Search Isn't Enough")
    print("\nDemonstrating relationship-aware queries that")
    print("vector databases cannot answer.\n")
    print("=" * 70)

    # Initialize GraphRAG
    graph_rag = GraphRAG()

    # Add documents (from book example)
    documents = {
        "doc1": """
        The Project Zeus budget for Q4 2025 was approved by Sarah Johnson,
        the VP of Engineering. The project aims to modernize our infrastructure
        and is expected to cost $2.5M over 6 months.
        """,

        "doc2": """
        David Chen has been leading Project Zeus since September 2025.
        He reports directly to Sarah Johnson and manages a team of 12 engineers.
        The project is currently on schedule and within budget.
        """,

        "doc3": """
        AlphaCorp has entered into a strategic partnership with our company.
        David Chen negotiated the terms of the partnership, which includes
        joint development on Project Zeus infrastructure components.
        """,

        "doc4": """
        The AlphaCorp contract was finalized in October 2025.
        The partnership focuses on cloud infrastructure and will leverage
        the technologies developed in Project Zeus.
        """
    }

    print("\n Adding documents to GraphRAG...")
    # strip() removes the literal's leading/trailing newline padding.
    for doc_id, text in documents.items():
        graph_rag.add_document(doc_id, text.strip())

    # Show graph statistics
    print("\n" + "="*70)
    print("KNOWLEDGE GRAPH STATISTICS")
    print("="*70)
    stats = graph_rag.graph.get_stats()
    print(f"Total entities: {stats['total_entities']}")
    print(f"Total relationships: {stats['total_relationships']}")
    print(f"Entity types: {stats['entity_types']}")
    print(f"Connected components: {stats['connected_components']}")

    # Test queries (from book example)
    print("\n" + "="*70)
    print("RELATIONSHIP QUERIES")
    print("="*70)
    print("\nThese queries require graph traversal.")
    print("Vector search would fail!\n")

    queries = [
        "Who approved the AlphaCorp contract?",
        "How is Sarah Johnson related to AlphaCorp?",
        "What does David Chen work on?"
    ]

    for i, query in enumerate(queries, 1):
        # NOTE(review): these separators print 70 spaces - presumably the
        # original used a divider character lost in transit; confirm.
        print(f"\n{' '*70}")
        print(f"Query {i}: {query}")
        print(' '*70)

        answer = graph_rag.answer_relationship_query(query)
        print(f"\n Answer:\n{answer}")

    # Show path finding
    print("\n" + "="*70)
    print("PATH FINDING EXAMPLE")
    print("="*70)
    print("\nFinding connection: Sarah Johnson AlphaCorp")

    path = graph_rag.graph.find_path("Sarah Johnson", "AlphaCorp")

    if path:
        print("\n[LINK] Connection found:")
        for i in range(len(path)):
            entity = graph_rag.graph.entities[path[i]]
            print(f" {i+1}. {entity.name} ({entity.type})")

            if i < len(path) - 1:
                # find_path ignores direction, so check both orientations
                # for the connecting edge's label.
                edge = graph_rag.graph.graph.get_edge_data(path[i], path[i+1])
                if not edge:
                    edge = graph_rag.graph.graph.get_edge_data(path[i+1], path[i])
                rel_type = edge.get("type", " ") if edge else " "
                print(f" {rel_type}")

    print("\n" + "="*70)
    print("WHY VECTOR SEARCH FAILS (From Book)")
    print("="*70)
    print("""
Vector Search Approach:
1. Query: "Who approved AlphaCorp contract?"
2. Finds 3 separate documents:
- Document 1: "Sarah approved Project Zeus"
- Document 2: "David leads Project Zeus"
- Document 3: "AlphaCorp partnership with David"
3. CANNOT connect the dots!

GraphRAG Approach:
1. Query: "Who approved AlphaCorp contract?"
2. Traverses graph:
Sarah approves Project Zeus
David leads Project Zeus
David negotiates AlphaCorp partnership
3. Finds path: Sarah Project Zeus David AlphaCorp [OK]
4. Answer: "Sarah approved the project that led to AlphaCorp partnership"

Key Insight:
- Vectors: Good for similarity matching
- Graphs: Good for relationship reasoning
- Hybrid: Best of both worlds!
""")


if __name__ == "__main__":
    demo()
|