kite-agent 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. kite/__init__.py +46 -0
  2. kite/ab_testing.py +384 -0
  3. kite/agent.py +556 -0
  4. kite/agents/__init__.py +3 -0
  5. kite/agents/plan_execute.py +191 -0
  6. kite/agents/react_agent.py +509 -0
  7. kite/agents/reflective_agent.py +90 -0
  8. kite/agents/rewoo.py +119 -0
  9. kite/agents/tot.py +151 -0
  10. kite/conversation.py +125 -0
  11. kite/core.py +974 -0
  12. kite/data_loaders.py +111 -0
  13. kite/embedding_providers.py +372 -0
  14. kite/llm_providers.py +1278 -0
  15. kite/memory/__init__.py +6 -0
  16. kite/memory/advanced_rag.py +333 -0
  17. kite/memory/graph_rag.py +719 -0
  18. kite/memory/session_memory.py +423 -0
  19. kite/memory/vector_memory.py +579 -0
  20. kite/monitoring.py +611 -0
  21. kite/observers.py +107 -0
  22. kite/optimization/__init__.py +9 -0
  23. kite/optimization/resource_router.py +80 -0
  24. kite/persistence.py +42 -0
  25. kite/pipeline/__init__.py +5 -0
  26. kite/pipeline/deterministic_pipeline.py +323 -0
  27. kite/pipeline/reactive_pipeline.py +171 -0
  28. kite/pipeline_manager.py +15 -0
  29. kite/routing/__init__.py +6 -0
  30. kite/routing/aggregator_router.py +325 -0
  31. kite/routing/llm_router.py +149 -0
  32. kite/routing/semantic_router.py +228 -0
  33. kite/safety/__init__.py +6 -0
  34. kite/safety/circuit_breaker.py +360 -0
  35. kite/safety/guardrails.py +82 -0
  36. kite/safety/idempotency_manager.py +304 -0
  37. kite/safety/kill_switch.py +75 -0
  38. kite/tool.py +183 -0
  39. kite/tool_registry.py +87 -0
  40. kite/tools/__init__.py +21 -0
  41. kite/tools/code_execution.py +53 -0
  42. kite/tools/contrib/__init__.py +19 -0
  43. kite/tools/contrib/calculator.py +26 -0
  44. kite/tools/contrib/datetime_utils.py +20 -0
  45. kite/tools/contrib/linkedin.py +428 -0
  46. kite/tools/contrib/web_search.py +30 -0
  47. kite/tools/mcp/__init__.py +31 -0
  48. kite/tools/mcp/database_mcp.py +267 -0
  49. kite/tools/mcp/gdrive_mcp_server.py +503 -0
  50. kite/tools/mcp/gmail_mcp_server.py +601 -0
  51. kite/tools/mcp/postgres_mcp_server.py +490 -0
  52. kite/tools/mcp/slack_mcp_server.py +538 -0
  53. kite/tools/mcp/stripe_mcp_server.py +219 -0
  54. kite/tools/search.py +90 -0
  55. kite/tools/system_tools.py +54 -0
  56. kite/tools_manager.py +27 -0
  57. kite_agent-0.1.0.dist-info/METADATA +621 -0
  58. kite_agent-0.1.0.dist-info/RECORD +61 -0
  59. kite_agent-0.1.0.dist-info/WHEEL +5 -0
  60. kite_agent-0.1.0.dist-info/licenses/LICENSE +21 -0
  61. kite_agent-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,719 @@
1
+ """
2
+ GraphRAG Implementation
3
+ Based on Chapter 3.3: When Vector Search Isn't Enough
4
+
5
+ Relationship-aware knowledge retrieval using knowledge graphs.
6
+
7
+ From book - Vector DB Failure Mode:
8
+ Query: "Who approved the AlphaCorp contract?"
9
+
10
+ Vector search finds 3 separate documents but can't connect:
11
+ - "Project Zeus budget approved by Sarah"
12
+ - "David leads Project Zeus"
13
+ - "AlphaCorp partnership with David"
14
+
15
+ GraphRAG connects: Sarah -> Project Zeus -> David -> AlphaCorp [OK]
16
+
17
+ Run: python graph_rag.py
18
+ """
19
+
20
+ import os
21
+ import json
22
+ from typing import Dict, List, Optional, Set, Tuple, Any
23
+ from dataclasses import dataclass, field
24
+ from collections import defaultdict
25
+ import networkx as nx
26
+ from openai import OpenAI
27
+ from dotenv import load_dotenv
28
+
29
+ load_dotenv()
30
+
31
+
32
+ # ============================================================================
33
+ # DATA MODELS
34
+ # ============================================================================
35
+
36
@dataclass
class Entity:
    """
    Knowledge-graph node describing a single extracted entity.

    Fields:
        id: Identifier used as the node key in the graph.
        type: Entity category (person, project, company, etc.).
        name: Entity name.
        properties: Extra attributes; each instance gets its own empty
            dict when none are supplied.
    """

    id: str
    type: str
    name: str
    properties: Dict = field(default_factory=dict)
43
+
44
+
45
@dataclass
class Relationship:
    """
    Knowledge-graph edge linking two entities.

    Fields:
        source: ID of the entity the edge starts from.
        target: ID of the entity the edge points to.
        type: Relationship label (manages, approves, partners_with, etc.).
        properties: Extra edge attributes; each instance gets its own
            empty dict when none are supplied.
    """

    source: str
    target: str
    type: str
    properties: Dict = field(default_factory=dict)
52
+
53
+
54
@dataclass
class Document:
    """
    A source document bundled with the graph data extracted from it.

    Fields:
        id: Document identifier.
        text: Document text.
        entities: Entities extracted from the text (empty list by default).
        relationships: Relationships extracted from the text (empty list
            by default).
    """

    id: str
    text: str
    entities: List[Entity] = field(default_factory=list)
    relationships: List[Relationship] = field(default_factory=list)
61
+
62
+
63
+ # ============================================================================
64
+ # ENTITY & RELATIONSHIP EXTRACTION
65
+ # ============================================================================
66
+
67
class EntityExtractor:
    """
    Extract entities and relationships from text using an LLM.

    This is the core of GraphRAG - converting unstructured text
    into structured graph data.

    The ``llm`` object is expected to expose
    ``complete(prompt, temperature=...)`` returning a string. Without an
    LLM, extraction is a no-op that returns empty lists.
    """

    def __init__(self, llm = None):
        self.llm = llm
        self.entity_types = [
            "person", "company", "project", "product",
            "location", "department", "role"
        ]

        self.relationship_types = [
            "manages", "works_on", "approves", "reports_to",
            "partners_with", "owns", "leads", "member_of"
        ]

    def extract(self, text: str) -> "Tuple[List[Entity], List[Relationship]]":
        """
        Extract entities and relationships from text.

        Malformed LLM output never raises: unparseable JSON or a
        non-object top level yields ``([], [])``, and individual entries
        missing required keys are skipped with a warning.

        Args:
            text: Input text

        Returns:
            (entities, relationships)
        """
        print(" Extracting entities and relationships...")

        # Nothing to do without an LLM; skip building the prompt entirely.
        if not self.llm:
            return [], []

        response = self.llm.complete(self._build_prompt(text), temperature=0.1)
        content = response.strip()

        try:
            data = self._parse_json(content)
        except json.JSONDecodeError as e:
            print(f" Failed to parse: {e}")
            return [], []

        # Valid JSON is not necessarily the object we asked for.
        if not isinstance(data, dict):
            print(f" Failed to parse: expected a JSON object, got {type(data).__name__}")
            return [], []

        entities = self._build_entities(data.get("entities"))
        relationships = self._build_relationships(data.get("relationships"))

        print(f" [OK] Found {len(entities)} entities, {len(relationships)} relationships")

        return entities, relationships

    def _build_prompt(self, text: str) -> str:
        """Return the extraction prompt for *text*."""
        return f"""Extract entities and relationships from this text.

Text: {text}

Entity types: {', '.join(self.entity_types)}
Relationship types: {', '.join(self.relationship_types)}

Output ONLY valid JSON with this structure:
{{
"entities": [
{{"id": "e1", "type": "person", "name": "Sarah Johnson"}},
{{"id": "e2", "type": "project", "name": "Project Zeus"}}
],
"relationships": [
{{"source": "e1", "target": "e2", "type": "approves"}}
]
}}

Important:
- Use consistent IDs (e1, e2, e3...)
- Include all mentioned entities
- Capture all relationships
- Keep names as they appear in text"""

    @staticmethod
    def _parse_json(content: str) -> Any:
        """Parse *content* as JSON, falling back to the first fenced block.

        Raises:
            json.JSONDecodeError: If neither the raw content nor the
                fenced block is valid JSON.
        """
        try:
            return json.loads(content)
        except json.JSONDecodeError:
            if "```" not in content:
                raise

        # Text between the first pair of fences; also covers replies that
        # put prose before the fence.
        fenced = content.split("```")[1].lstrip()
        if fenced[:4].lower() == "json":
            fenced = fenced[4:]
        return json.loads(fenced)

    @staticmethod
    def _build_entities(items: Any) -> "List[Entity]":
        """Convert raw JSON entries to Entity objects, skipping malformed ones."""
        entities = []
        for item in items if isinstance(items, list) else []:
            try:
                entity_id, entity_type, name = item["id"], item["type"], item["name"]
                properties = item.get("properties") or {}
            except (KeyError, TypeError, AttributeError):
                print(f" [WARN] Skipping malformed entity: {item!r}")
                continue
            if not isinstance(properties, dict):
                properties = {}
            entities.append(
                Entity(id=entity_id, type=entity_type, name=name, properties=properties)
            )
        return entities

    @staticmethod
    def _build_relationships(items: Any) -> "List[Relationship]":
        """Convert raw JSON entries to Relationship objects, skipping malformed ones."""
        relationships = []
        for item in items if isinstance(items, list) else []:
            try:
                source, target, rel_type = item["source"], item["target"], item["type"]
                properties = item.get("properties") or {}
            except (KeyError, TypeError, AttributeError):
                print(f" [WARN] Skipping malformed relationship: {item!r}")
                continue
            if not isinstance(properties, dict):
                properties = {}
            relationships.append(
                Relationship(source=source, target=target, type=rel_type, properties=properties)
            )
        return relationships
166
+
167
+
168
+ # ============================================================================
169
+ # KNOWLEDGE GRAPH
170
+ # ============================================================================
171
+
172
class KnowledgeGraph:
    """
    Knowledge graph for storing and querying entity relationships.

    Uses NetworkX for graph operations and path finding. Entities are
    stored both as graph nodes and in ``self.entities`` (id -> Entity),
    with ``self.entity_name_to_id`` giving a case-insensitive name lookup.
    """

    def __init__(self):
        self.graph = nx.DiGraph()
        self.entities: Dict[str, Entity] = {}
        self.entity_name_to_id: Dict[str, str] = {}

        print("[OK] Knowledge Graph initialized")

    def save_to_file(self, path: str):
        """Save graph to JSON file (best effort; failures are printed)."""
        data = nx.node_link_data(self.graph)
        try:
            with open(path, 'w') as f:
                json.dump(data, f, indent=2)
            print(f"[OK] Graph saved to {path}")
        except Exception as e:
            # Persistence is deliberately best-effort: report and carry on.
            print(f"[ERROR] Failed to save graph: {e}")

    def load_from_file(self, path: str):
        """
        Load graph from JSON file.

        Does nothing if the file does not exist. On any failure the
        current graph and indexes are left untouched.
        """
        if not os.path.exists(path):
            return

        try:
            with open(path, 'r') as f:
                data = json.load(f)

            graph = nx.node_link_graph(data)

            # Rebuild the entity indexes into locals first, so a failure
            # midway cannot leave the instance half-updated.
            entities: Dict[str, Entity] = {}
            name_index: Dict[str, str] = {}
            for node_id, attrs in graph.nodes(data=True):
                entity = Entity(
                    id=node_id,
                    type=attrs.get('type', 'unknown'),
                    name=attrs.get('name', 'Unknown'),
                    properties={k: v for k, v in attrs.items() if k not in ['type', 'name']}
                )
                entities[node_id] = entity
                name_index[entity.name.lower()] = entity.id
        except Exception as e:
            print(f"[ERROR] Failed to load graph: {e}")
            return

        self.graph = graph
        self.entities = entities
        self.entity_name_to_id = name_index

        print(f"[OK] Graph loaded from {path} ({len(self.entities)} entities)")

    def add_entity(self, entity: Entity):
        """Add entity to graph (re-adding an existing id overwrites it)."""
        self.entities[entity.id] = entity
        self.entity_name_to_id[entity.name.lower()] = entity.id

        # Merge into one dict so a "type"/"name" key inside properties
        # cannot clash with the explicit keywords; explicit fields win.
        attrs = {**entity.properties, "type": entity.type, "name": entity.name}
        self.graph.add_node(entity.id, **attrs)

    def add_relationship(self, relationship: Relationship):
        """Add relationship to graph; skipped with a warning if an endpoint is unknown."""
        # Ensure entities exist
        if relationship.source not in self.entities:
            print(f" [WARN] Warning: Source entity {relationship.source} not found")
            return

        if relationship.target not in self.entities:
            print(f" [WARN] Warning: Target entity {relationship.target} not found")
            return

        # Same merge as add_entity: the explicit relationship type wins
        # over a "type" key inside properties.
        attrs = {**relationship.properties, "type": relationship.type}
        self.graph.add_edge(relationship.source, relationship.target, **attrs)

    def add_document(self, document: Document):
        """Add all entities and relationships from document."""
        print(f"\n Adding document: {document.id}")

        # Entities first, so relationship endpoints can be validated.
        for entity in document.entities:
            self.add_entity(entity)

        for rel in document.relationships:
            self.add_relationship(rel)

        print(f" [OK] Added {len(document.entities)} entities, {len(document.relationships)} relationships")

    def find_entity(self, name: str) -> Optional[str]:
        """Find entity ID by name (case-insensitive)."""
        return self.entity_name_to_id.get(name.lower())

    def find_path(self, source_name: str, target_name: str) -> Optional[List[str]]:
        """
        Find shortest path between two entities.

        This is the key feature that vector search can't do!
        Edge direction is ignored while searching.

        Args:
            source_name: Source entity name
            target_name: Target entity name

        Returns:
            List of entity IDs forming path, or None
        """
        source_id = self.find_entity(source_name)
        target_id = self.find_entity(target_name)

        if source_id is None:
            print(f" Entity not found: {source_name}")
            return None

        if target_id is None:
            print(f" Entity not found: {target_name}")
            return None

        try:
            # Convert to undirected for path finding
            undirected = self.graph.to_undirected()
            return nx.shortest_path(undirected, source_id, target_id)
        except (nx.NetworkXNoPath, nx.NodeNotFound):
            # NodeNotFound covers an index entry whose node is missing
            # from the graph; treat it like "no path".
            print(f" No path between {source_name} and {target_name}")
            return None

    def get_neighbors(self, entity_name: str, max_hops: int = 1) -> List[Entity]:
        """
        Get neighboring entities within N hops.

        Edges are followed in both directions. The starting entity is
        never included in the result.

        Args:
            entity_name: Entity to start from
            max_hops: Maximum distance

        Returns:
            List of neighboring entities
        """
        entity_id = self.find_entity(entity_name)

        if entity_id is None:
            return []

        # Breadth-first expansion. `visited` holds everything reached so
        # far; the next frontier is only the newly discovered nodes, so
        # each hop really moves one step further out.
        visited = {entity_id}
        frontier = {entity_id}

        for _ in range(max_hops):
            reached = set()
            for node in frontier:
                reached.update(self.graph.successors(node))
                reached.update(self.graph.predecessors(node))

            frontier = reached - visited
            if not frontier:
                break
            visited |= frontier

        visited.discard(entity_id)
        return [self.entities[eid] for eid in visited if eid in self.entities]

    def query_relationship(
        self,
        entity_name: str,
        relationship_type: Optional[str] = None
    ) -> List[Tuple[Entity, str, Entity]]:
        """
        Query relationships from an entity.

        Outgoing relationships are listed first, then incoming ones.

        Args:
            entity_name: Starting entity
            relationship_type: Filter by relationship type (optional)

        Returns:
            List of (source, relationship, target) tuples
        """
        entity_id = self.find_entity(entity_name)

        if entity_id is None:
            return []

        outgoing = [(entity_id, target_id) for target_id in self.graph.successors(entity_id)]
        incoming = [(source_id, entity_id) for source_id in self.graph.predecessors(entity_id)]

        results = []
        for source_id, target_id in outgoing + incoming:
            rel_type = self.graph[source_id][target_id].get("type", "unknown")

            if relationship_type is None or rel_type == relationship_type:
                results.append((
                    self.entities[source_id],
                    rel_type,
                    self.entities[target_id]
                ))

        return results

    def get_stats(self) -> Dict:
        """Get graph statistics."""
        return {
            "total_entities": len(self.entities),
            "total_relationships": self.graph.number_of_edges(),
            "entity_types": len(set(e.type for e in self.entities.values())),
            "connected_components": nx.number_weakly_connected_components(self.graph)
        }
397
+
398
+
399
+ # ============================================================================
400
+ # GRAPH RAG SYSTEM
401
+ # ============================================================================
402
+
403
class GraphRAG:
    """
    Complete GraphRAG system combining graph and vector search.

    From Chapter 3.3:
    - Vector search for fuzzy matching
    - Graph search for relationships
    - Hybrid approach for best results

    Documents are run through an EntityExtractor and the result is merged
    into a KnowledgeGraph. Queries are answered by simple keyword pattern
    matching over that graph.
    """

    # Replies that mean "nothing was found"; used to decide relevance.
    _HELP_MESSAGE = "I can answer questions about relationships, approvals, and connections. Try: 'Who approved the AlphaCorp contract?'"
    _NO_APPROVAL_MESSAGE = "I couldn't find approval information for that."
    _NO_CONNECTION_MESSAGE = "I couldn't find a connection between those entities."
    _NO_WORK_MESSAGE = "I couldn't find work information for that person."

    def __init__(self, llm = None, persist_path: Optional[str] = None):
        self.graph = KnowledgeGraph()
        self.extractor = EntityExtractor(llm=llm)
        self.documents: Dict[str, str] = {}
        self.persist_path = persist_path

        if self.persist_path and os.path.exists(self.persist_path):
            self.graph.load_from_file(self.persist_path)

        print("[OK] GraphRAG system initialized")

    def add_document(self, doc_id: str, text: str):
        """
        Add document and extract graph structure.

        Extracted IDs are only unique within one document (the prompt
        asks for e1, e2, ...), so they are remapped before merging: an
        entity whose name is already known reuses the existing graph ID,
        and a new entity gets a document-scoped ID.

        Args:
            doc_id: Document ID
            text: Document text
        """
        print(f"\n Processing document: {doc_id}")

        # Store document
        self.documents[doc_id] = text

        # Extract entities and relationships
        entities, relationships = self.extractor.extract(text)
        entities, relationships = self._resolve_ids(doc_id, entities, relationships)

        # Create document object
        document = Document(
            id=doc_id,
            text=text,
            entities=entities,
            relationships=relationships
        )

        # Add to graph
        self.graph.add_document(document)

        # Auto-save
        if self.persist_path:
            self.graph.save_to_file(self.persist_path)

    def _resolve_ids(self, doc_id, entities, relationships):
        """Return copies of the extracted items with graph-wide unique IDs."""
        id_map: Dict[str, str] = {}
        name_to_id: Dict[str, str] = {}
        resolved_entities = []

        for entity in entities:
            key = entity.name.lower()
            graph_id = name_to_id.get(key)
            if graph_id is None:
                graph_id = self.graph.find_entity(entity.name)
            if graph_id is None:
                graph_id = self._unique_entity_id(
                    f"{doc_id}:{entity.id}", set(name_to_id.values())
                )
            name_to_id[key] = graph_id
            id_map[entity.id] = graph_id
            resolved_entities.append(Entity(
                id=graph_id,
                type=entity.type,
                name=entity.name,
                properties=entity.properties
            ))

        # Endpoints the extractor never declared keep their raw ID, so the
        # graph's own "entity not found" warning still reports them.
        resolved_relationships = [
            Relationship(
                source=id_map.get(rel.source, rel.source),
                target=id_map.get(rel.target, rel.target),
                type=rel.type,
                properties=rel.properties
            )
            for rel in relationships
        ]

        return resolved_entities, resolved_relationships

    def _unique_entity_id(self, base: str, taken) -> str:
        """Return *base*, or *base* with a numeric suffix, unused in the graph and in *taken*."""
        candidate = base
        suffix = 1
        while candidate in self.graph.entities or candidate in taken:
            suffix += 1
            candidate = f"{base}#{suffix}"
        return candidate

    def hybrid_search(self, query: str, top_k: int = 3) -> Dict[str, Any]:
        """
        Hybrid search combining graph and relationship reasoning.

        Args:
            query: Natural language query
            top_k: Accepted for interface compatibility; currently unused.

        Returns:
            Dict with the answer, a relevance flag, and the answer wrapped
            as a single document when it is relevant.
        """
        answer = self.answer_relationship_query(query)

        # Relevant means a handler produced a real finding. Matching on
        # words like "approved" would wrongly accept the help text, which
        # contains an example question with that word.
        is_relevant = answer not in (
            self._HELP_MESSAGE,
            self._NO_APPROVAL_MESSAGE,
            self._NO_CONNECTION_MESSAGE,
            self._NO_WORK_MESSAGE,
        )

        return {
            "answer": answer,
            "success": True,
            "source": "graph_rag",
            "is_relevant": is_relevant,
            "documents": [{"content": answer, "id": "graph_result"}] if is_relevant else []
        }

    def query(self, query_str: str) -> Dict[str, Any]:
        """Query for entities and relationships."""
        answer = self.answer_relationship_query(query_str)

        # Names of known entities that appear in the query (substring match)
        query_lower = query_str.lower()
        entities = [
            entity.name
            for entity in self.graph.entities.values()
            if entity.name.lower() in query_lower
        ]

        return {
            "answer": answer,
            "entities": entities,
            "success": True
        }

    def answer_relationship_query(self, query: str) -> str:
        """
        Answer queries about relationships.

        This is what vector search fails at!

        Args:
            query: Natural language query

        Returns:
            Answer based on graph analysis
        """
        print(f"\n Query: {query}")

        # Simple pattern matching for demo
        # In production, use LLM to understand query intent

        query_lower = query.lower()

        # Pattern: "Who approved X?"
        if "who approved" in query_lower or "who signed off" in query_lower:
            return self._handle_approval_query(query)

        # Pattern: "How is X related to Y?"
        if "related to" in query_lower or "connection between" in query_lower:
            return self._handle_connection_query(query)

        # Pattern: "What does X work on?"
        if "work on" in query_lower or "working on" in query_lower:
            return self._handle_works_on_query(query)

        return self._HELP_MESSAGE

    def _handle_approval_query(self, query: str) -> str:
        """Handle 'who approved X' queries."""
        query_lower = query.lower()
        graph = self.graph.graph

        # Look for entity names in query
        for entity_id, entity in self.graph.entities.items():
            if entity.name.lower() not in query_lower:
                continue

            # Anyone with an "approves" edge pointing at this entity
            approvers = [
                self.graph.entities[source_id].name
                for source_id in graph.predecessors(entity_id)
                if graph[source_id][entity_id].get("type") == "approves"
            ]

            if approvers:
                return f"{', '.join(approvers)} approved {entity.name}."

        return self._NO_APPROVAL_MESSAGE

    def _handle_connection_query(self, query: str) -> str:
        """Handle 'how is X related to Y' queries."""
        query_lower = query.lower()
        entity_names = [
            entity.name
            for entity in self.graph.entities.values()
            if entity.name.lower() in query_lower
        ]

        if len(entity_names) >= 2:
            path = self.graph.find_path(entity_names[0], entity_names[1])

            if path:
                graph = self.graph.graph
                hops = []
                for source_id, target_id in zip(path, path[1:]):
                    edge = graph.get_edge_data(source_id, target_id)

                    if edge is None:
                        # The path comes from an undirected search, so
                        # this hop may run against the stored edge. Swap
                        # the endpoints so the sentence reads correctly.
                        edge = graph.get_edge_data(target_id, source_id)
                        source_id, target_id = target_id, source_id

                    rel_type = edge.get("type", "connected to") if edge else "connected to"
                    source = self.graph.entities[source_id]
                    target = self.graph.entities[target_id]
                    hops.append(f"{source.name} {rel_type} {target.name}")

                # "; " keeps the individual hops distinguishable.
                return f"Connection found: {'; '.join(hops)}"

        return self._NO_CONNECTION_MESSAGE

    def _handle_works_on_query(self, query: str) -> str:
        """Handle 'what does X work on' queries."""
        query_lower = query.lower()
        graph = self.graph.graph

        for entity_id, entity in self.graph.entities.items():
            if entity.type != "person" or entity.name.lower() not in query_lower:
                continue

            # Find projects they work on
            projects = []
            for target_id in graph.successors(entity_id):
                edge = graph[entity_id][target_id]
                if edge.get("type") in ["works_on", "leads"]:
                    target = self.graph.entities[target_id]
                    projects.append(f"{target.name} ({edge.get('type')})")

            if projects:
                return f"{entity.name} is involved with: {', '.join(projects)}"

        return self._NO_WORK_MESSAGE
589
+
590
+
591
+ # ============================================================================
592
+ # DEMO
593
+ # ============================================================================
594
+
595
def demo():
    """
    Run a console walkthrough of GraphRAG.

    Ingests four sample documents, prints graph statistics, runs three
    relationship queries, shows a path lookup, and prints a comparison
    of vector search and graph search.
    """
    print("=" * 70)
    print("GRAPHRAG DEMO")
    print("=" * 70)
    print("\nBased on Chapter 3.3: When Vector Search Isn't Enough")
    print("\nDemonstrating relationship-aware queries that")
    print("vector databases cannot answer.\n")
    print("=" * 70)

    # Initialize GraphRAG
    # NOTE(review): no llm is passed here, and the extractor returns
    # nothing without one, so the graph stays empty unless an LLM client
    # is supplied.
    graph_rag = GraphRAG()

    # Add documents (from book example)
    documents = {
        "doc1": """
The Project Zeus budget for Q4 2025 was approved by Sarah Johnson,
the VP of Engineering. The project aims to modernize our infrastructure
and is expected to cost $2.5M over 6 months.
""",

        "doc2": """
David Chen has been leading Project Zeus since September 2025.
He reports directly to Sarah Johnson and manages a team of 12 engineers.
The project is currently on schedule and within budget.
""",

        "doc3": """
AlphaCorp has entered into a strategic partnership with our company.
David Chen negotiated the terms of the partnership, which includes
joint development on Project Zeus infrastructure components.
""",

        "doc4": """
The AlphaCorp contract was finalized in October 2025.
The partnership focuses on cloud infrastructure and will leverage
the technologies developed in Project Zeus.
"""
    }

    print("\n Adding documents to GraphRAG...")
    for doc_id, text in documents.items():
        graph_rag.add_document(doc_id, text.strip())

    # Show graph statistics
    print("\n" + "="*70)
    print("KNOWLEDGE GRAPH STATISTICS")
    print("="*70)
    stats = graph_rag.graph.get_stats()
    print(f"Total entities: {stats['total_entities']}")
    print(f"Total relationships: {stats['total_relationships']}")
    print(f"Entity types: {stats['entity_types']}")
    print(f"Connected components: {stats['connected_components']}")

    # Explain an empty graph instead of silently failing every query below.
    if stats['total_entities'] == 0:
        print("[WARN] No entities were extracted - pass an llm to GraphRAG() to populate the graph.")

    # Test queries (from book example)
    print("\n" + "="*70)
    print("RELATIONSHIP QUERIES")
    print("="*70)
    print("\nThese queries require graph traversal.")
    print("Vector search would fail!\n")

    queries = [
        "Who approved the AlphaCorp contract?",
        "How is Sarah Johnson related to AlphaCorp?",
        "What does David Chen work on?"
    ]

    for i, query in enumerate(queries, 1):
        print(f"\n{' '*70}")
        print(f"Query {i}: {query}")
        print(' '*70)

        answer = graph_rag.answer_relationship_query(query)
        print(f"\n Answer:\n{answer}")

    # Show path finding
    print("\n" + "="*70)
    print("PATH FINDING EXAMPLE")
    print("="*70)
    print("\nFinding connection: Sarah Johnson AlphaCorp")

    path = graph_rag.graph.find_path("Sarah Johnson", "AlphaCorp")

    if path:
        print("\n[LINK] Connection found:")
        nx_graph = graph_rag.graph.graph
        for step, node_id in enumerate(path, 1):
            entity = graph_rag.graph.entities[node_id]
            print(f" {step}. {entity.name} ({entity.type})")

            # Every node but the last is followed by the edge to the next.
            if step < len(path):
                next_id = path[step]
                edge = nx_graph.get_edge_data(node_id, next_id)
                if not edge:
                    # Path search ignores direction; try the reverse edge.
                    edge = nx_graph.get_edge_data(next_id, node_id)
                rel_type = edge.get("type", " ") if edge else " "
                print(f" {rel_type}")

    print("\n" + "="*70)
    print("WHY VECTOR SEARCH FAILS (From Book)")
    print("="*70)
    print("""
Vector Search Approach:
1. Query: "Who approved AlphaCorp contract?"
2. Finds 3 separate documents:
- Document 1: "Sarah approved Project Zeus"
- Document 2: "David leads Project Zeus"
- Document 3: "AlphaCorp partnership with David"
3. CANNOT connect the dots!

GraphRAG Approach:
1. Query: "Who approved AlphaCorp contract?"
2. Traverses graph:
Sarah approves Project Zeus
David leads Project Zeus
David negotiates AlphaCorp partnership
3. Finds path: Sarah Project Zeus David AlphaCorp [OK]
4. Answer: "Sarah approved the project that led to AlphaCorp partnership"

Key Insight:
- Vectors: Good for similarity matching
- Graphs: Good for relationship reasoning
- Hybrid: Best of both worlds!
""")


# Run the demo only when executed as a script, not on import.
if __name__ == "__main__":
    demo()