kite-agent 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. kite/__init__.py +46 -0
  2. kite/ab_testing.py +384 -0
  3. kite/agent.py +556 -0
  4. kite/agents/__init__.py +3 -0
  5. kite/agents/plan_execute.py +191 -0
  6. kite/agents/react_agent.py +509 -0
  7. kite/agents/reflective_agent.py +90 -0
  8. kite/agents/rewoo.py +119 -0
  9. kite/agents/tot.py +151 -0
  10. kite/conversation.py +125 -0
  11. kite/core.py +974 -0
  12. kite/data_loaders.py +111 -0
  13. kite/embedding_providers.py +372 -0
  14. kite/llm_providers.py +1278 -0
  15. kite/memory/__init__.py +6 -0
  16. kite/memory/advanced_rag.py +333 -0
  17. kite/memory/graph_rag.py +719 -0
  18. kite/memory/session_memory.py +423 -0
  19. kite/memory/vector_memory.py +579 -0
  20. kite/monitoring.py +611 -0
  21. kite/observers.py +107 -0
  22. kite/optimization/__init__.py +9 -0
  23. kite/optimization/resource_router.py +80 -0
  24. kite/persistence.py +42 -0
  25. kite/pipeline/__init__.py +5 -0
  26. kite/pipeline/deterministic_pipeline.py +323 -0
  27. kite/pipeline/reactive_pipeline.py +171 -0
  28. kite/pipeline_manager.py +15 -0
  29. kite/routing/__init__.py +6 -0
  30. kite/routing/aggregator_router.py +325 -0
  31. kite/routing/llm_router.py +149 -0
  32. kite/routing/semantic_router.py +228 -0
  33. kite/safety/__init__.py +6 -0
  34. kite/safety/circuit_breaker.py +360 -0
  35. kite/safety/guardrails.py +82 -0
  36. kite/safety/idempotency_manager.py +304 -0
  37. kite/safety/kill_switch.py +75 -0
  38. kite/tool.py +183 -0
  39. kite/tool_registry.py +87 -0
  40. kite/tools/__init__.py +21 -0
  41. kite/tools/code_execution.py +53 -0
  42. kite/tools/contrib/__init__.py +19 -0
  43. kite/tools/contrib/calculator.py +26 -0
  44. kite/tools/contrib/datetime_utils.py +20 -0
  45. kite/tools/contrib/linkedin.py +428 -0
  46. kite/tools/contrib/web_search.py +30 -0
  47. kite/tools/mcp/__init__.py +31 -0
  48. kite/tools/mcp/database_mcp.py +267 -0
  49. kite/tools/mcp/gdrive_mcp_server.py +503 -0
  50. kite/tools/mcp/gmail_mcp_server.py +601 -0
  51. kite/tools/mcp/postgres_mcp_server.py +490 -0
  52. kite/tools/mcp/slack_mcp_server.py +538 -0
  53. kite/tools/mcp/stripe_mcp_server.py +219 -0
  54. kite/tools/search.py +90 -0
  55. kite/tools/system_tools.py +54 -0
  56. kite/tools_manager.py +27 -0
  57. kite_agent-0.1.0.dist-info/METADATA +621 -0
  58. kite_agent-0.1.0.dist-info/RECORD +61 -0
  59. kite_agent-0.1.0.dist-info/WHEEL +5 -0
  60. kite_agent-0.1.0.dist-info/licenses/LICENSE +21 -0
  61. kite_agent-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,423 @@
1
+ """
2
+ Session Memory Manager
3
+ Based on Chapter 3.2: Short-Term Memory - Managing the "Now"
4
+
5
+ The Goldfish Metaphor from book:
6
+ - Sharp about 2 minutes ago
7
+ - Total amnesia about 5 minutes ago
8
+
9
+ Strategy: Sliding window with compression to avoid exponential cost growth.
10
+
11
+ Run: python session_memory.py
12
+ """
13
+
14
+ import os
15
+ from typing import List, Dict, Optional
16
+ from dataclasses import dataclass, field
17
+ from datetime import datetime
18
+ from dotenv import load_dotenv
19
+
20
+ load_dotenv()
21
+
22
+
23
@dataclass
class Message:
    """
    One turn of a conversation held in session memory.

    Instances are plain records: nothing is validated or computed here;
    whoever creates the message supplies the token count.
    """

    # Speaker of the turn, e.g. 'user' or 'assistant'.
    role: str

    # Raw text of the turn.
    content: str

    # Creation time; filled in with datetime.now() when not supplied.
    timestamp: datetime = field(default_factory=datetime.now)

    # Token count associated with `content`; stays 0 unless supplied.
    tokens: int = 0
30
+
31
+
32
@dataclass
class SessionStats:
    """
    Running counters describing one conversation session.

    Every counter starts at zero; the memory manager that owns the
    instance is responsible for updating them.
    """

    # Number of messages ever added to the session.
    total_messages: int = 0

    # Sum of the token counts of every message ever added.
    total_tokens: int = 0

    # Number of messages that have been folded into the summary.
    compressed_count: int = 0

    # Configured size of the sliding window, in messages.
    window_size: int = 0
39
+
40
+
41
class SessionMemory:
    """
    Sliding window memory manager for conversations.

    The most recent ``window_size`` messages are kept verbatim. Messages
    that fall out of the window are folded into a running text summary
    when compression is enabled, and simply dropped otherwise.

    Features from Chapter 3.2:
    - Keep last N messages (configurable window)
    - Compress older messages for context
    - Bounded number of messages sent per request

    Example:
        memory = SessionMemory(window_size=10)

        # Add messages
        memory.add_user_message("Hello")
        memory.add_assistant_message("Hi! How can I help?")

        # Get messages for LLM
        messages = memory.get_messages()
    """

    # A merged summary longer than this many characters is summarized again.
    _MAX_SUMMARY_CHARS = 1000

    def __init__(
        self,
        llm=None,
        window_size: int = 10,
        compression_enabled: bool = True,
        max_tokens_per_message: int = 500
    ):
        """
        Initialize session memory.

        Args:
            llm: Optional object used for summarization. When given, its
                ``complete(prompt, max_tokens=..., temperature=...)`` method
                is called; when omitted, a fixed placeholder summary is used.
            window_size: Number of recent messages to keep (must be >= 0)
            compression_enabled: Whether to compress old messages
            max_tokens_per_message: Soft limit for message length. Stored
                only; nothing in this class enforces it.

        Raises:
            ValueError: If ``window_size`` is negative.
        """
        # A negative window makes the eviction slicing meaningless, so it is
        # rejected up front instead of silently misbehaving later.
        if window_size < 0:
            raise ValueError(f"window_size must be >= 0, got {window_size}")

        self.llm = llm
        self.window_size = window_size
        self.compression_enabled = compression_enabled
        self.max_tokens_per_message = max_tokens_per_message

        # Message storage (the current window, oldest first)
        self.messages: List["Message"] = []

        # Compressed history (for context)
        self.compressed_history: Optional[str] = None

        # Statistics
        self.stats = SessionStats(window_size=window_size)

        print("[OK] Session Memory initialized")
        print(f" Window size: {window_size} messages")
        print(f" Compression: {'enabled' if compression_enabled else 'disabled'}")

    def _estimate_tokens(self, text: str) -> int:
        """
        Estimate token count.

        Rough estimation: 1 token ≈ 4 characters in English.
        For production, use the tiktoken library.
        """
        return len(text) // 4

    def _compress_messages(self, messages: List["Message"]) -> str:
        """
        Compress messages into a summary.

        Args:
            messages: Messages to summarize, in conversation order.

        Returns:
            An empty string for an empty list; otherwise whatever
            ``self.llm.complete`` returns, or a fixed placeholder string
            when no LLM was configured.
        """
        if not messages:
            return ""

        # Build text from messages
        conversation = "\n".join(
            f"{msg.role.upper()}: {msg.content}" for msg in messages
        )

        # Ask LLM to summarize
        prompt = f"""Summarize this conversation history in 2-3 sentences. Focus on key topics and decisions.

Conversation:
{conversation}

Summary:"""

        if self.llm:
            # NOTE(review): assumes complete() returns a str (len() is taken
            # below) - confirm against the LLM provider interface.
            summary = self.llm.complete(prompt, max_tokens=150, temperature=0.3)
        else:
            # Fallback for demo/testing if no LLM provided
            summary = "Summary unavailable (No LLM provider)"

        print(f" Compressed {len(messages)} messages {len(summary)} chars")

        return summary

    def _record_message(self, role: str, content: str) -> None:
        """Store one message, update the counters and re-apply the window."""
        message = Message(
            role=role,
            content=content,
            tokens=self._estimate_tokens(content)
        )

        self.messages.append(message)
        self.stats.total_messages += 1
        self.stats.total_tokens += message.tokens

        # Apply sliding window
        self._apply_sliding_window()

    def add_message(self, role: str, content: str, session_id: Optional[str] = None):
        """
        Add a message to memory with a specific role.

        Args:
            role: Compared case-insensitively with "user"; any other value
                is stored as an assistant message.
            content: Message text.
            session_id: Accepted for call compatibility; not used here.
        """
        if role.lower() == "user":
            self.add_user_message(content)
        else:
            self.add_assistant_message(content)

    def add_user_message(self, content: str):
        """Add user message to memory."""
        self._record_message("user", content)

    def add_assistant_message(self, content: str):
        """Add assistant message to memory."""
        self._record_message("assistant", content)

    def _apply_sliding_window(self):
        """
        Apply sliding window logic.

        Keeps the newest ``window_size`` messages. Evicted messages are
        summarized and merged into ``compressed_history`` when compression
        is enabled; otherwise they are discarded.
        """
        overflow = len(self.messages) - self.window_size
        if overflow <= 0:
            # Within window, no action needed
            return

        # Slice on the overflow count rather than on -window_size: with
        # window_size == 0, [:-0] is empty and [-0:] is the whole list, so
        # nothing would ever be evicted.
        old_messages = self.messages[:overflow]
        self.messages = self.messages[overflow:]

        # Compress old messages if enabled
        if self.compression_enabled:
            new_compression = self._compress_messages(old_messages)

            if self.compressed_history:
                # Combine old and new compression
                combined = f"{self.compressed_history}\n{new_compression}"
                # Compress the compression if it gets too long
                if len(combined) > self._MAX_SUMMARY_CHARS:
                    combined = self._compress_messages([
                        Message(role="system", content=combined)
                    ])
                self.compressed_history = combined
            else:
                self.compressed_history = new_compression

            self.stats.compressed_count += len(old_messages)

        print(f" Sliding window applied: {len(old_messages)} messages compressed")

    def get_messages(self, include_compression: bool = True) -> List[Dict]:
        """
        Get messages in format for LLM API.

        Args:
            include_compression: Whether to include compressed history

        Returns:
            List of ``{"role": ..., "content": ...}`` dictionaries. When a
            summary exists and is requested, it comes first as a "system"
            entry, followed by the current window in order.
        """
        result = []

        # Add compressed history as system message
        if include_compression and self.compressed_history:
            result.append({
                "role": "system",
                "content": f"Previous conversation summary: {self.compressed_history}"
            })

        # Add current window
        result.extend(
            {"role": msg.role, "content": msg.content} for msg in self.messages
        )

        return result

    def get_token_count(self) -> Dict[str, int]:
        """
        Get token counts for cost estimation.

        Returns:
            Dictionary with the estimated tokens in the window, in the
            summary, their sum ("actual_tokens"), the total of every message
            ever added ("naive_tokens"), and the absolute and percentage
            difference between the two. The percentage is 0 when no tokens
            have been recorded.
        """
        # Current window tokens
        window_tokens = sum(msg.tokens for msg in self.messages)

        # Compressed history tokens
        compression_tokens = self._estimate_tokens(self.compressed_history or "")

        # What it would be WITHOUT compression
        naive_tokens = self.stats.total_tokens

        # Actual tokens with compression
        actual_tokens = window_tokens + compression_tokens
        savings = naive_tokens - actual_tokens

        return {
            "window_tokens": window_tokens,
            "compression_tokens": compression_tokens,
            "actual_tokens": actual_tokens,
            "naive_tokens": naive_tokens,
            "savings": savings,
            "savings_percent": (
                (savings / naive_tokens * 100)
                if naive_tokens > 0 else 0
            )
        }

    def clear(self):
        """Clear all memory: window, summary and statistics."""
        self.messages.clear()
        self.compressed_history = None
        self.stats = SessionStats(window_size=self.window_size)
        print("[OK] Session memory cleared")

    def get_stats(self) -> "SessionStats":
        """Get session statistics."""
        return self.stats
285
+
286
+
287
+ # ============================================================================
288
+ # DEMO
289
+ # ============================================================================
290
+
291
def demo():
    """
    Run a console walkthrough of the sliding-window memory.

    Feeds a scripted conversation into a SessionMemory with a window of six
    messages (no LLM, so summaries are the placeholder text), then prints the
    resulting window, the compressed history and a token/cost comparison.
    """
    print("=" * 70)
    print("SESSION MEMORY MANAGER DEMO")
    print("=" * 70)
    print("\nBased on Chapter 3.2: The Goldfish Metaphor")
    print("- Sharp about recent messages")
    print("- Compressed for older context")
    print("- Total amnesia for very old (cost savings!)")
    print("=" * 70)

    # Initialize with small window for demo
    memory = SessionMemory(
        window_size=6,  # Keep last 6 messages
        compression_enabled=True
    )

    # Simulate a long conversation
    conversation = [
        ("user", "Hi! I need help with my Python code."),
        ("assistant", "Of course! I'd be happy to help. What's the issue?"),
        ("user", "I'm getting a TypeError when I try to concatenate strings."),
        ("assistant", "That usually happens when you try to concatenate a string with a non-string type. Can you show me the code?"),
        ("user", "Sure: result = 'Count: ' + count"),
        ("assistant", "I see the issue! 'count' is probably an integer. You need to convert it: result = 'Count: ' + str(count)"),
        ("user", "That worked! Thanks!"),
        ("assistant", "Great! Is there anything else you need help with?"),
        ("user", "Actually yes, how do I read a CSV file?"),
        ("assistant", "You can use the csv module or pandas. Here's a simple example with csv module..."),
        ("user", "What about using pandas instead?"),
        ("assistant", "With pandas it's even easier: import pandas as pd; df = pd.read_csv('file.csv')"),
        ("user", "Perfect! One more thing - how do I handle errors?"),
        ("assistant", "Use try-except blocks to handle exceptions gracefully..."),
    ]

    # The count is derived from the script so the banner cannot drift from it.
    print(f"\n Simulating {len(conversation)}-message conversation...")
    print(f" Window size: {memory.window_size} messages\n")

    for i, (role, content) in enumerate(conversation, 1):
        print(f"{i}. {role.upper()}: {content[:50]}...")

        if role == "user":
            memory.add_user_message(content)
        else:
            memory.add_assistant_message(content)

        # Show window status every few messages
        if i % 4 == 0:
            print(f"\n [CHART] After {i} messages:")
            print(f" In window: {len(memory.messages)}")
            print(f" Compressed: {memory.stats.compressed_count}")
            if memory.compressed_history:
                print(f" Compression: {len(memory.compressed_history)} chars")
            print()

    # Show final state
    print("\n" + "=" * 70)
    print("FINAL MEMORY STATE")
    print("=" * 70)

    if memory.compressed_history:
        print("\n Compressed History:")
        print(f" {memory.compressed_history}\n")

    print(f" Current Window ({len(memory.messages)} messages):")
    for msg in memory.messages:
        print(f" {msg.role.upper()}: {msg.content[:60]}...")

    # Token analysis
    print("\n" + "=" * 70)
    print(" COST ANALYSIS")
    print("=" * 70)

    tokens = memory.get_token_count()

    print("\nToken Counts:")
    print(f" Window tokens: {tokens['window_tokens']:,}")
    print(f" Compression tokens: {tokens['compression_tokens']:,}")
    print(f" Actual total: {tokens['actual_tokens']:,}")
    print()
    print("Without Compression:")
    print(f" Naive total: {tokens['naive_tokens']:,}")
    print()
    print(" Savings:")
    print(f" Tokens saved: {tokens['savings']:,}")
    print(f" Percentage: {tokens['savings_percent']:.1f}%")

    # Cost estimate
    cost_per_1k = 0.0001  # Rough estimate
    actual_cost = (tokens['actual_tokens'] / 1000) * cost_per_1k
    naive_cost = (tokens['naive_tokens'] / 1000) * cost_per_1k

    print(f"\nCost Estimate (at ${cost_per_1k:.4f} per 1K tokens):")
    print(f" With sliding window: ${actual_cost:.6f} per query")
    print(f" Without compression: ${naive_cost:.6f} per query")
    print(f" Savings: ${naive_cost - actual_cost:.6f} per query")

    # Scale to many queries
    queries_per_day = 1000
    print(f"\nFor {queries_per_day:,} queries/day (30 days):")
    print(f" With sliding window: ${actual_cost * queries_per_day * 30:.2f}/month")
    print(f" Without: ${naive_cost * queries_per_day * 30:.2f}/month")
    print(f" Total savings: ${(naive_cost - actual_cost) * queries_per_day * 30:.2f}/month")

    print("\n" + "=" * 70)
    print("KEY INSIGHTS FROM CHAPTER 3.2")
    print("=" * 70)
    print("""
1. GOLDFISH MEMORY STRATEGY
- Keep recent messages (sharp memory)
- Compress older context
- Forget very old (cost savings)

2. COST CONTROL
- Fixed cost per conversation
- No exponential growth
- Predictable budgeting

3. QUALITY MAINTAINED
- Important context preserved via compression
- Recent messages at full detail
- Good balance of context vs. cost

4. SCALABILITY
- Window size adjustable
- Compression depth configurable
- Works for short or long conversations
""")
420
+
421
+
422
+ if __name__ == "__main__":
423
+ demo()