kite-agent 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kite/__init__.py +46 -0
- kite/ab_testing.py +384 -0
- kite/agent.py +556 -0
- kite/agents/__init__.py +3 -0
- kite/agents/plan_execute.py +191 -0
- kite/agents/react_agent.py +509 -0
- kite/agents/reflective_agent.py +90 -0
- kite/agents/rewoo.py +119 -0
- kite/agents/tot.py +151 -0
- kite/conversation.py +125 -0
- kite/core.py +974 -0
- kite/data_loaders.py +111 -0
- kite/embedding_providers.py +372 -0
- kite/llm_providers.py +1278 -0
- kite/memory/__init__.py +6 -0
- kite/memory/advanced_rag.py +333 -0
- kite/memory/graph_rag.py +719 -0
- kite/memory/session_memory.py +423 -0
- kite/memory/vector_memory.py +579 -0
- kite/monitoring.py +611 -0
- kite/observers.py +107 -0
- kite/optimization/__init__.py +9 -0
- kite/optimization/resource_router.py +80 -0
- kite/persistence.py +42 -0
- kite/pipeline/__init__.py +5 -0
- kite/pipeline/deterministic_pipeline.py +323 -0
- kite/pipeline/reactive_pipeline.py +171 -0
- kite/pipeline_manager.py +15 -0
- kite/routing/__init__.py +6 -0
- kite/routing/aggregator_router.py +325 -0
- kite/routing/llm_router.py +149 -0
- kite/routing/semantic_router.py +228 -0
- kite/safety/__init__.py +6 -0
- kite/safety/circuit_breaker.py +360 -0
- kite/safety/guardrails.py +82 -0
- kite/safety/idempotency_manager.py +304 -0
- kite/safety/kill_switch.py +75 -0
- kite/tool.py +183 -0
- kite/tool_registry.py +87 -0
- kite/tools/__init__.py +21 -0
- kite/tools/code_execution.py +53 -0
- kite/tools/contrib/__init__.py +19 -0
- kite/tools/contrib/calculator.py +26 -0
- kite/tools/contrib/datetime_utils.py +20 -0
- kite/tools/contrib/linkedin.py +428 -0
- kite/tools/contrib/web_search.py +30 -0
- kite/tools/mcp/__init__.py +31 -0
- kite/tools/mcp/database_mcp.py +267 -0
- kite/tools/mcp/gdrive_mcp_server.py +503 -0
- kite/tools/mcp/gmail_mcp_server.py +601 -0
- kite/tools/mcp/postgres_mcp_server.py +490 -0
- kite/tools/mcp/slack_mcp_server.py +538 -0
- kite/tools/mcp/stripe_mcp_server.py +219 -0
- kite/tools/search.py +90 -0
- kite/tools/system_tools.py +54 -0
- kite/tools_manager.py +27 -0
- kite_agent-0.1.0.dist-info/METADATA +621 -0
- kite_agent-0.1.0.dist-info/RECORD +61 -0
- kite_agent-0.1.0.dist-info/WHEEL +5 -0
- kite_agent-0.1.0.dist-info/licenses/LICENSE +21 -0
- kite_agent-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,423 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Session Memory Manager
|
|
3
|
+
Based on Chapter 3.2: Short-Term Memory - Managing the "Now"
|
|
4
|
+
|
|
5
|
+
The Goldfish Metaphor from book:
|
|
6
|
+
- Sharp about 2 minutes ago
|
|
7
|
+
- Total amnesia about 5 minutes ago
|
|
8
|
+
|
|
9
|
+
Strategy: Sliding window with compression to avoid exponential cost growth.
|
|
10
|
+
|
|
11
|
+
Run: python session_memory.py
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import os
|
|
15
|
+
from typing import List, Dict, Optional
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
from datetime import datetime
|
|
18
|
+
from dotenv import load_dotenv
|
|
19
|
+
|
|
20
|
+
load_dotenv()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class Message:
    """A single conversation message."""
    role: str  # 'user' or 'assistant' ('system' is used internally for compression)
    content: str
    timestamp: datetime = field(default_factory=datetime.now)
    tokens: int = 0  # estimated token count (see SessionMemory._estimate_tokens)


@dataclass
class SessionStats:
    """Running statistics for a session."""
    total_messages: int = 0    # all messages ever added (including evicted ones)
    total_tokens: int = 0      # estimated tokens across all messages ever added
    compressed_count: int = 0  # messages evicted from the window into the summary
    window_size: int = 0       # configured sliding-window size


class SessionMemory:
    """
    Sliding window memory manager for conversations.

    Features from Chapter 3.2:
    - Keep last N messages (configurable window)
    - Compress older messages for context
    - Fixed cost per conversation
    - No exponential growth

    Example:
        memory = SessionMemory(window_size=10)

        # Add messages
        memory.add_user_message("Hello")
        memory.add_assistant_message("Hi! How can I help?")

        # Get messages for LLM
        messages = memory.get_messages()
    """

    def __init__(
        self,
        llm=None,
        window_size: int = 10,
        compression_enabled: bool = True,
        max_tokens_per_message: int = 500
    ):
        """
        Initialize session memory.

        Args:
            llm: Optional LLM provider exposing ``complete(prompt, ...)``;
                used to summarize messages that fall out of the window.
                When ``None``, a placeholder summary is used instead.
            window_size: Number of recent messages to keep at full detail.
            compression_enabled: Whether to compress old messages.
            max_tokens_per_message: Soft limit for message length.
                NOTE(review): stored but not enforced anywhere in this class.
        """
        self.llm = llm
        self.window_size = window_size
        self.compression_enabled = compression_enabled
        self.max_tokens_per_message = max_tokens_per_message

        # Message storage (only the current window is kept here)
        self.messages: List[Message] = []

        # Compressed history: summary of everything evicted from the window
        self.compressed_history: Optional[str] = None

        # Statistics
        self.stats = SessionStats(window_size=window_size)

        print("[OK] Session Memory initialized")
        print(f" Window size: {window_size} messages")
        print(f" Compression: {'enabled' if compression_enabled else 'disabled'}")

    def _estimate_tokens(self, text: str) -> int:
        """
        Estimate token count.

        Rough estimation: 1 token ~= 4 characters in English.
        For production, use the tiktoken library.
        """
        return len(text) // 4

    def _compress_messages(self, messages: List[Message]) -> str:
        """
        Compress messages into a short summary string.

        Uses the LLM (if one was provided) to create a concise summary of
        the conversation history. This is much cheaper than keeping all
        messages verbatim in the prompt.
        """
        if not messages:
            return ""

        # Build a plain-text transcript from the messages
        conversation = "\n".join(
            f"{msg.role.upper()}: {msg.content}" for msg in messages
        )

        # Ask LLM to summarize
        prompt = f"""Summarize this conversation history in 2-3 sentences. Focus on key topics and decisions.

Conversation:
{conversation}

Summary:"""

        if self.llm:
            # NOTE(review): assumes the provider's complete() returns a plain
            # string -- confirm against the project's LLM provider interface.
            summary = self.llm.complete(prompt, max_tokens=150, temperature=0.3)
        else:
            # Fallback for demo/testing if no LLM provided
            summary = "Summary unavailable (No LLM provider)"

        print(f" Compressed {len(messages)} messages -> {len(summary)} chars")

        return summary

    def add_message(self, role: str, content: str, session_id: Optional[str] = None):
        """
        Add a message with an explicit role.

        Args:
            role: 'user' (case-insensitive) stores a user message; any other
                value is stored as an assistant message.
            content: Message text.
            session_id: Accepted for API compatibility; currently unused.
        """
        if role.lower() == "user":
            self.add_user_message(content)
        else:
            self.add_assistant_message(content)

    def add_user_message(self, content: str):
        """Add a user message and apply the sliding window."""
        self._append_message("user", content)

    def add_assistant_message(self, content: str):
        """Add an assistant message and apply the sliding window."""
        self._append_message("assistant", content)

    def _append_message(self, role: str, content: str):
        """Store a message, update stats, and enforce the window (shared helper
        for add_user_message / add_assistant_message, which were duplicates)."""
        message = Message(
            role=role,
            content=content,
            tokens=self._estimate_tokens(content)
        )

        self.messages.append(message)
        self.stats.total_messages += 1
        self.stats.total_tokens += message.tokens

        # Apply sliding window
        self._apply_sliding_window()

    def _apply_sliding_window(self):
        """
        Apply sliding window logic.

        From Chapter 3.2:
        - Keep last N messages (sharp memory)
        - Compress older messages (compressed context)
        - Total amnesia for very old (to save cost)
        """
        if len(self.messages) <= self.window_size:
            # Within window, no action needed
            return

        # Messages outside window
        old_messages = self.messages[:-self.window_size]

        # Keep only window
        self.messages = self.messages[-self.window_size:]

        # Compress old messages if enabled
        if self.compression_enabled and old_messages:
            new_compression = self._compress_messages(old_messages)

            # Merge with existing compression
            if self.compressed_history:
                # Combine old and new compression
                combined = f"{self.compressed_history}\n{new_compression}"
                # Re-compress the summary itself if it gets too long
                if len(combined) > 1000:
                    self.compressed_history = self._compress_messages([
                        Message(role="system", content=combined)
                    ])
                else:
                    self.compressed_history = combined
            else:
                self.compressed_history = new_compression

            self.stats.compressed_count += len(old_messages)

            print(f" Sliding window applied: {len(old_messages)} messages compressed")

    def get_messages(self, include_compression: bool = True) -> List[Dict]:
        """
        Get messages in format for LLM API.

        Args:
            include_compression: Whether to prepend the compressed history
                as a system message.

        Returns:
            List of ``{"role": ..., "content": ...}`` dictionaries.
        """
        result = []

        # Add compressed history as system message
        if include_compression and self.compressed_history:
            result.append({
                "role": "system",
                "content": f"Previous conversation summary: {self.compressed_history}"
            })

        # Add current window
        for msg in self.messages:
            result.append({
                "role": msg.role,
                "content": msg.content
            })

        return result

    def get_token_count(self) -> Dict[str, int]:
        """
        Get token counts for cost estimation.

        Returns a dict comparing the actual (windowed + compressed) token
        footprint against the naive keep-everything approach, showing the
        savings from the sliding-window strategy.
        """
        # Current window tokens
        window_tokens = sum(msg.tokens for msg in self.messages)

        # Compressed history tokens
        compression_tokens = self._estimate_tokens(self.compressed_history or "")

        # What it would be WITHOUT compression
        naive_tokens = self.stats.total_tokens

        # Actual tokens with compression
        actual_tokens = window_tokens + compression_tokens

        return {
            "window_tokens": window_tokens,
            "compression_tokens": compression_tokens,
            "actual_tokens": actual_tokens,
            "naive_tokens": naive_tokens,
            "savings": naive_tokens - actual_tokens,
            "savings_percent": (
                ((naive_tokens - actual_tokens) / naive_tokens * 100)
                if naive_tokens > 0 else 0
            )
        }

    def clear(self):
        """Clear all memory (window, compressed history, and statistics)."""
        self.messages.clear()
        self.compressed_history = None
        self.stats = SessionStats(window_size=self.window_size)
        print("[OK] Session memory cleared")

    def get_stats(self) -> SessionStats:
        """Get session statistics."""
        return self.stats
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
# ============================================================================
|
|
288
|
+
# DEMO
|
|
289
|
+
# ============================================================================
|
|
290
|
+
|
|
291
|
+
def demo():
    """Run a self-contained demonstration of the sliding-window session memory.

    Simulates a 14-message conversation with a small window, then prints the
    final memory state and a token/cost comparison against the naive
    keep-everything approach.
    """
    print("=" * 70)
    print("SESSION MEMORY MANAGER DEMO")
    print("=" * 70)
    print("\nBased on Chapter 3.2: The Goldfish Metaphor")
    print("- Sharp about recent messages")
    print("- Compressed for older context")
    print("- Total amnesia for very old (cost savings!)")
    print("=" * 70)

    # Initialize with small window for demo.
    # BUG FIX: the class is named SessionMemory, not SessionMemoryManager;
    # the old name raised NameError at runtime.
    memory = SessionMemory(
        window_size=6,  # Keep last 6 messages
        compression_enabled=True
    )

    # Simulate a long conversation
    conversation = [
        ("user", "Hi! I need help with my Python code."),
        ("assistant", "Of course! I'd be happy to help. What's the issue?"),
        ("user", "I'm getting a TypeError when I try to concatenate strings."),
        ("assistant", "That usually happens when you try to concatenate a string with a non-string type. Can you show me the code?"),
        ("user", "Sure: result = 'Count: ' + count"),
        ("assistant", "I see the issue! 'count' is probably an integer. You need to convert it: result = 'Count: ' + str(count)"),
        ("user", "That worked! Thanks!"),
        ("assistant", "Great! Is there anything else you need help with?"),
        ("user", "Actually yes, how do I read a CSV file?"),
        ("assistant", "You can use the csv module or pandas. Here's a simple example with csv module..."),
        ("user", "What about using pandas instead?"),
        ("assistant", "With pandas it's even easier: import pandas as pd; df = pd.read_csv('file.csv')"),
        ("user", "Perfect! One more thing - how do I handle errors?"),
        ("assistant", "Use try-except blocks to handle exceptions gracefully..."),
    ]

    print("\n Simulating 14-message conversation...")
    print(f" Window size: {memory.window_size} messages\n")

    for i, (role, content) in enumerate(conversation, 1):
        print(f"{i}. {role.upper()}: {content[:50]}...")

        if role == "user":
            memory.add_user_message(content)
        else:
            memory.add_assistant_message(content)

        # Show window status every few messages
        if i % 4 == 0:
            print(f"\n [CHART] After {i} messages:")
            print(f" In window: {len(memory.messages)}")
            print(f" Compressed: {memory.stats.compressed_count}")
            if memory.compressed_history:
                print(f" Compression: {len(memory.compressed_history)} chars")
            print()

    # Show final state
    print("\n" + "="*70)
    print("FINAL MEMORY STATE")
    print("="*70)

    messages = memory.get_messages()

    if memory.compressed_history:
        print(f"\n Compressed History:")
        print(f" {memory.compressed_history}\n")

    print(f" Current Window ({len(memory.messages)} messages):")
    for msg in memory.messages:
        print(f" {msg.role.upper()}: {msg.content[:60]}...")

    # Token analysis
    print("\n" + "="*70)
    print(" COST ANALYSIS")
    print("="*70)

    tokens = memory.get_token_count()

    print(f"\nToken Counts:")
    print(f" Window tokens: {tokens['window_tokens']:,}")
    print(f" Compression tokens: {tokens['compression_tokens']:,}")
    print(f" Actual total: {tokens['actual_tokens']:,}")
    print()
    print(f"Without Compression:")
    print(f" Naive total: {tokens['naive_tokens']:,}")
    print()
    print(f" Savings:")
    print(f" Tokens saved: {tokens['savings']:,}")
    print(f" Percentage: {tokens['savings_percent']:.1f}%")

    # Cost estimate
    cost_per_1k = 0.0001  # Rough estimate
    actual_cost = (tokens['actual_tokens'] / 1000) * cost_per_1k
    naive_cost = (tokens['naive_tokens'] / 1000) * cost_per_1k

    print(f"\nCost Estimate (at ${cost_per_1k:.4f} per 1K tokens):")
    print(f" With sliding window: ${actual_cost:.6f} per query")
    print(f" Without compression: ${naive_cost:.6f} per query")
    print(f" Savings: ${naive_cost - actual_cost:.6f} per query")

    # Scale to many queries
    queries_per_day = 1000
    print(f"\nFor {queries_per_day:,} queries/day (30 days):")
    print(f" With sliding window: ${actual_cost * queries_per_day * 30:.2f}/month")
    print(f" Without: ${naive_cost * queries_per_day * 30:.2f}/month")
    print(f" Total savings: ${(naive_cost - actual_cost) * queries_per_day * 30:.2f}/month")

    print("\n" + "="*70)
    print("KEY INSIGHTS FROM CHAPTER 3.2")
    print("="*70)
    print("""
1. GOLDFISH MEMORY STRATEGY
 - Keep recent messages (sharp memory)
 - Compress older context
 - Forget very old (cost savings)

2. COST CONTROL
 - Fixed cost per conversation
 - No exponential growth
 - Predictable budgeting

3. QUALITY MAINTAINED
 - Important context preserved via compression
 - Recent messages at full detail
 - Good balance of context vs. cost

4. SCALABILITY
 - Window size adjustable
 - Compression depth configurable
 - Works for short or long conversations
""")
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
# Run the demonstration only when executed directly as a script.
if __name__ == "__main__":
    demo()
|