kite-agent 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. kite/__init__.py +46 -0
  2. kite/ab_testing.py +384 -0
  3. kite/agent.py +556 -0
  4. kite/agents/__init__.py +3 -0
  5. kite/agents/plan_execute.py +191 -0
  6. kite/agents/react_agent.py +509 -0
  7. kite/agents/reflective_agent.py +90 -0
  8. kite/agents/rewoo.py +119 -0
  9. kite/agents/tot.py +151 -0
  10. kite/conversation.py +125 -0
  11. kite/core.py +974 -0
  12. kite/data_loaders.py +111 -0
  13. kite/embedding_providers.py +372 -0
  14. kite/llm_providers.py +1278 -0
  15. kite/memory/__init__.py +6 -0
  16. kite/memory/advanced_rag.py +333 -0
  17. kite/memory/graph_rag.py +719 -0
  18. kite/memory/session_memory.py +423 -0
  19. kite/memory/vector_memory.py +579 -0
  20. kite/monitoring.py +611 -0
  21. kite/observers.py +107 -0
  22. kite/optimization/__init__.py +9 -0
  23. kite/optimization/resource_router.py +80 -0
  24. kite/persistence.py +42 -0
  25. kite/pipeline/__init__.py +5 -0
  26. kite/pipeline/deterministic_pipeline.py +323 -0
  27. kite/pipeline/reactive_pipeline.py +171 -0
  28. kite/pipeline_manager.py +15 -0
  29. kite/routing/__init__.py +6 -0
  30. kite/routing/aggregator_router.py +325 -0
  31. kite/routing/llm_router.py +149 -0
  32. kite/routing/semantic_router.py +228 -0
  33. kite/safety/__init__.py +6 -0
  34. kite/safety/circuit_breaker.py +360 -0
  35. kite/safety/guardrails.py +82 -0
  36. kite/safety/idempotency_manager.py +304 -0
  37. kite/safety/kill_switch.py +75 -0
  38. kite/tool.py +183 -0
  39. kite/tool_registry.py +87 -0
  40. kite/tools/__init__.py +21 -0
  41. kite/tools/code_execution.py +53 -0
  42. kite/tools/contrib/__init__.py +19 -0
  43. kite/tools/contrib/calculator.py +26 -0
  44. kite/tools/contrib/datetime_utils.py +20 -0
  45. kite/tools/contrib/linkedin.py +428 -0
  46. kite/tools/contrib/web_search.py +30 -0
  47. kite/tools/mcp/__init__.py +31 -0
  48. kite/tools/mcp/database_mcp.py +267 -0
  49. kite/tools/mcp/gdrive_mcp_server.py +503 -0
  50. kite/tools/mcp/gmail_mcp_server.py +601 -0
  51. kite/tools/mcp/postgres_mcp_server.py +490 -0
  52. kite/tools/mcp/slack_mcp_server.py +538 -0
  53. kite/tools/mcp/stripe_mcp_server.py +219 -0
  54. kite/tools/search.py +90 -0
  55. kite/tools/system_tools.py +54 -0
  56. kite/tools_manager.py +27 -0
  57. kite_agent-0.1.0.dist-info/METADATA +621 -0
  58. kite_agent-0.1.0.dist-info/RECORD +61 -0
  59. kite_agent-0.1.0.dist-info/WHEEL +5 -0
  60. kite_agent-0.1.0.dist-info/licenses/LICENSE +21 -0
  61. kite_agent-0.1.0.dist-info/top_level.txt +1 -0
kite/monitoring.py ADDED
@@ -0,0 +1,611 @@
1
+ """
2
+ Monitoring & Observability System
3
+ Complete production monitoring with metrics, tracing, and alerting.
4
+ """
5
+
6
+ import time
7
+ import logging
8
+ import threading
9
+ from datetime import datetime
10
+ from typing import Dict, Any, Optional, Callable, List
11
+ from functools import wraps
12
+ from collections import defaultdict, deque
13
+ import threading
14
+
15
+ try:
16
+ from prometheus_client import (
17
+ Counter, Histogram, Gauge, Info,
18
+ start_http_server, generate_latest
19
+ )
20
+ PROMETHEUS_AVAILABLE = True
21
+ except ImportError:
22
+ PROMETHEUS_AVAILABLE = False
23
+
24
# Cost per 1k tokens (Input, Output)
# USD per 1,000 tokens as (prompt_rate, completion_rate). Used by
# MetricsCollector.record_llm_usage to estimate spend when the caller does
# not pass an explicit cost. Lookup falls back to the prefix before ':'
# (e.g. "deepseek-r1:14b" -> "deepseek-r1") and finally to "default".
# NOTE(review): these are point-in-time vendor list prices — verify before
# using the estimates for actual billing.
MODEL_COSTS = {
    "gpt-4": (0.03, 0.06),
    "gpt-3.5-turbo": (0.0005, 0.0015),
    "claude-3-opus": (0.015, 0.075),
    "claude-3-sonnet": (0.003, 0.015),
    "claude-3-haiku": (0.00025, 0.00125),
    "deepseek-r1:14b": (0.0, 0.0),  # Local model — no per-token cost
    "ollama": (0.0, 0.0),           # Local runtime — no per-token cost
    "default": (0.0, 0.0)
}
35
+
36
class MetricsCollector:
    """
    Production-grade metrics collection.

    Tracks:
    - Request counts
    - Latencies
    - Error rates
    - Resource usage
    - Custom metrics

    All in-memory state is guarded by a reentrant lock, so public methods
    may call each other safely. Prometheus export is optional and active
    only when prometheus_client is importable AND enabled by the caller.
    """

    def __init__(self, enable_prometheus: bool = True):
        """
        Args:
            enable_prometheus: Export via prometheus_client when available.
                In-memory metrics are always collected regardless.
        """
        self.enable_prometheus = enable_prometheus and PROMETHEUS_AVAILABLE

        # In-memory metrics, keyed by "component.operation" (or other
        # namespaced keys such as "llm_usage.<model>").
        # Fixed: the original literal repeated the 'max_latency' key.
        self.metrics = defaultdict(lambda: {
            'count': 0,
            'errors': 0,
            'total_latency': 0,
            'min_latency': float('inf'),
            'max_latency': 0,
            'outcomes': defaultdict(int),
            'tokens_in': 0,
            'tokens_out': 0,
            'cost': 0.0
        })

        # RLock: get_detailed_report() holds it while calling get_summary(),
        # which re-acquires it.
        self.lock = threading.RLock()

        # Bounded history for dashboards. deque(maxlen=...) discards the
        # oldest entries automatically, so no manual trimming is needed.
        self.max_history = 1000
        self.request_history = deque(maxlen=self.max_history)
        self.error_logs = deque(maxlen=self.max_history)

        # Prometheus metrics
        if self.enable_prometheus:
            self._init_prometheus()

    def _init_prometheus(self):
        """Initialize Prometheus metric objects (export enabled only)."""
        # Request metrics
        self.request_counter = Counter(
            'kite_requests_total',
            'Total requests',
            ['component', 'operation']
        )

        self.request_latency = Histogram(
            'kite_request_latency_seconds',
            'Request latency',
            ['component', 'operation']
        )

        self.error_counter = Counter(
            'kite_errors_total',
            'Total errors',
            ['component', 'operation', 'error_type']
        )

        # Resource metrics
        self.active_requests = Gauge(
            'kite_active_requests',
            'Active requests',
            ['component']
        )

        # LLM metrics
        self.llm_tokens = Counter(
            'kite_llm_tokens_total',
            'Total LLM tokens',
            ['provider', 'model', 'type']
        )

        self.llm_cost = Counter(
            'kite_llm_cost_usd',
            'Total LLM cost in USD',
            ['provider', 'model']
        )

        # Memory metrics
        self.memory_operations = Counter(
            'kite_memory_operations_total',
            'Memory operations',
            ['type', 'operation']
        )

        # Circuit breaker metrics
        self.circuit_breaker_state = Gauge(
            'kite_circuit_breaker_state',
            'Circuit breaker state (0=closed, 1=open)',
            ['component']
        )

        # Info
        self.info = Info(
            'kite',
            'Framework information'
        )
        # NOTE(review): hard-coded version strings — likely stale (the wheel
        # is 0.1.0); confirm against package metadata.
        self.info.info({
            'version': '1.0.0',
            'python_version': '3.11'
        })

    def record_request(self, component: str, operation: str,
                       latency: float, success: bool = True,
                       error_type: Optional[str] = None):
        """Record a component request with timing and status.

        Args:
            component: Logical subsystem name (e.g. 'llm').
            operation: Operation within the component (e.g. 'chat').
            latency: Wall-clock duration in seconds.
            success: Whether the request succeeded.
            error_type: Error name (e.g. exception class) when success=False.

        Fixed: the original appended every request to request_history twice
        (and errors to error_logs twice) with inconsistent timestamp types,
        double-counting requests in get_summary(); it also contained dead
        trimming code that would have raised TypeError (deque.pop takes no
        index). Each request is now recorded exactly once; the deques'
        maxlen bounds their size.
        """
        key = f"{component}.{operation}"
        with self.lock:
            # Aggregate metrics
            data = self.metrics[key]
            data['count'] += 1
            data['total_latency'] += latency
            data['min_latency'] = min(data['min_latency'], latency)
            data['max_latency'] = max(data['max_latency'], latency)

            if not success:
                data['errors'] += 1

            # Single dashboard-history entry per request
            self.request_history.append({
                'timestamp': datetime.now(),
                'component': component,
                'operation': operation,
                'latency': latency,
                'success': success,
                'error_type': error_type
            })

            if not success:
                self.error_logs.append({
                    'timestamp': datetime.now(),
                    'component': component,
                    'operation': operation,
                    'error': error_type
                })

        # Prometheus export outside the lock; client objects are thread-safe.
        if self.enable_prometheus:
            self.request_counter.labels(
                component=component,
                operation=operation
            ).inc()

            self.request_latency.labels(
                component=component,
                operation=operation
            ).observe(latency)

            if not success:
                self.error_counter.labels(
                    component=component,
                    operation=operation,
                    error_type=error_type or 'unknown'
                ).inc()

    def record_outcome(self, component: str, outcome_type: str):
        """Record a domain-specific outcome counter for a component."""
        with self.lock:
            self.metrics[component]['outcomes'][outcome_type] += 1

    def record_llm_usage(self, provider: str, model: str,
                         prompt_tokens: int, completion_tokens: int,
                         cost: float = 0):
        """Record LLM token usage and (estimated) cost.

        When cost is 0, the cost is estimated from MODEL_COSTS using the
        model name (falling back to its prefix before ':', then 'default').
        """
        if self.enable_prometheus:
            self.llm_tokens.labels(
                provider=provider,
                model=model,
                type='prompt'
            ).inc(prompt_tokens)

            self.llm_tokens.labels(
                provider=provider,
                model=model,
                type='completion'
            ).inc(completion_tokens)

            if cost > 0:
                self.llm_cost.labels(
                    provider=provider,
                    model=model
                ).inc(cost)

        # In-memory tracking
        key = f"llm_usage.{model}"
        with self.lock:
            data = self.metrics[key]
            data['count'] += 1
            data['tokens_in'] += prompt_tokens
            data['tokens_out'] += completion_tokens

            # Calculate cost if not provided
            if cost == 0:
                rates = MODEL_COSTS.get(
                    model,
                    MODEL_COSTS.get(model.split(':')[0], MODEL_COSTS['default'])
                )
                estimated_cost = (prompt_tokens / 1000 * rates[0]) + \
                                 (completion_tokens / 1000 * rates[1])
                data['cost'] += estimated_cost
            else:
                data['cost'] += cost

    def record_memory_operation(self, mem_type: str, operation: str):
        """Record a memory subsystem operation (Prometheus only)."""
        if self.enable_prometheus:
            self.memory_operations.labels(
                type=mem_type,
                operation=operation
            ).inc()

    def set_circuit_breaker_state(self, component: str, is_open: bool):
        """Set circuit breaker gauge: 1 when open, 0 when closed."""
        if self.enable_prometheus:
            self.circuit_breaker_state.labels(
                component=component
            ).set(1 if is_open else 0)

    def get_metrics(self) -> Dict:
        """Get a shallow snapshot of current per-key metrics."""
        with self.lock:
            return dict(self.metrics)

    def start_server(self, port: int = 9090):
        """Start the Prometheus HTTP metrics server (no-op when disabled)."""
        if self.enable_prometheus:
            start_http_server(port)
            logging.info(f"Metrics server started on port {port}")

    def get_history(self) -> list:
        """Get recent request history (oldest first, bounded)."""
        with self.lock:
            return list(self.request_history)

    def get_error_logs(self) -> list:
        """Get recent error log entries (oldest first, bounded)."""
        with self.lock:
            return list(self.error_logs)

    def get_summary(self) -> Dict:
        """Get a high-level summary computed over the last 100 requests.

        Fixed: snapshots are now taken under the lock instead of reading
        the shared deques unlocked.
        """
        with self.lock:
            history = list(self.request_history)
            total_errors = len(self.error_logs)
        total_requests = len(history)

        if not history:
            return {
                "status": "idle",
                "success_rate": 1.0,
                "avg_latency": 0.0,
                "total_requests": total_requests,
                "total_errors": total_errors
            }

        recent = history[-100:]
        success_rate = sum(1 for r in recent if r['success']) / len(recent)
        avg_latency = sum(r['latency'] for r in recent) / len(recent)

        return {
            "status": "healthy" if success_rate > 0.9 else "degraded",
            "success_rate": success_rate,
            "avg_latency": avg_latency,
            "total_requests": total_requests,
            "total_errors": total_errors
        }

    def get_detailed_report(self) -> str:
        """Generate a human-readable detailed report of all metrics."""
        with self.lock:
            if not self.metrics:
                return "No metrics recorded."

            report = []
            report.append("\n" + "=" * 60)
            report.append("       📊 KITE SYSTEM PERFORMANCE REPORT")
            report.append("=" * 60)

            # 1. Summary metrics (RLock allows this nested acquisition)
            summary = self.get_summary()
            report.append(f"Status: {summary['status'].upper()}")
            report.append(f"Total Calls: {summary['total_requests']}")
            report.append(f"Success Rate: {summary['success_rate']:.1%}")
            report.append("-" * 60)

            # 2. Per-component breakdown
            report.append(f"{'Component':<20} | {'Calls':<6} | {'Errors':<6} | {'Avg Latency':<10} | {'Outcomes'}")
            report.append("-" * 60)

            for key, data in sorted(self.metrics.items()):
                avg_l = data['total_latency'] / data['count'] if data['count'] > 0 else 0
                outcomes_str = ", ".join([f"{k}:{v}" for k, v in data['outcomes'].items()])

                # Special formatting for LLM usage
                if key.startswith("llm_usage"):
                    report.append(f"{key:<30} | {data['count']:<6} | {data['tokens_in']} in / {data['tokens_out']} out | ${data['cost']:.4f}")
                else:
                    report.append(f"{key:<20} | {data['count']:<6} | {data['errors']:<6} | {avg_l:<10.3f}s | {outcomes_str}")

            report.append("=" * 60 + "\n")
            return "\n".join(report)
358
+
359
+
360
class Tracer:
    """
    Lightweight tracing for agent operations.

    Keeps a flat in-memory list of traces; each trace is a dict carrying
    timing fields plus an ordered list of spans. At most one trace is
    considered "current" at a time.
    """

    def __init__(self):
        self.traces = []
        self.current_trace = None
        self.lock = threading.Lock()

    def start_trace(self, trace_id: str, operation: str, metadata: Dict = None):
        """Open a new trace, record it, and make it the current one."""
        new_trace = {
            'trace_id': trace_id,
            'operation': operation,
            'start_time': time.time(),
            'metadata': metadata or {},
            'spans': [],
        }
        with self.lock:
            self.traces.append(new_trace)
            self.current_trace = new_trace
        return new_trace

    def add_span(self, name: str, metadata: Dict = None):
        """Attach a span to the current trace; no-op when none is active."""
        if not self.current_trace:
            return None

        new_span = {
            'name': name,
            'start_time': time.time(),
            'metadata': metadata or {},
        }
        with self.lock:
            self.current_trace['spans'].append(new_span)
        return new_span

    def end_span(self, span: Dict):
        """Stamp a span with its end time and computed duration."""
        finished_at = time.time()
        span['end_time'] = finished_at
        span['duration'] = finished_at - span['start_time']

    def end_trace(self):
        """Close the current trace, recording its total duration."""
        active = self.current_trace
        if not active:
            return
        active['end_time'] = time.time()
        active['duration'] = active['end_time'] - active['start_time']
        self.current_trace = None

    def get_traces(self, limit: int = 100) -> list:
        """Return up to the most recent `limit` traces."""
        with self.lock:
            return self.traces[-limit:]
422
+
423
+
424
def monitor(component: str, operation: str):
    """
    Decorator that times a call and reports it to the owner's metrics.

    If the wrapped callable's first positional argument exposes a
    ``metrics`` attribute (typically ``self`` on a component object), the
    request — latency, success flag, and error type — is recorded via
    ``record_request``. Exceptions propagate unchanged.

    Usage:
        @monitor('llm', 'chat')
        def chat(messages):
            return llm.chat(messages)
    """
    def decorator(func: Callable):
        @wraps(func)
        def wrapper(*args, **kwargs):
            started = time.time()
            ok = True
            failure_name = None
            try:
                return func(*args, **kwargs)
            except Exception as exc:
                ok = False
                failure_name = type(exc).__name__
                raise
            finally:
                elapsed = time.time() - started
                # Only report when the receiver carries a metrics collector.
                if args and hasattr(args[0], 'metrics'):
                    args[0].metrics.record_request(
                        component, operation, elapsed, ok, failure_name
                    )
        return wrapper
    return decorator
460
+
461
+
462
class HealthCheck:
    """
    Registry of named liveness checks for services.

    Each check is a zero-argument callable whose truthy/falsy result maps
    to 'healthy'/'unhealthy'; a raising check is reported as 'error'.
    """

    def __init__(self):
        self.checks = {}
        self.lock = threading.Lock()

    def register(self, name: str, check_func: Callable):
        """Register (or replace) the check stored under `name`."""
        with self.lock:
            self.checks[name] = check_func

    def run_checks(self) -> Dict:
        """Run every registered check and aggregate an overall status.

        Returns a dict with an overall 'status' plus a per-check 'checks'
        mapping. An empty registry reports 'healthy'.
        """
        results = {}
        for name, probe in self.checks.items():
            try:
                outcome = probe()
            except Exception as exc:
                results[name] = {
                    'status': 'error',
                    'error': str(exc),
                }
            else:
                results[name] = {
                    'status': 'healthy' if outcome else 'unhealthy',
                    'success': outcome,
                }

        overall_ok = all(
            entry['status'] == 'healthy' for entry in results.values()
        )
        return {
            'status': 'healthy' if overall_ok else 'unhealthy',
            'checks': results,
        }
503
+
504
+
505
class AlertManager:
    """
    Threshold-based alerting on top of MetricsCollector counters.

    Thresholds are compared against the 'count' field of the metric entry
    with the same key in the collector's snapshot.

    Fixed: the original accepted a `comparison` argument but check_alerts()
    only ever evaluated '>', so '<' thresholds silently never fired. All of
    '>', '>=', '<', '<=' are now supported; the default '>' behavior and
    message format are unchanged.
    """

    # Supported comparison operators for thresholds.
    _COMPARATORS = {
        '>': lambda value, limit: value > limit,
        '>=': lambda value, limit: value >= limit,
        '<': lambda value, limit: value < limit,
        '<=': lambda value, limit: value <= limit,
    }

    def __init__(self, metrics: "MetricsCollector"):
        """
        Args:
            metrics: Collector whose get_metrics() snapshot is inspected.
        """
        self.metrics = metrics
        self.alerts = []
        self.thresholds = {}

    def set_threshold(self, metric: str, threshold: float,
                      comparison: str = '>'):
        """Set (or replace) an alert threshold for a metric key.

        Args:
            metric: Metric key as it appears in the collector snapshot.
            threshold: Limit value to compare the metric's count against.
            comparison: One of '>', '>=', '<', '<='.
        """
        self.thresholds[metric] = {
            'threshold': threshold,
            'comparison': comparison
        }

    def check_alerts(self) -> list:
        """Check all configured thresholds against current metric counts.

        Returns:
            A list of alert dicts ('metric', 'value', 'threshold',
            'message') for every violated threshold; metrics missing from
            the snapshot or with an unknown comparison are skipped.
        """
        alerts = []
        metrics_data = self.metrics.get_metrics()

        for metric, config in self.thresholds.items():
            if metric not in metrics_data:
                continue

            value = metrics_data[metric]['count']
            threshold = config['threshold']
            compare = self._COMPARATORS.get(config['comparison'])

            if compare is not None and compare(value, threshold):
                alerts.append({
                    'metric': metric,
                    'value': value,
                    'threshold': threshold,
                    'message': (
                        f"{metric} exceeded threshold: "
                        f"{value} {config['comparison']} {threshold}"
                    )
                })

        return alerts
542
+
543
+
544
# Global instances
# Lazily-created, process-wide singletons; populated on first use by the
# corresponding get_* accessor functions below.
_metrics = None
_tracer = None
_health = None
_alerts = None
549
+
550
+
551
def get_metrics() -> MetricsCollector:
    """Return the process-wide MetricsCollector, creating it on first use."""
    global _metrics
    collector = _metrics
    if collector is None:
        collector = MetricsCollector()
        _metrics = collector
    return collector
557
+
558
+
559
def get_tracer() -> Tracer:
    """Return the process-wide Tracer, creating it on first use."""
    global _tracer
    instance = _tracer
    if instance is None:
        instance = Tracer()
        _tracer = instance
    return instance
565
+
566
+
567
def get_health_check() -> HealthCheck:
    """Return the process-wide HealthCheck registry, creating it on first use."""
    global _health
    registry = _health
    if registry is None:
        registry = HealthCheck()
        _health = registry
    return registry
573
+
574
+
575
def get_alert_manager() -> AlertManager:
    """Return the process-wide AlertManager (bound to the global metrics)."""
    global _alerts
    manager = _alerts
    if manager is None:
        manager = AlertManager(get_metrics())
        _alerts = manager
    return manager
581
+
582
+
583
if __name__ == "__main__":
    # Example usage: exercises the metrics, tracing, and health-check
    # singletons end to end and prints their state.
    print("Monitoring System Example\n")

    metrics = get_metrics()
    tracer = get_tracer()
    health = get_health_check()

    # Record some metrics (two successes and one Timeout failure)
    metrics.record_request('llm', 'chat', 0.5, success=True)
    metrics.record_request('llm', 'chat', 0.3, success=True)
    metrics.record_request('llm', 'chat', 1.2, success=False, error_type='Timeout')

    # Start a trace with one timed span
    trace = tracer.start_trace('trace-1', 'agent_run')
    span = tracer.add_span('llm_call')
    time.sleep(0.1)
    tracer.end_span(span)
    tracer.end_trace()

    # Health check: register two always-healthy probes
    health.register('llm', lambda: True)
    health.register('memory', lambda: True)

    print("Metrics:", metrics.get_metrics())
    print("\nTraces:", len(tracer.get_traces()))
    print("\nHealth:", health.run_checks())

    print("\n[OK] Monitoring system working")