kite_agent-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. kite/__init__.py +46 -0
  2. kite/ab_testing.py +384 -0
  3. kite/agent.py +556 -0
  4. kite/agents/__init__.py +3 -0
  5. kite/agents/plan_execute.py +191 -0
  6. kite/agents/react_agent.py +509 -0
  7. kite/agents/reflective_agent.py +90 -0
  8. kite/agents/rewoo.py +119 -0
  9. kite/agents/tot.py +151 -0
  10. kite/conversation.py +125 -0
  11. kite/core.py +974 -0
  12. kite/data_loaders.py +111 -0
  13. kite/embedding_providers.py +372 -0
  14. kite/llm_providers.py +1278 -0
  15. kite/memory/__init__.py +6 -0
  16. kite/memory/advanced_rag.py +333 -0
  17. kite/memory/graph_rag.py +719 -0
  18. kite/memory/session_memory.py +423 -0
  19. kite/memory/vector_memory.py +579 -0
  20. kite/monitoring.py +611 -0
  21. kite/observers.py +107 -0
  22. kite/optimization/__init__.py +9 -0
  23. kite/optimization/resource_router.py +80 -0
  24. kite/persistence.py +42 -0
  25. kite/pipeline/__init__.py +5 -0
  26. kite/pipeline/deterministic_pipeline.py +323 -0
  27. kite/pipeline/reactive_pipeline.py +171 -0
  28. kite/pipeline_manager.py +15 -0
  29. kite/routing/__init__.py +6 -0
  30. kite/routing/aggregator_router.py +325 -0
  31. kite/routing/llm_router.py +149 -0
  32. kite/routing/semantic_router.py +228 -0
  33. kite/safety/__init__.py +6 -0
  34. kite/safety/circuit_breaker.py +360 -0
  35. kite/safety/guardrails.py +82 -0
  36. kite/safety/idempotency_manager.py +304 -0
  37. kite/safety/kill_switch.py +75 -0
  38. kite/tool.py +183 -0
  39. kite/tool_registry.py +87 -0
  40. kite/tools/__init__.py +21 -0
  41. kite/tools/code_execution.py +53 -0
  42. kite/tools/contrib/__init__.py +19 -0
  43. kite/tools/contrib/calculator.py +26 -0
  44. kite/tools/contrib/datetime_utils.py +20 -0
  45. kite/tools/contrib/linkedin.py +428 -0
  46. kite/tools/contrib/web_search.py +30 -0
  47. kite/tools/mcp/__init__.py +31 -0
  48. kite/tools/mcp/database_mcp.py +267 -0
  49. kite/tools/mcp/gdrive_mcp_server.py +503 -0
  50. kite/tools/mcp/gmail_mcp_server.py +601 -0
  51. kite/tools/mcp/postgres_mcp_server.py +490 -0
  52. kite/tools/mcp/slack_mcp_server.py +538 -0
  53. kite/tools/mcp/stripe_mcp_server.py +219 -0
  54. kite/tools/search.py +90 -0
  55. kite/tools/system_tools.py +54 -0
  56. kite/tools_manager.py +27 -0
  57. kite_agent-0.1.0.dist-info/METADATA +621 -0
  58. kite_agent-0.1.0.dist-info/RECORD +61 -0
  59. kite_agent-0.1.0.dist-info/WHEEL +5 -0
  60. kite_agent-0.1.0.dist-info/licenses/LICENSE +21 -0
  61. kite_agent-0.1.0.dist-info/top_level.txt +1 -0
kite/memory/__init__.py
@@ -0,0 +1,6 @@
+ """Memory systems module."""
+ from .vector_memory import VectorMemory
+ from .session_memory import SessionMemory
+ from .graph_rag import GraphRAG
+
+ __all__ = ['VectorMemory', 'SessionMemory', 'GraphRAG']
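For orientation, this release's memory surface is just the three re-exports above, so consumers can import directly from the subpackage:

    from kite.memory import VectorMemory, SessionMemory, GraphRAG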
kite/memory/advanced_rag.py
@@ -0,0 +1,333 @@
+ """
+ Advanced RAG System
+ Enhances VectorMemory with query transformations, hybrid search, and re-ranking.
+ """
+
+ import json
+ import os
+ from typing import List, Dict, Optional, Tuple, Any
+
+ import numpy as np
+ from rank_bm25 import BM25Okapi
+
+ from .vector_memory import VectorMemory
+
+ # Advanced re-rankers: optional extras, so degrade gracefully when missing
+ try:
+     import cohere
+ except ImportError:
+     cohere = None
+ try:
+     from sentence_transformers import CrossEncoder
+ except ImportError:
+     CrossEncoder = None
+
+ class QueryTransformer:
+     """
+     Transforms user queries to improve retrieval performance.
+     """
+     def __init__(self, llm):
+         self.llm = llm
+
+     def hyde(self, query: str) -> str:
+         """Hypothetical Document Embeddings (HyDE): embed a generated answer instead of the raw query."""
+         prompt = f"Please write a short hypothetical document that answers the following query. This document will be used to retrieve relevant information from a database.\n\nQuery: {query}\n\nHypothetical Document:"
+         return self.llm.complete(prompt)
+
+     def multi_query(self, query: str, n: int = 3) -> List[str]:
+         """Generate multiple variations of the query."""
+         prompt = f"Generate {n} different variations of the following search query to improve retrieval. Output as a JSON list of strings.\n\nQuery: {query}\n\nVariations:"
+         response = self.llm.complete(prompt)
+         try:
+             # Try to parse JSON
+             queries = json.loads(response)
+             if isinstance(queries, list):
+                 return queries
+         except json.JSONDecodeError:
+             # Fall back to line splitting when the response is not valid JSON
+             return [line.strip() for line in response.split('\n') if line.strip() and not line.startswith('[') and not line.endswith(']')][:n]
+         return [query]
+
+     def expand(self, query: str) -> str:
+         """Query expansion with relevant keywords."""
+         prompt = f"Expand the following search query with relevant keywords and technical terms to improve search results. Return just the expanded query.\n\nQuery: {query}"
+         return self.llm.complete(prompt).strip()
+
+     def step_back(self, query: str) -> str:
+         """Step-back prompting: generate a broader, more general query."""
+         prompt = f"Generate a broader, higher-level technical question that provides context for the following specific query. This will help retrieve foundational concepts.\n\nSpecific Query: {query}\n\nStep-back Query:"
+         return self.llm.complete(prompt).strip()
+
+
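A note on usage: QueryTransformer only requires an object exposing a complete(prompt) -> str method, so any of the package's LLM providers should slot in. A minimal sketch with a hypothetical stub (StubLLM below is illustrative, not part of the package):

    import json

    class StubLLM:
        # Hypothetical stand-in for a kite LLM provider with a complete() method
        def complete(self, prompt: str) -> str:
            if "JSON list" in prompt:
                return json.dumps(["variant one", "variant two", "variant three"])
            return "A short hypothetical document that answers the query."

    transformer = QueryTransformer(StubLLM())
    print(transformer.hyde("How does hybrid search work?"))
    print(transformer.multi_query("How does hybrid search work?", n=3))

HyDE spends one extra LLM call to produce a document-shaped embedding target, which tends to sit closer to stored chunks in embedding space than a short query does.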
+ class AdvancedRAG:
+     """
+     Wraps VectorMemory with advanced search strategies.
+     """
+     def __init__(self, vector_memory: VectorMemory, llm=None):
+         self.memory = vector_memory
+         self.llm = llm or vector_memory.embedding_provider  # Fall back to the embedding provider when no LLM is supplied
+         self.transformer = QueryTransformer(self.llm) if self.llm else None
+         self.bm25 = None
+         self.corpus = []
+         self.id_map = []
+         self.cohere_client = None
+         self.cross_encoder = None
+
+         # Recursive-retrieval mappings
+         self.child_to_parent = {}  # child_id -> parent_id
+         self.parents = {}          # parent_id -> parent_text
+
+     def initialize_bm25(self, documents: List[Dict]):
+         """
+         Initialize the BM25 index with a list of documents.
+         documents: list of {'id': id, 'text': text}
+         """
+         self.corpus = [doc['text'].lower().split() for doc in documents]
+         self.id_map = [doc['id'] for doc in documents]
+         self.bm25 = BM25Okapi(self.corpus)
+         print(f"[OK] BM25 initialized with {len(self.corpus)} documents")
+
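BM25 indexing is explicit rather than automatic, so the keyword side of hybrid search only works after initialize_bm25 has been called with the same documents that live in the vector store. A minimal sketch (vector_memory is assumed to be an existing VectorMemory instance):

    rag = AdvancedRAG(vector_memory)
    rag.initialize_bm25([
        {"id": "doc_1", "text": "BM25 ranks documents by keyword overlap with the query."},
        {"id": "doc_2", "text": "Vector search ranks documents by embedding similarity."},
    ])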
+     def search(self,
+                query: str,
+                strategy: str = "simple",
+                k: int = 5,
+                alpha: float = 0.5) -> List[Tuple]:
+         """
+         Perform a search using the specified strategy.
+         Strategies: simple, hyde, multi_query, hybrid, recursive
+         """
+         if strategy == "hyde" and self.transformer:
+             transformed_query = self.transformer.hyde(query)
+             return self.memory.search(transformed_query, k=k)
+
+         elif strategy == "multi_query" and self.transformer:
+             queries = self.transformer.multi_query(query)
+             all_results = []
+             for q in queries:
+                 all_results.extend(self.memory.search(q, k=k))
+
+             # Simple deduplication by doc_id
+             seen = set()
+             unique_results = []
+             for res in all_results:
+                 if res[0] not in seen:
+                     unique_results.append(res)
+                     seen.add(res[0])
+             return unique_results[:k]
+
+         elif strategy == "hybrid":
+             return self._hybrid_search(query, k=k, alpha=alpha)
+
+         elif strategy == "recursive":
+             return self.search_recursive(query, k=k)
+
+         return self.memory.search(query, k=k)
+
+     def hybrid_search(self, query: str, top_k: int = 5, alpha: float = 0.5) -> Dict[str, Any]:
+         """
+         Public hybrid search that returns a structured dictionary.
+         """
+         results = self._hybrid_search(query, k=top_k, alpha=alpha)
+
+         if not results:
+             return {
+                 "answer": "No relevant documents found.",
+                 "documents": [],
+                 "success": False
+             }
+
+         return {
+             "answer": results[0][1],  # Top result's text as the answer
+             "documents": [{"id": r[0], "content": r[1], "score": r[2]} for r in results],
+             "success": True,
+             "source": "hybrid_rag"
+         }
+
+     def _hybrid_search(self, query: str, k: int = 5, alpha: float = 0.5) -> List[Tuple]:
+         """
+         Combines BM25 and vector search using weighted fusion.
+         alpha: 0 = semantic only, 1 = keyword only
+         """
+         # 1. Semantic search
+         vector_results = self.memory.search(query, k=k*2)
+
+         # 2. Keyword search
+         if not self.bm25:
+             return vector_results[:k]
+
+         tokenized_query = query.lower().split()
+         bm25_scores = self.bm25.get_scores(tokenized_query)
+
+         # Normalize scores to the 0-1 range
+         if len(bm25_scores) > 0:
+             max_s = max(bm25_scores)
+             min_s = min(bm25_scores)
+             if max_s > min_s:
+                 bm25_scores = [(s - min_s) / (max_s - min_s) for s in bm25_scores]
+             else:
+                 bm25_scores = [1.0] * len(bm25_scores)
+
+         # 3. Weighted fusion: alpha * keyword + (1 - alpha) * semantic
+         # Note: Chroma returns distances (lower is better), so 1 - distance is used as the similarity
+         combined_scores = {}
+
+         # Add vector results
+         for doc_id, text, distance in vector_results:
+             # Strip the chunk suffix for deduplication
+             base_id = doc_id.split('_chunk_')[0]
+             score = 1 - distance  # Similarity
+
+             if base_id in combined_scores:
+                 # Keep the highest score if already present
+                 combined_scores[base_id]['score'] = max(combined_scores[base_id]['score'], (1 - alpha) * score)
+             else:
+                 combined_scores[base_id] = {
+                     'text': text,
+                     'score': (1 - alpha) * score
+                 }
+
+         # Add BM25 results
+         for i, score in enumerate(bm25_scores):
+             doc_id = self.id_map[i]
+             # Strip the chunk suffix for deduplication
+             base_id = doc_id.split('_chunk_')[0]
+             text = " ".join(self.corpus[i])
+
+             if base_id in combined_scores:
+                 combined_scores[base_id]['score'] += alpha * score
+             else:
+                 combined_scores[base_id] = {
+                     'text': text,
+                     'score': alpha * score
+                 }
+
+         # Sort by fused score and return the top k
+         sorted_results = sorted(
+             combined_scores.items(),
+             key=lambda x: x[1]['score'],
+             reverse=True
+         )
+
+         return [(doc_id, info['text'], info['score']) for doc_id, info in sorted_results[:k]]
+
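To make the fusion weighting concrete, a worked sketch with illustrative numbers:

    alpha = 0.5              # equal weight for keyword and semantic signals
    bm25_norm = 0.8          # normalized BM25 score for some document
    similarity = 1 - 0.3     # Chroma distance of 0.3 becomes similarity 0.7
    fused = alpha * bm25_norm + (1 - alpha) * similarity
    print(fused)             # 0.75

Because BM25 scores are min-max normalized per query and vector distances are flipped into similarities, both signals land on a comparable 0-1 scale before the weighted sum.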
+     def mmr(self, query: str, results: List[Tuple], k: int = 3, lambda_param: float = 0.5) -> List[Tuple]:
+         """
+         Maximal Marginal Relevance (MMR) for diversification.
+         lambda_param: 1.0 = relevance only, 0.0 = diversity only
+         """
+         if not results or len(results) <= k:
+             return results[:k]
+
+         # Get the query embedding
+         query_emb = self.memory._get_embedding(query)
+
+         # Get document embeddings.
+         # Re-embedding here is expensive; ideally VectorMemory would return or cache embeddings.
+         doc_embs = [self.memory._get_embedding(res[1]) for res in results]
+
+         selected_indices = [0]
+         remaining_indices = list(range(1, len(results)))
+
+         while len(selected_indices) < k and remaining_indices:
+             best_score = -float('inf')
+             best_idx = -1
+
+             for i in remaining_indices:
+                 # Similarity to the query
+                 rel = np.dot(query_emb, doc_embs[i]) / (np.linalg.norm(query_emb) * np.linalg.norm(doc_embs[i]))
+
+                 # Similarity to already-selected docs (redundancy)
+                 max_sim = max([np.dot(doc_embs[i], doc_embs[j]) / (np.linalg.norm(doc_embs[i]) * np.linalg.norm(doc_embs[j])) for j in selected_indices])
+
+                 mmr_score = lambda_param * rel - (1 - lambda_param) * max_sim
+
+                 if mmr_score > best_score:
+                     best_score = mmr_score
+                     best_idx = i
+
+             if best_idx != -1:
+                 selected_indices.append(best_idx)
+                 remaining_indices.remove(best_idx)
+             else:
+                 break
+
+         return [results[i] for i in selected_indices]
+
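The loop above implements the standard MMR criterion: for each candidate d against the selected set S, score(d) = lambda * sim(d, query) - (1 - lambda) * max over d' in S of sim(d, d'). A quick numeric sketch with illustrative values:

    lambda_param = 0.5
    rel = 0.9      # candidate's cosine similarity to the query
    max_sim = 0.8  # its highest similarity to anything already selected
    print(lambda_param * rel - (1 - lambda_param) * max_sim)  # 0.05: relevant, but heavily penalized as redundant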
+     def rerank_cohere(self, query: str, results: List[Tuple], top_n: int = 3) -> List[Tuple]:
+         """Rerank using the Cohere Rerank API."""
+         if cohere is None:
+             raise ImportError("Cohere is not installed. Run: pip install cohere")
+         if not self.cohere_client:
+             self.cohere_client = cohere.Client(os.getenv("COHERE_API_KEY"))
+
+         docs = [res[1] for res in results]
+         rerank_results = self.cohere_client.rerank(
+             query=query, documents=docs, top_n=top_n, model="rerank-english-v3.0"
+         )
+
+         output = []
+         for res in rerank_results.results:
+             idx = res.index
+             output.append((results[idx][0], results[idx][1], res.relevance_score))
+         return output
+
+     def rerank_cross_encoder(self, query: str, results: List[Tuple], top_n: int = 3) -> List[Tuple]:
+         """Rerank using a cross-encoder model."""
+         if CrossEncoder is None:
+             raise ImportError("sentence-transformers is not installed. Run: pip install sentence-transformers")
+         if not self.cross_encoder:
+             self.cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
+
+         pairs = [[query, res[1]] for res in results]
+         scores = self.cross_encoder.predict(pairs)
+
+         scored_results = []
+         for i, score in enumerate(scores):
+             scored_results.append((results[i][0], results[i][1], float(score)))
+
+         scored_results.sort(key=lambda x: x[2], reverse=True)
+         return scored_results[:top_n]
+
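Both rerankers consume the (id, text, score) tuples that search() produces. A minimal sketch, assuming the sentence-transformers extra is installed (the Cohere path additionally needs COHERE_API_KEY in the environment):

    candidates = rag.search("how does weighted fusion work?", strategy="hybrid", k=10)
    top_3 = rag.rerank_cross_encoder("how does weighted fusion work?", candidates, top_n=3)
    for doc_id, text, score in top_3:
        print(f"{score:.3f}  {doc_id}")

The usual pattern is to over-fetch with the cheap first-stage retriever and let the cross-encoder, which scores each query-document pair jointly, pick the final few.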
+     def add_document_recursive(self, parent_id: str, text: str):
+         """
+         Store a document with parent-child chunking.
+         The agent searches small chunks but retrieves the larger parent context.
+         """
+         # Store the parent
+         self.parents[parent_id] = text
+
+         # Chunk into small pieces for better retrieval
+         chunks = self.memory._chunk_text(text, chunk_size=200, overlap=20)
+         for i, chunk in enumerate(chunks):
+             child_id = f"{parent_id}_small_{i}"
+             self.memory.add_document(child_id, chunk, auto_chunk=False)
+             self.child_to_parent[child_id] = parent_id
+
+         print(f"[OK] Added document {parent_id} recursively with {len(chunks)} small chunks")
+
+     def search_recursive(self, query: str, k: int = 3) -> List[Tuple]:
+         """
+         Search small chunks but return parent contexts.
+         """
+         child_results = self.memory.search(query, k=k*2)
+
+         parent_results = []
+         seen_parents = set()
+
+         for child_id, _, score in child_results:
+             # Map the child back to its parent.
+             # Handle both the explicit mapping and the child-id naming convention.
+             parent_id = self.child_to_parent.get(child_id)
+             if not parent_id and "_small_" in child_id:
+                 parent_id = child_id.split("_small_")[0]
+
+             if parent_id and parent_id in self.parents:
+                 if parent_id not in seen_parents:
+                     parent_results.append((parent_id, self.parents[parent_id], score))
+                     seen_parents.add(parent_id)
+
+             if len(parent_results) >= k:
+                 break
+
+         # If no parents were found, fall back to the raw chunk results
+         return parent_results if parent_results else child_results[:k]
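
A minimal end-to-end sketch of the parent-child flow (again assuming an existing vector_memory; note that add_document_recursive relies on VectorMemory's private _chunk_text helper):

    rag = AdvancedRAG(vector_memory)
    rag.add_document_recursive("guide_rag", long_guide_text)  # long_guide_text: any long document string
    for parent_id, context, score in rag.search_recursive("when should I use hybrid search?"):
        print(parent_id, score)

Small chunks are what get embedded and matched, so retrieval stays precise, while the parent text that comes back preserves enough surrounding context for an LLM to reason over.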