PyPI - kairo-code - Versions diffs - 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

kairo-code 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

kairo/backend/api/agents.py +337 -16
kairo/backend/app.py +84 -4
kairo/backend/config.py +4 -2
kairo/backend/models/agent.py +216 -2
kairo/backend/models/api_key.py +4 -1
kairo/backend/models/task.py +31 -0
kairo/backend/models/user_provider_key.py +26 -0
kairo/backend/schemas/agent.py +249 -2
kairo/backend/schemas/api_key.py +3 -0
kairo/backend/services/agent/__init__.py +52 -0
kairo/backend/services/agent/agent_alerts_evaluation_service.py +224 -0
kairo/backend/services/agent/agent_alerts_service.py +201 -0
kairo/backend/services/agent/agent_commands_service.py +142 -0
kairo/backend/services/agent/agent_crud_service.py +150 -0
kairo/backend/services/agent/agent_events_service.py +103 -0
kairo/backend/services/agent/agent_heartbeat_service.py +207 -0
kairo/backend/services/agent/agent_metrics_rollup_service.py +248 -0
kairo/backend/services/agent/agent_metrics_service.py +259 -0
kairo/backend/services/agent/agent_service.py +315 -0
kairo/backend/services/agent/agent_setup_service.py +180 -0
kairo/backend/services/agent/constants.py +28 -0
kairo/backend/services/agent_service.py +18 -102
kairo/backend/services/api_key_service.py +23 -3
kairo/backend/services/byok_service.py +204 -0
kairo/backend/services/chat_service.py +398 -63
kairo/backend/services/deep_search_service.py +159 -0
kairo/backend/services/email_service.py +418 -19
kairo/backend/services/few_shot_service.py +223 -0
kairo/backend/services/post_processor.py +261 -0
kairo/backend/services/rag_service.py +150 -0
kairo/backend/services/task_service.py +119 -0
kairo/backend/tests/__init__.py +1 -0
kairo/backend/tests/e2e/__init__.py +1 -0
kairo/backend/tests/e2e/agents/__init__.py +1 -0
kairo/backend/tests/e2e/agents/conftest.py +389 -0
kairo/backend/tests/e2e/agents/test_agent_alerts.py +802 -0
kairo/backend/tests/e2e/agents/test_agent_commands.py +456 -0
kairo/backend/tests/e2e/agents/test_agent_crud.py +455 -0
kairo/backend/tests/e2e/agents/test_agent_events.py +415 -0
kairo/backend/tests/e2e/agents/test_agent_heartbeat.py +520 -0
kairo/backend/tests/e2e/agents/test_agent_metrics.py +587 -0
kairo/backend/tests/e2e/agents/test_agent_setup.py +349 -0
kairo/migrations/versions/010_agent_dashboard.py +246 -0
{kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/METADATA +1 -1
{kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/RECORD +50 -16
{kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/top_level.txt +1 -0
kairo_migrations/env.py +92 -0
kairo_migrations/versions/001_add_agent_dashboard_extensions.py +450 -0
{kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/WHEEL +0 -0
{kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/entry_points.txt +0 -0

kairo/backend/services/chat_service.py CHANGED Viewed

@@ -13,9 +13,22 @@ from backend.services.web_search_service import (
     web_search,
     format_search_results,
 )
+from backend.services.deep_search_service import deep_search
+from backend.services.post_processor import validate_response
+from backend.services.rag_service import lookup_kairo_docs
+from backend.services.few_shot_service import get_few_shot_examples, get_output_format_instructions
 logger = logging.getLogger(__name__)
+# Signals that indicate a complex query benefiting from chain-of-thought
+_COMPLEX_SIGNALS = frozenset([
+    'algorithm', 'optimize', 'debug', 'architecture', 'design pattern',
+    'trade-off', 'tradeoff', 'compare', 'pros and cons', 'best approach',
+    'refactor', 'performance', 'scale', 'security', 'why does', 'how should',
+    'what would happen', 'difference between', 'which is better',
+])
 WEB_SEARCH_TOOL = {
     "type": "function",
     "function": {
@@ -33,7 +46,154 @@ WEB_SEARCH_TOOL = {
         },
     },
 }
-TOOLS = [WEB_SEARCH_TOOL]
+DEEP_SEARCH_TOOL = {
+    "type": "function",
+    "function": {
+        "name": "deep_search",
+        "description": (
+            "Search the web AND read the actual page content from top results. "
+            "Use this when you need detailed documentation, API references, code examples, "
+            "or technical specifications. This fetches real page content, not just snippets. "
+            "Prefer this over web_search when the user needs accurate technical details."
+        ),
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "query": {
+                    "type": "string",
+                    "description": "The search query — be specific, e.g. 'TheSportsDB API documentation endpoints'",
+                }
+            },
+            "required": ["query"],
+        },
+    },
+}
+KAIRO_DOCS_TOOL = {
+    "type": "function",
+    "function": {
+        "name": "lookup_kairo_docs",
+        "description": (
+            "Look up Kairo API documentation. Use this when the user asks about "
+            "the Kairo API, how to use the Kairo API, Kairo endpoints, API keys, "
+            "or wants to write code that integrates with Kairo/Kairon Labs. "
+            "This returns accurate, internal documentation."
+        ),
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "topic": {
+                    "type": "string",
+                    "description": "What to look up, e.g. 'chat completions', 'authentication', 'python sdk', 'streaming'",
+                }
+            },
+            "required": ["topic"],
+        },
+    },
+}
+TOOLS = [WEB_SEARCH_TOOL, DEEP_SEARCH_TOOL, KAIRO_DOCS_TOOL]
+_CODE_SIGNALS = frozenset([
+    'code', 'function', 'api', 'endpoint', 'implement', 'debug', 'error',
+    'fix', 'python', 'javascript', 'typescript', 'sql', 'html', 'css',
+    'write a script', 'write a program', 'how to use', 'integrate', 'bug',
+    'class', 'method', 'variable', 'compile', 'runtime', 'import', 'package',
+    'npm', 'pip', 'docker', 'server', 'database', 'query', 'regex',
+])
+_FACTUAL_SIGNALS = frozenset([
+    'what is', 'who is', 'when did', 'how many', 'explain', 'define',
+    'compare', 'difference between', 'how does', 'why does', 'list',
+])
+def _adaptive_temperature(message: str, model: str, default: float = 0.7) -> float:
+    """Select temperature based on query type. Lower = more precise."""
+    msg_lower = message.lower()
+    if any(s in msg_lower for s in _CODE_SIGNALS):
+        return 0.2 if model == "nyx-lite" else 0.3
+    if any(s in msg_lower for s in _FACTUAL_SIGNALS):
+        return 0.3 if model == "nyx-lite" else 0.4
+    # General default — slightly lower for the smaller model
+    return 0.5 if model == "nyx-lite" else default
+def _needs_chain_of_thought(message: str) -> bool:
+    """Detect if query would benefit from explicit reasoning."""
+    msg_lower = message.lower()
+    return any(s in msg_lower for s in _COMPLEX_SIGNALS)
+def _inject_chain_of_thought(message: str, model: str) -> str:
+    """Wrap complex queries with reasoning instruction for small models."""
+    if model != "nyx-lite":
+        return message
+    if not _needs_chain_of_thought(message):
+        return message
+    return f"""Think through this step by step:
+1. Understand what is being asked
+2. Consider the key factors
+3. Reason through the options
+4. Provide your answer
+Question: {message}"""
+def _simplify_tools_for_model(tools: list[dict], model: str) -> list[dict]:
+    """Simplify tool definitions for small models.
+    Small models work better with:
+    - Shorter descriptions
+    - Only required parameters
+    - Usage examples in descriptions
+    """
+    if model != "nyx-lite":
+        return tools
+    simplified = []
+    for tool in tools:
+        func = tool["function"]
+        params = func["parameters"]
+        # Build simplified tool with example in description
+        name = func["name"]
+        desc = func["description"]
+        # Add usage example to description
+        if name == "web_search":
+            desc = 'Search web for current info. Example: {"query": "Python 3.12 new features"}'
+        elif name == "deep_search":
+            desc = 'Search AND read page content. Example: {"query": "FastAPI OAuth2 tutorial"}'
+        elif name == "lookup_kairo_docs":
+            desc = 'Look up Kairo API docs. Example: {"topic": "authentication"}'
+        simple_tool = {
+            "type": "function",
+            "function": {
+                "name": name,
+                "description": desc,
+                "parameters": {
+                    "type": "object",
+                    "properties": {},
+                    "required": params.get("required", []),
+                },
+            },
+        }
+        # Only include required parameters with shortened descriptions
+        for key in params.get("required", []):
+            if key in params.get("properties", {}):
+                prop = params["properties"][key]
+                simple_tool["function"]["parameters"]["properties"][key] = {
+                    "type": prop.get("type", "string"),
+                    "description": prop.get("description", "")[:80],
+                }
+        simplified.append(simple_tool)
+    logger.debug("Simplified %d tools for small model", len(simplified))
+    return simplified
 def _estimate_tokens(text: str) -> int:
@@ -142,13 +302,20 @@ class ChatService:
         # Build context-aware history
         history = self._build_history(conv, model)
+        # Adaptive temperature: override default 0.7 based on query type
+        temperature = _adaptive_temperature(message, model, temperature)
         logger.info(
-            "Streaming conv=%s model=%s history_msgs=%d",
-            conv.id, model, len(history),
+            "Streaming conv=%s model=%s history_msgs=%d temp=%.2f",
+            conv.id, model, len(history), temperature,
         )
         # Stream from LLM with tool calling support
+        # Use simplified tools for small models
+        model_tools = _simplify_tools_for_model(TOOLS, model)
         full_response = ""
+        tool_result_text = None
         usage_data = None
         try:
             tool_calls_result = None
@@ -157,7 +324,7 @@ class ChatService:
                 model=model,
                 temperature=temperature,
                 max_tokens=max_tokens,
-                tools=TOOLS,
+                tools=model_tools,
             ):
                 if isinstance(chunk, dict):
                     if chunk.get("type") == "fallback":
@@ -182,26 +349,50 @@ class ChatService:
                             continue
                         results = await web_search(args.get("query", message))
                         result_text = format_search_results(results)
+                        tool_result_text = result_text
                         logger.info("Tool call web_search(%s) returned %d results", args.get("query"), len(results))
-                        # Append tool call + result to history for second LLM call
-                        history.append({
-                            "role": "assistant",
-                            "content": None,
-                            "tool_calls": [{
-                                "id": call["id"],
-                                "type": "function",
-                                "function": {
-                                    "name": call["name"],
-                                    "arguments": call["arguments"],
-                                },
-                            }],
-                        })
-                        history.append({
-                            "role": "tool",
-                            "tool_call_id": call["id"],
-                            "content": result_text or "No results found.",
-                        })
+                    elif call["name"] == "deep_search":
+                        yield _sse({"type": "status", "content": "Reading documentation..."})
+                        try:
+                            args = json.loads(call["arguments"])
+                        except json.JSONDecodeError:
+                            logger.warning("Invalid tool call arguments: %s", call["arguments"])
+                            continue
+                        result_text = await deep_search(args.get("query", message))
+                        tool_result_text = result_text
+                        logger.info("Tool call deep_search(%s) returned %d chars", args.get("query"), len(result_text))
+                    elif call["name"] == "lookup_kairo_docs":
+                        yield _sse({"type": "status", "content": "Looking up Kairo docs..."})
+                        try:
+                            args = json.loads(call["arguments"])
+                        except json.JSONDecodeError:
+                            logger.warning("Invalid tool call arguments: %s", call["arguments"])
+                            continue
+                        result_text = lookup_kairo_docs(args.get("topic", message))
+                        tool_result_text = result_text
+                        logger.info("Tool call lookup_kairo_docs(%s) returned %d chars", args.get("topic"), len(result_text))
+                    else:
+                        logger.warning("Unknown tool call: %s", call["name"])
+                        continue
+                    # Append tool call + result to history for second LLM call
+                    history.append({
+                        "role": "assistant",
+                        "content": None,
+                        "tool_calls": [{
+                            "id": call["id"],
+                            "type": "function",
+                            "function": {
+                                "name": call["name"],
+                                "arguments": call["arguments"],
+                            },
+                        }],
+                    })
+                    history.append({
+                        "role": "tool",
+                        "tool_call_id": call["id"],
+                        "content": result_text or "No results found.",
+                    })
                 # Second LLM call — no tools, generate final response using search results
                 async for chunk in self.llm_service.stream_chat(
@@ -224,6 +415,10 @@ class ChatService:
         # Post-process: strip duplicate trailing code blocks
         full_response = _strip_duplicate_trailing_code(full_response)
+        # Post-process: validate against tool results
+        if tool_result_text:
+            full_response = validate_response(full_response, tool_result_text)
         # Save assistant response
         if full_response:
             await self.conversation_service.add_message(conv.id, "assistant", full_response)
@@ -254,6 +449,46 @@ class ChatService:
         yield "data: [DONE]\n\n"
+    # Short reminder injected before the last user message so the model
+    # doesn't forget it has tools available in longer conversations.
+    _TOOL_REMINDER = (
+        "[You have tools: web_search (quick facts), "
+        "deep_search (reads actual web pages — use for APIs, docs, code), "
+        "and lookup_kairo_docs (Kairo API docs). "
+        "Use deep_search for any technical or documentation question. "
+        "Use lookup_kairo_docs for anything about the Kairo API.]"
+    )
+    def _score_message_importance(self, msg, index: int, total: int) -> float:
+        """Score message importance for context retention.
+        Higher scores = more important to keep in context.
+        """
+        score = 0.0
+        content = msg.content.lower()
+        # Recency bonus (0-3 points) - newer messages score higher
+        recency = (index / max(total, 1)) * 3
+        score += recency
+        # Code content bonus - small models need code context
+        if '```' in msg.content:
+            score += 2.0
+        # Error/fix context is valuable for debugging continuity
+        if any(w in content for w in ['error', 'fix', 'bug', 'issue', 'problem', 'traceback']):
+            score += 1.5
+        # User preferences/decisions should be remembered
+        if any(w in content for w in ['i want', 'i need', 'please', 'should be', 'must', 'don\'t']):
+            score += 1.0
+        # Technical specifications
+        if any(w in content for w in ['file:', 'path:', 'url:', 'config', 'setting', 'version']):
+            score += 1.0
+        return score
     def _build_history(self, conv, model: str) -> list[dict[str, str]]:
         context_limit = settings.CONTEXT_LIMITS.get(model, 6000)
         history: list[dict[str, str]] = []
@@ -281,29 +516,91 @@ class ChatService:
             history.append({"role": "system", "content": summary_msg})
             token_count += _estimate_tokens(summary_msg)
-        # Add recent messages, working backwards to prioritize the latest
         messages = list(conv.messages)
-        to_include = []
-        for msg in reversed(messages):
-            msg_tokens = _estimate_tokens(msg.content)
-            if token_count + msg_tokens > context_limit:
+        # Get the last user message for few-shot and format injection
+        last_user_content = ""
+        for m in reversed(messages):
+            if m.role == "user":
+                last_user_content = m.content
                 break
-            to_include.append({"role": msg.role, "content": msg.content})
-            token_count += msg_tokens
-        # Reverse back to chronological order
-        to_include.reverse()
+        # Inject few-shot examples for small models (based on query type)
+        few_shot = get_few_shot_examples(last_user_content, model)
+        if few_shot:
+            history.append({"role": "system", "content": few_shot})
+            token_count += _estimate_tokens(few_shot)
+        # Inject output format instructions for small models
+        format_instructions = get_output_format_instructions(last_user_content, model)
+        if format_instructions:
+            history.append({"role": "system", "content": format_instructions})
+            token_count += _estimate_tokens(format_instructions)
+        # Reserve budget for tool reminder (~40 tokens)
+        reminder_tokens = _estimate_tokens(self._TOOL_REMINDER)
+        message_budget = context_limit - token_count - reminder_tokens
+        # IMPORTANCE-WEIGHTED CONTEXT RETENTION
+        # Always include last 4 messages (current context)
+        must_include_count = min(4, len(messages))
+        must_include_indices = set(range(len(messages) - must_include_count, len(messages)))
+        # Score remaining messages by importance
+        scored_messages = [
+            (i, msg, self._score_message_importance(msg, i, len(messages)))
+            for i, msg in enumerate(messages)
+            if i not in must_include_indices
+        ]
+        scored_messages.sort(key=lambda x: x[2], reverse=True)
-        # Ensure we at least include the very last message (the new user msg)
-        if not to_include and messages:
-            last = messages[-1]
-            to_include = [{"role": last.role, "content": last.content}]
+        # Calculate budget used by must-include messages
+        selected_indices = must_include_indices.copy()
+        budget_used = sum(
+            _estimate_tokens(messages[i].content) for i in must_include_indices
+        )
+        # Add high-importance messages until budget exhausted
+        for i, msg, score in scored_messages:
+            msg_tokens = _estimate_tokens(msg.content)
+            if budget_used + msg_tokens > message_budget:
+                continue
+            selected_indices.add(i)
+            budget_used += msg_tokens
-        history.extend(to_include)
+        # Build message list in chronological order
+        all_msgs = []
+        for i in sorted(selected_indices):
+            msg = messages[i]
+            content = msg.content
+            # Apply chain-of-thought to the last user message for small models
+            if i == len(messages) - 1 and msg.role == "user":
+                content = _inject_chain_of_thought(content, model)
+            all_msgs.append({"role": msg.role, "content": content})
+        # Ensure we at least include the very last message
+        if not all_msgs and messages:
+            last = messages[-1]
+            content = _inject_chain_of_thought(last.content, model) if last.role == "user" else last.content
+            all_msgs = [{"role": last.role, "content": content}]
+        # Inject tool reminder right before the final user message
+        # so it's fresh in the model's attention
+        if len(all_msgs) >= 1:
+            history.extend(all_msgs[:-1])
+            history.append({"role": "system", "content": self._TOOL_REMINDER})
+            history.append(all_msgs[-1])
+        else:
+            history.extend(all_msgs)
+        total_tokens = token_count + sum(
+            _estimate_tokens(m["content"]) for m in history if m.get("content")
+        )
         logger.debug(
             "Built history: %d msgs, ~%d tokens (limit %d)",
-            len(history), token_count, context_limit,
+            len(history), total_tokens, context_limit,
         )
         return history
@@ -327,14 +624,16 @@ class ChatService:
             {
                 "role": "system",
                 "content": (
-                    "You are a summarization assistant. Condense the following "
-                    "conversation into a brief summary (2-4 sentences) that "
-                    "captures the key topics, decisions, and context. "
-                    "Focus on information the AI would need to continue "
-                    "the conversation coherently."
+                    "Summarize this conversation in a structured format. "
+                    "Use this exact format:\n"
+                    "Topics: [comma-separated list of topics discussed]\n"
+                    "Key facts: [any specific names, file paths, variables, configs, or technical details mentioned]\n"
+                    "Decisions: [any decisions or preferences the user stated]\n"
+                    "Context: [1-2 sentences of overall context needed to continue]\n"
+                    "Be concise. Preserve technical details exactly."
                 ),
             },
-            {"role": "user", "content": f"Summarize this conversation:\n\n{old_text}"},
+            {"role": "user", "content": f"Summarize:\n\n{old_text}"},
         ]
         logger.info(
@@ -398,7 +697,14 @@ class ChatService:
                 last_user_msg = m.content
                 break
+        # Adaptive temperature based on the last user message
+        temperature = _adaptive_temperature(last_user_msg, model, temperature)
+        # Use simplified tools for small models
+        model_tools = _simplify_tools_for_model(TOOLS, model)
         full_response = ""
+        tool_result_text = None
         usage_data = None
         try:
             tool_calls_result = None
@@ -407,7 +713,7 @@ class ChatService:
                 model=model,
                 temperature=temperature,
                 max_tokens=max_tokens,
-                tools=TOOLS,
+                tools=model_tools,
             ):
                 if isinstance(chunk, dict):
                     if chunk.get("type") == "fallback":
@@ -431,24 +737,49 @@ class ChatService:
                             continue
                         results = await web_search(args.get("query", last_user_msg))
                         result_text = format_search_results(results)
-                        history.append({
-                            "role": "assistant",
-                            "content": None,
-                            "tool_calls": [{
-                                "id": call["id"],
-                                "type": "function",
-                                "function": {
-                                    "name": call["name"],
-                                    "arguments": call["arguments"],
-                                },
-                            }],
-                        })
-                        history.append({
-                            "role": "tool",
-                            "tool_call_id": call["id"],
-                            "content": result_text or "No results found.",
-                        })
+                        tool_result_text = result_text
+                    elif call["name"] == "deep_search":
+                        yield _sse({"type": "status", "content": "Reading documentation..."})
+                        try:
+                            args = json.loads(call["arguments"])
+                        except json.JSONDecodeError:
+                            logger.warning("Invalid tool call arguments: %s", call["arguments"])
+                            continue
+                        result_text = await deep_search(args.get("query", last_user_msg))
+                        tool_result_text = result_text
+                        logger.info("Tool call deep_search(%s) returned %d chars", args.get("query"), len(result_text))
+                    elif call["name"] == "lookup_kairo_docs":
+                        yield _sse({"type": "status", "content": "Looking up Kairo docs..."})
+                        try:
+                            args = json.loads(call["arguments"])
+                        except json.JSONDecodeError:
+                            logger.warning("Invalid tool call arguments: %s", call["arguments"])
+                            continue
+                        result_text = lookup_kairo_docs(args.get("topic", last_user_msg))
+                        tool_result_text = result_text
+                        logger.info("Tool call lookup_kairo_docs(%s) returned %d chars", args.get("topic"), len(result_text))
+                    else:
+                        logger.warning("Unknown tool call: %s", call["name"])
+                        continue
+                    # Append tool call + result to history for second LLM call
+                    history.append({
+                        "role": "assistant",
+                        "content": None,
+                        "tool_calls": [{
+                            "id": call["id"],
+                            "type": "function",
+                            "function": {
+                                "name": call["name"],
+                                "arguments": call["arguments"],
+                            },
+                        }],
+                    })
+                    history.append({
+                        "role": "tool",
+                        "tool_call_id": call["id"],
+                        "content": result_text or "No results found.",
+                    })
                 async for chunk in self.llm_service.stream_chat(
                     messages=history,
@@ -469,6 +800,10 @@ class ChatService:
         full_response = _strip_duplicate_trailing_code(full_response)
+        # Post-process: validate against tool results
+        if tool_result_text:
+            full_response = validate_response(full_response, tool_result_text)
         if full_response:
             await self.conversation_service.add_message(conv.id, "assistant", full_response)

kairo-code 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

kairo-code 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl