kairo-code 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. kairo/backend/api/agents.py +337 -16
  2. kairo/backend/app.py +84 -4
  3. kairo/backend/config.py +4 -2
  4. kairo/backend/models/agent.py +216 -2
  5. kairo/backend/models/api_key.py +4 -1
  6. kairo/backend/models/task.py +31 -0
  7. kairo/backend/models/user_provider_key.py +26 -0
  8. kairo/backend/schemas/agent.py +249 -2
  9. kairo/backend/schemas/api_key.py +3 -0
  10. kairo/backend/services/agent/__init__.py +52 -0
  11. kairo/backend/services/agent/agent_alerts_evaluation_service.py +224 -0
  12. kairo/backend/services/agent/agent_alerts_service.py +201 -0
  13. kairo/backend/services/agent/agent_commands_service.py +142 -0
  14. kairo/backend/services/agent/agent_crud_service.py +150 -0
  15. kairo/backend/services/agent/agent_events_service.py +103 -0
  16. kairo/backend/services/agent/agent_heartbeat_service.py +207 -0
  17. kairo/backend/services/agent/agent_metrics_rollup_service.py +248 -0
  18. kairo/backend/services/agent/agent_metrics_service.py +259 -0
  19. kairo/backend/services/agent/agent_service.py +315 -0
  20. kairo/backend/services/agent/agent_setup_service.py +180 -0
  21. kairo/backend/services/agent/constants.py +28 -0
  22. kairo/backend/services/agent_service.py +18 -102
  23. kairo/backend/services/api_key_service.py +23 -3
  24. kairo/backend/services/byok_service.py +204 -0
  25. kairo/backend/services/chat_service.py +398 -63
  26. kairo/backend/services/deep_search_service.py +159 -0
  27. kairo/backend/services/email_service.py +418 -19
  28. kairo/backend/services/few_shot_service.py +223 -0
  29. kairo/backend/services/post_processor.py +261 -0
  30. kairo/backend/services/rag_service.py +150 -0
  31. kairo/backend/services/task_service.py +119 -0
  32. kairo/backend/tests/__init__.py +1 -0
  33. kairo/backend/tests/e2e/__init__.py +1 -0
  34. kairo/backend/tests/e2e/agents/__init__.py +1 -0
  35. kairo/backend/tests/e2e/agents/conftest.py +389 -0
  36. kairo/backend/tests/e2e/agents/test_agent_alerts.py +802 -0
  37. kairo/backend/tests/e2e/agents/test_agent_commands.py +456 -0
  38. kairo/backend/tests/e2e/agents/test_agent_crud.py +455 -0
  39. kairo/backend/tests/e2e/agents/test_agent_events.py +415 -0
  40. kairo/backend/tests/e2e/agents/test_agent_heartbeat.py +520 -0
  41. kairo/backend/tests/e2e/agents/test_agent_metrics.py +587 -0
  42. kairo/backend/tests/e2e/agents/test_agent_setup.py +349 -0
  43. kairo/migrations/versions/010_agent_dashboard.py +246 -0
  44. {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/METADATA +1 -1
  45. {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/RECORD +50 -16
  46. {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/top_level.txt +1 -0
  47. kairo_migrations/env.py +92 -0
  48. kairo_migrations/versions/001_add_agent_dashboard_extensions.py +450 -0
  49. {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/WHEEL +0 -0
  50. {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/entry_points.txt +0 -0
@@ -13,9 +13,22 @@ from backend.services.web_search_service import (
13
13
  web_search,
14
14
  format_search_results,
15
15
  )
16
+ from backend.services.deep_search_service import deep_search
17
+ from backend.services.post_processor import validate_response
18
+ from backend.services.rag_service import lookup_kairo_docs
19
+ from backend.services.few_shot_service import get_few_shot_examples, get_output_format_instructions
16
20
 
17
21
  logger = logging.getLogger(__name__)
18
22
 
23
+
24
+ # Signals that indicate a complex query benefiting from chain-of-thought
25
+ _COMPLEX_SIGNALS = frozenset([
26
+ 'algorithm', 'optimize', 'debug', 'architecture', 'design pattern',
27
+ 'trade-off', 'tradeoff', 'compare', 'pros and cons', 'best approach',
28
+ 'refactor', 'performance', 'scale', 'security', 'why does', 'how should',
29
+ 'what would happen', 'difference between', 'which is better',
30
+ ])
31
+
19
32
  WEB_SEARCH_TOOL = {
20
33
  "type": "function",
21
34
  "function": {
@@ -33,7 +46,154 @@ WEB_SEARCH_TOOL = {
33
46
  },
34
47
  },
35
48
  }
36
- TOOLS = [WEB_SEARCH_TOOL]
49
+
50
# Function-calling schema for the tool that searches the web and reads the
# content of the top result pages (as opposed to returning snippets only).
DEEP_SEARCH_TOOL = {
    "type": "function",
    "function": {
        "name": "deep_search",
        "description": (
            "Search the web AND read the actual page content from top results. "
            "Use this when you need detailed documentation, API references, "
            "code examples, or technical specifications. "
            "This fetches real page content, not just snippets. "
            "Prefer this over web_search when the user needs accurate technical details."
        ),
        "parameters": {
            "type": "object",
            # Single required argument: the search query text.
            "properties": {
                "query": {
                    "type": "string",
                    "description": "The search query — be specific, e.g. 'TheSportsDB API documentation endpoints'",
                },
            },
            "required": ["query"],
        },
    },
}

# Function-calling schema for looking up Kairo API documentation by topic.
KAIRO_DOCS_TOOL = {
    "type": "function",
    "function": {
        "name": "lookup_kairo_docs",
        "description": (
            "Look up Kairo API documentation. "
            "Use this when the user asks about the Kairo API, how to use the Kairo API, "
            "Kairo endpoints, API keys, or wants to write code that integrates with "
            "Kairo/Kairon Labs. "
            "This returns accurate, internal documentation."
        ),
        "parameters": {
            "type": "object",
            # Single required argument: the documentation topic.
            "properties": {
                "topic": {
                    "type": "string",
                    "description": "What to look up, e.g. 'chat completions', 'authentication', 'python sdk', 'streaming'",
                },
            },
            "required": ["topic"],
        },
    },
}
95
+ TOOLS = [WEB_SEARCH_TOOL, DEEP_SEARCH_TOOL, KAIRO_DOCS_TOOL]
96
+
97
+ _CODE_SIGNALS = frozenset([
98
+ 'code', 'function', 'api', 'endpoint', 'implement', 'debug', 'error',
99
+ 'fix', 'python', 'javascript', 'typescript', 'sql', 'html', 'css',
100
+ 'write a script', 'write a program', 'how to use', 'integrate', 'bug',
101
+ 'class', 'method', 'variable', 'compile', 'runtime', 'import', 'package',
102
+ 'npm', 'pip', 'docker', 'server', 'database', 'query', 'regex',
103
+ ])
104
+ _FACTUAL_SIGNALS = frozenset([
105
+ 'what is', 'who is', 'when did', 'how many', 'explain', 'define',
106
+ 'compare', 'difference between', 'how does', 'why does', 'list',
107
+ ])
108
+
109
+
110
+ def _adaptive_temperature(message: str, model: str, default: float = 0.7) -> float:
111
+ """Select temperature based on query type. Lower = more precise."""
112
+ msg_lower = message.lower()
113
+ if any(s in msg_lower for s in _CODE_SIGNALS):
114
+ return 0.2 if model == "nyx-lite" else 0.3
115
+ if any(s in msg_lower for s in _FACTUAL_SIGNALS):
116
+ return 0.3 if model == "nyx-lite" else 0.4
117
+ # General default — slightly lower for the smaller model
118
+ return 0.5 if model == "nyx-lite" else default
119
+
120
+
121
def _needs_chain_of_thought(message: str) -> bool:
    """Report whether the query looks like it would benefit from explicit reasoning.

    Returns True when the lower-cased message contains at least one entry of
    ``_COMPLEX_SIGNALS`` as a substring.
    """
    lowered = message.lower()
    for signal in _COMPLEX_SIGNALS:
        if signal in lowered:
            return True
    return False
125
+
126
+
127
+ def _inject_chain_of_thought(message: str, model: str) -> str:
128
+ """Wrap complex queries with reasoning instruction for small models."""
129
+ if model != "nyx-lite":
130
+ return message
131
+ if not _needs_chain_of_thought(message):
132
+ return message
133
+
134
+ return f"""Think through this step by step:
135
+ 1. Understand what is being asked
136
+ 2. Consider the key factors
137
+ 3. Reason through the options
138
+ 4. Provide your answer
139
+
140
+ Question: {message}"""
141
+
142
+
143
+ def _simplify_tools_for_model(tools: list[dict], model: str) -> list[dict]:
144
+ """Simplify tool definitions for small models.
145
+
146
+ Small models work better with:
147
+ - Shorter descriptions
148
+ - Only required parameters
149
+ - Usage examples in descriptions
150
+ """
151
+ if model != "nyx-lite":
152
+ return tools
153
+
154
+ simplified = []
155
+ for tool in tools:
156
+ func = tool["function"]
157
+ params = func["parameters"]
158
+
159
+ # Build simplified tool with example in description
160
+ name = func["name"]
161
+ desc = func["description"]
162
+
163
+ # Add usage example to description
164
+ if name == "web_search":
165
+ desc = 'Search web for current info. Example: {"query": "Python 3.12 new features"}'
166
+ elif name == "deep_search":
167
+ desc = 'Search AND read page content. Example: {"query": "FastAPI OAuth2 tutorial"}'
168
+ elif name == "lookup_kairo_docs":
169
+ desc = 'Look up Kairo API docs. Example: {"topic": "authentication"}'
170
+
171
+ simple_tool = {
172
+ "type": "function",
173
+ "function": {
174
+ "name": name,
175
+ "description": desc,
176
+ "parameters": {
177
+ "type": "object",
178
+ "properties": {},
179
+ "required": params.get("required", []),
180
+ },
181
+ },
182
+ }
183
+
184
+ # Only include required parameters with shortened descriptions
185
+ for key in params.get("required", []):
186
+ if key in params.get("properties", {}):
187
+ prop = params["properties"][key]
188
+ simple_tool["function"]["parameters"]["properties"][key] = {
189
+ "type": prop.get("type", "string"),
190
+ "description": prop.get("description", "")[:80],
191
+ }
192
+
193
+ simplified.append(simple_tool)
194
+
195
+ logger.debug("Simplified %d tools for small model", len(simplified))
196
+ return simplified
37
197
 
38
198
 
39
199
  def _estimate_tokens(text: str) -> int:
@@ -142,13 +302,20 @@ class ChatService:
142
302
  # Build context-aware history
143
303
  history = self._build_history(conv, model)
144
304
 
305
+ # Adaptive temperature: override default 0.7 based on query type
306
+ temperature = _adaptive_temperature(message, model, temperature)
307
+
145
308
  logger.info(
146
- "Streaming conv=%s model=%s history_msgs=%d",
147
- conv.id, model, len(history),
309
+ "Streaming conv=%s model=%s history_msgs=%d temp=%.2f",
310
+ conv.id, model, len(history), temperature,
148
311
  )
149
312
 
150
313
  # Stream from LLM with tool calling support
314
+ # Use simplified tools for small models
315
+ model_tools = _simplify_tools_for_model(TOOLS, model)
316
+
151
317
  full_response = ""
318
+ tool_result_text = None
152
319
  usage_data = None
153
320
  try:
154
321
  tool_calls_result = None
@@ -157,7 +324,7 @@ class ChatService:
157
324
  model=model,
158
325
  temperature=temperature,
159
326
  max_tokens=max_tokens,
160
- tools=TOOLS,
327
+ tools=model_tools,
161
328
  ):
162
329
  if isinstance(chunk, dict):
163
330
  if chunk.get("type") == "fallback":
@@ -182,26 +349,50 @@ class ChatService:
182
349
  continue
183
350
  results = await web_search(args.get("query", message))
184
351
  result_text = format_search_results(results)
352
+ tool_result_text = result_text
185
353
  logger.info("Tool call web_search(%s) returned %d results", args.get("query"), len(results))
186
-
187
- # Append tool call + result to history for second LLM call
188
- history.append({
189
- "role": "assistant",
190
- "content": None,
191
- "tool_calls": [{
192
- "id": call["id"],
193
- "type": "function",
194
- "function": {
195
- "name": call["name"],
196
- "arguments": call["arguments"],
197
- },
198
- }],
199
- })
200
- history.append({
201
- "role": "tool",
202
- "tool_call_id": call["id"],
203
- "content": result_text or "No results found.",
204
- })
354
+ elif call["name"] == "deep_search":
355
+ yield _sse({"type": "status", "content": "Reading documentation..."})
356
+ try:
357
+ args = json.loads(call["arguments"])
358
+ except json.JSONDecodeError:
359
+ logger.warning("Invalid tool call arguments: %s", call["arguments"])
360
+ continue
361
+ result_text = await deep_search(args.get("query", message))
362
+ tool_result_text = result_text
363
+ logger.info("Tool call deep_search(%s) returned %d chars", args.get("query"), len(result_text))
364
+ elif call["name"] == "lookup_kairo_docs":
365
+ yield _sse({"type": "status", "content": "Looking up Kairo docs..."})
366
+ try:
367
+ args = json.loads(call["arguments"])
368
+ except json.JSONDecodeError:
369
+ logger.warning("Invalid tool call arguments: %s", call["arguments"])
370
+ continue
371
+ result_text = lookup_kairo_docs(args.get("topic", message))
372
+ tool_result_text = result_text
373
+ logger.info("Tool call lookup_kairo_docs(%s) returned %d chars", args.get("topic"), len(result_text))
374
+ else:
375
+ logger.warning("Unknown tool call: %s", call["name"])
376
+ continue
377
+
378
+ # Append tool call + result to history for second LLM call
379
+ history.append({
380
+ "role": "assistant",
381
+ "content": None,
382
+ "tool_calls": [{
383
+ "id": call["id"],
384
+ "type": "function",
385
+ "function": {
386
+ "name": call["name"],
387
+ "arguments": call["arguments"],
388
+ },
389
+ }],
390
+ })
391
+ history.append({
392
+ "role": "tool",
393
+ "tool_call_id": call["id"],
394
+ "content": result_text or "No results found.",
395
+ })
205
396
 
206
397
  # Second LLM call — no tools, generate final response using search results
207
398
  async for chunk in self.llm_service.stream_chat(
@@ -224,6 +415,10 @@ class ChatService:
224
415
  # Post-process: strip duplicate trailing code blocks
225
416
  full_response = _strip_duplicate_trailing_code(full_response)
226
417
 
418
+ # Post-process: validate against tool results
419
+ if tool_result_text:
420
+ full_response = validate_response(full_response, tool_result_text)
421
+
227
422
  # Save assistant response
228
423
  if full_response:
229
424
  await self.conversation_service.add_message(conv.id, "assistant", full_response)
@@ -254,6 +449,46 @@ class ChatService:
254
449
 
255
450
  yield "data: [DONE]\n\n"
256
451
 
452
+ # Short reminder injected before the last user message so the model
453
+ # doesn't forget it has tools available in longer conversations.
454
+ _TOOL_REMINDER = (
455
+ "[You have tools: web_search (quick facts), "
456
+ "deep_search (reads actual web pages — use for APIs, docs, code), "
457
+ "and lookup_kairo_docs (Kairo API docs). "
458
+ "Use deep_search for any technical or documentation question. "
459
+ "Use lookup_kairo_docs for anything about the Kairo API.]"
460
+ )
461
+
462
+ def _score_message_importance(self, msg, index: int, total: int) -> float:
463
+ """Score message importance for context retention.
464
+
465
+ Higher scores = more important to keep in context.
466
+ """
467
+ score = 0.0
468
+ content = msg.content.lower()
469
+
470
+ # Recency bonus (0-3 points) - newer messages score higher
471
+ recency = (index / max(total, 1)) * 3
472
+ score += recency
473
+
474
+ # Code content bonus - small models need code context
475
+ if '```' in msg.content:
476
+ score += 2.0
477
+
478
+ # Error/fix context is valuable for debugging continuity
479
+ if any(w in content for w in ['error', 'fix', 'bug', 'issue', 'problem', 'traceback']):
480
+ score += 1.5
481
+
482
+ # User preferences/decisions should be remembered
483
+ if any(w in content for w in ['i want', 'i need', 'please', 'should be', 'must', 'don\'t']):
484
+ score += 1.0
485
+
486
+ # Technical specifications
487
+ if any(w in content for w in ['file:', 'path:', 'url:', 'config', 'setting', 'version']):
488
+ score += 1.0
489
+
490
+ return score
491
+
257
492
  def _build_history(self, conv, model: str) -> list[dict[str, str]]:
258
493
  context_limit = settings.CONTEXT_LIMITS.get(model, 6000)
259
494
  history: list[dict[str, str]] = []
@@ -281,29 +516,91 @@ class ChatService:
281
516
  history.append({"role": "system", "content": summary_msg})
282
517
  token_count += _estimate_tokens(summary_msg)
283
518
 
284
- # Add recent messages, working backwards to prioritize the latest
285
519
  messages = list(conv.messages)
286
- to_include = []
287
- for msg in reversed(messages):
288
- msg_tokens = _estimate_tokens(msg.content)
289
- if token_count + msg_tokens > context_limit:
520
+
521
+ # Get the last user message for few-shot and format injection
522
+ last_user_content = ""
523
+ for m in reversed(messages):
524
+ if m.role == "user":
525
+ last_user_content = m.content
290
526
  break
291
- to_include.append({"role": msg.role, "content": msg.content})
292
- token_count += msg_tokens
293
527
 
294
- # Reverse back to chronological order
295
- to_include.reverse()
528
+ # Inject few-shot examples for small models (based on query type)
529
+ few_shot = get_few_shot_examples(last_user_content, model)
530
+ if few_shot:
531
+ history.append({"role": "system", "content": few_shot})
532
+ token_count += _estimate_tokens(few_shot)
533
+
534
+ # Inject output format instructions for small models
535
+ format_instructions = get_output_format_instructions(last_user_content, model)
536
+ if format_instructions:
537
+ history.append({"role": "system", "content": format_instructions})
538
+ token_count += _estimate_tokens(format_instructions)
539
+
540
+ # Reserve budget for tool reminder (~40 tokens)
541
+ reminder_tokens = _estimate_tokens(self._TOOL_REMINDER)
542
+ message_budget = context_limit - token_count - reminder_tokens
543
+
544
+ # IMPORTANCE-WEIGHTED CONTEXT RETENTION
545
+ # Always include last 4 messages (current context)
546
+ must_include_count = min(4, len(messages))
547
+ must_include_indices = set(range(len(messages) - must_include_count, len(messages)))
548
+
549
+ # Score remaining messages by importance
550
+ scored_messages = [
551
+ (i, msg, self._score_message_importance(msg, i, len(messages)))
552
+ for i, msg in enumerate(messages)
553
+ if i not in must_include_indices
554
+ ]
555
+ scored_messages.sort(key=lambda x: x[2], reverse=True)
296
556
 
297
- # Ensure we at least include the very last message (the new user msg)
298
- if not to_include and messages:
299
- last = messages[-1]
300
- to_include = [{"role": last.role, "content": last.content}]
557
+ # Calculate budget used by must-include messages
558
+ selected_indices = must_include_indices.copy()
559
+ budget_used = sum(
560
+ _estimate_tokens(messages[i].content) for i in must_include_indices
561
+ )
562
+
563
+ # Add high-importance messages until budget exhausted
564
+ for i, msg, score in scored_messages:
565
+ msg_tokens = _estimate_tokens(msg.content)
566
+ if budget_used + msg_tokens > message_budget:
567
+ continue
568
+ selected_indices.add(i)
569
+ budget_used += msg_tokens
301
570
 
302
- history.extend(to_include)
571
+ # Build message list in chronological order
572
+ all_msgs = []
573
+ for i in sorted(selected_indices):
574
+ msg = messages[i]
575
+ content = msg.content
303
576
 
577
+ # Apply chain-of-thought to the last user message for small models
578
+ if i == len(messages) - 1 and msg.role == "user":
579
+ content = _inject_chain_of_thought(content, model)
580
+
581
+ all_msgs.append({"role": msg.role, "content": content})
582
+
583
+ # Ensure we at least include the very last message
584
+ if not all_msgs and messages:
585
+ last = messages[-1]
586
+ content = _inject_chain_of_thought(last.content, model) if last.role == "user" else last.content
587
+ all_msgs = [{"role": last.role, "content": content}]
588
+
589
+ # Inject tool reminder right before the final user message
590
+ # so it's fresh in the model's attention
591
+ if len(all_msgs) >= 1:
592
+ history.extend(all_msgs[:-1])
593
+ history.append({"role": "system", "content": self._TOOL_REMINDER})
594
+ history.append(all_msgs[-1])
595
+ else:
596
+ history.extend(all_msgs)
597
+
598
+ total_tokens = token_count + sum(
599
+ _estimate_tokens(m["content"]) for m in history if m.get("content")
600
+ )
304
601
  logger.debug(
305
602
  "Built history: %d msgs, ~%d tokens (limit %d)",
306
- len(history), token_count, context_limit,
603
+ len(history), total_tokens, context_limit,
307
604
  )
308
605
  return history
309
606
 
@@ -327,14 +624,16 @@ class ChatService:
327
624
  {
328
625
  "role": "system",
329
626
  "content": (
330
- "You are a summarization assistant. Condense the following "
331
- "conversation into a brief summary (2-4 sentences) that "
332
- "captures the key topics, decisions, and context. "
333
- "Focus on information the AI would need to continue "
334
- "the conversation coherently."
627
+ "Summarize this conversation in a structured format. "
628
+ "Use this exact format:\n"
629
+ "Topics: [comma-separated list of topics discussed]\n"
630
+ "Key facts: [any specific names, file paths, variables, configs, or technical details mentioned]\n"
631
+ "Decisions: [any decisions or preferences the user stated]\n"
632
+ "Context: [1-2 sentences of overall context needed to continue]\n"
633
+ "Be concise. Preserve technical details exactly."
335
634
  ),
336
635
  },
337
- {"role": "user", "content": f"Summarize this conversation:\n\n{old_text}"},
636
+ {"role": "user", "content": f"Summarize:\n\n{old_text}"},
338
637
  ]
339
638
 
340
639
  logger.info(
@@ -398,7 +697,14 @@ class ChatService:
398
697
  last_user_msg = m.content
399
698
  break
400
699
 
700
+ # Adaptive temperature based on the last user message
701
+ temperature = _adaptive_temperature(last_user_msg, model, temperature)
702
+
703
+ # Use simplified tools for small models
704
+ model_tools = _simplify_tools_for_model(TOOLS, model)
705
+
401
706
  full_response = ""
707
+ tool_result_text = None
402
708
  usage_data = None
403
709
  try:
404
710
  tool_calls_result = None
@@ -407,7 +713,7 @@ class ChatService:
407
713
  model=model,
408
714
  temperature=temperature,
409
715
  max_tokens=max_tokens,
410
- tools=TOOLS,
716
+ tools=model_tools,
411
717
  ):
412
718
  if isinstance(chunk, dict):
413
719
  if chunk.get("type") == "fallback":
@@ -431,24 +737,49 @@ class ChatService:
431
737
  continue
432
738
  results = await web_search(args.get("query", last_user_msg))
433
739
  result_text = format_search_results(results)
434
-
435
- history.append({
436
- "role": "assistant",
437
- "content": None,
438
- "tool_calls": [{
439
- "id": call["id"],
440
- "type": "function",
441
- "function": {
442
- "name": call["name"],
443
- "arguments": call["arguments"],
444
- },
445
- }],
446
- })
447
- history.append({
448
- "role": "tool",
449
- "tool_call_id": call["id"],
450
- "content": result_text or "No results found.",
451
- })
740
+ tool_result_text = result_text
741
+ elif call["name"] == "deep_search":
742
+ yield _sse({"type": "status", "content": "Reading documentation..."})
743
+ try:
744
+ args = json.loads(call["arguments"])
745
+ except json.JSONDecodeError:
746
+ logger.warning("Invalid tool call arguments: %s", call["arguments"])
747
+ continue
748
+ result_text = await deep_search(args.get("query", last_user_msg))
749
+ tool_result_text = result_text
750
+ logger.info("Tool call deep_search(%s) returned %d chars", args.get("query"), len(result_text))
751
+ elif call["name"] == "lookup_kairo_docs":
752
+ yield _sse({"type": "status", "content": "Looking up Kairo docs..."})
753
+ try:
754
+ args = json.loads(call["arguments"])
755
+ except json.JSONDecodeError:
756
+ logger.warning("Invalid tool call arguments: %s", call["arguments"])
757
+ continue
758
+ result_text = lookup_kairo_docs(args.get("topic", last_user_msg))
759
+ tool_result_text = result_text
760
+ logger.info("Tool call lookup_kairo_docs(%s) returned %d chars", args.get("topic"), len(result_text))
761
+ else:
762
+ logger.warning("Unknown tool call: %s", call["name"])
763
+ continue
764
+
765
+ # Append tool call + result to history for second LLM call
766
+ history.append({
767
+ "role": "assistant",
768
+ "content": None,
769
+ "tool_calls": [{
770
+ "id": call["id"],
771
+ "type": "function",
772
+ "function": {
773
+ "name": call["name"],
774
+ "arguments": call["arguments"],
775
+ },
776
+ }],
777
+ })
778
+ history.append({
779
+ "role": "tool",
780
+ "tool_call_id": call["id"],
781
+ "content": result_text or "No results found.",
782
+ })
452
783
 
453
784
  async for chunk in self.llm_service.stream_chat(
454
785
  messages=history,
@@ -469,6 +800,10 @@ class ChatService:
469
800
 
470
801
  full_response = _strip_duplicate_trailing_code(full_response)
471
802
 
803
+ # Post-process: validate against tool results
804
+ if tool_result_text:
805
+ full_response = validate_response(full_response, tool_result_text)
806
+
472
807
  if full_response:
473
808
  await self.conversation_service.add_message(conv.id, "assistant", full_response)
474
809