kairo-code 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kairo/backend/api/agents.py +337 -16
- kairo/backend/app.py +84 -4
- kairo/backend/config.py +4 -2
- kairo/backend/models/agent.py +216 -2
- kairo/backend/models/api_key.py +4 -1
- kairo/backend/models/task.py +31 -0
- kairo/backend/models/user_provider_key.py +26 -0
- kairo/backend/schemas/agent.py +249 -2
- kairo/backend/schemas/api_key.py +3 -0
- kairo/backend/services/agent/__init__.py +52 -0
- kairo/backend/services/agent/agent_alerts_evaluation_service.py +224 -0
- kairo/backend/services/agent/agent_alerts_service.py +201 -0
- kairo/backend/services/agent/agent_commands_service.py +142 -0
- kairo/backend/services/agent/agent_crud_service.py +150 -0
- kairo/backend/services/agent/agent_events_service.py +103 -0
- kairo/backend/services/agent/agent_heartbeat_service.py +207 -0
- kairo/backend/services/agent/agent_metrics_rollup_service.py +248 -0
- kairo/backend/services/agent/agent_metrics_service.py +259 -0
- kairo/backend/services/agent/agent_service.py +315 -0
- kairo/backend/services/agent/agent_setup_service.py +180 -0
- kairo/backend/services/agent/constants.py +28 -0
- kairo/backend/services/agent_service.py +18 -102
- kairo/backend/services/api_key_service.py +23 -3
- kairo/backend/services/byok_service.py +204 -0
- kairo/backend/services/chat_service.py +398 -63
- kairo/backend/services/deep_search_service.py +159 -0
- kairo/backend/services/email_service.py +418 -19
- kairo/backend/services/few_shot_service.py +223 -0
- kairo/backend/services/post_processor.py +261 -0
- kairo/backend/services/rag_service.py +150 -0
- kairo/backend/services/task_service.py +119 -0
- kairo/backend/tests/__init__.py +1 -0
- kairo/backend/tests/e2e/__init__.py +1 -0
- kairo/backend/tests/e2e/agents/__init__.py +1 -0
- kairo/backend/tests/e2e/agents/conftest.py +389 -0
- kairo/backend/tests/e2e/agents/test_agent_alerts.py +802 -0
- kairo/backend/tests/e2e/agents/test_agent_commands.py +456 -0
- kairo/backend/tests/e2e/agents/test_agent_crud.py +455 -0
- kairo/backend/tests/e2e/agents/test_agent_events.py +415 -0
- kairo/backend/tests/e2e/agents/test_agent_heartbeat.py +520 -0
- kairo/backend/tests/e2e/agents/test_agent_metrics.py +587 -0
- kairo/backend/tests/e2e/agents/test_agent_setup.py +349 -0
- kairo/migrations/versions/010_agent_dashboard.py +246 -0
- {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/METADATA +1 -1
- {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/RECORD +50 -16
- {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/top_level.txt +1 -0
- kairo_migrations/env.py +92 -0
- kairo_migrations/versions/001_add_agent_dashboard_extensions.py +450 -0
- {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/WHEEL +0 -0
- {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/entry_points.txt +0 -0
|
@@ -13,9 +13,22 @@ from backend.services.web_search_service import (
|
|
|
13
13
|
web_search,
|
|
14
14
|
format_search_results,
|
|
15
15
|
)
|
|
16
|
+
from backend.services.deep_search_service import deep_search
|
|
17
|
+
from backend.services.post_processor import validate_response
|
|
18
|
+
from backend.services.rag_service import lookup_kairo_docs
|
|
19
|
+
from backend.services.few_shot_service import get_few_shot_examples, get_output_format_instructions
|
|
16
20
|
|
|
17
21
|
logger = logging.getLogger(__name__)
|
|
18
22
|
|
|
23
|
+
|
|
24
|
+
# Signals that indicate a complex query benefiting from chain-of-thought
|
|
25
|
+
_COMPLEX_SIGNALS = frozenset([
|
|
26
|
+
'algorithm', 'optimize', 'debug', 'architecture', 'design pattern',
|
|
27
|
+
'trade-off', 'tradeoff', 'compare', 'pros and cons', 'best approach',
|
|
28
|
+
'refactor', 'performance', 'scale', 'security', 'why does', 'how should',
|
|
29
|
+
'what would happen', 'difference between', 'which is better',
|
|
30
|
+
])
|
|
31
|
+
|
|
19
32
|
WEB_SEARCH_TOOL = {
|
|
20
33
|
"type": "function",
|
|
21
34
|
"function": {
|
|
@@ -33,7 +46,154 @@ WEB_SEARCH_TOOL = {
|
|
|
33
46
|
},
|
|
34
47
|
},
|
|
35
48
|
}
|
|
36
|
-
|
|
49
|
+
|
|
50
|
+
DEEP_SEARCH_TOOL = {
|
|
51
|
+
"type": "function",
|
|
52
|
+
"function": {
|
|
53
|
+
"name": "deep_search",
|
|
54
|
+
"description": (
|
|
55
|
+
"Search the web AND read the actual page content from top results. "
|
|
56
|
+
"Use this when you need detailed documentation, API references, code examples, "
|
|
57
|
+
"or technical specifications. This fetches real page content, not just snippets. "
|
|
58
|
+
"Prefer this over web_search when the user needs accurate technical details."
|
|
59
|
+
),
|
|
60
|
+
"parameters": {
|
|
61
|
+
"type": "object",
|
|
62
|
+
"properties": {
|
|
63
|
+
"query": {
|
|
64
|
+
"type": "string",
|
|
65
|
+
"description": "The search query — be specific, e.g. 'TheSportsDB API documentation endpoints'",
|
|
66
|
+
}
|
|
67
|
+
},
|
|
68
|
+
"required": ["query"],
|
|
69
|
+
},
|
|
70
|
+
},
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
KAIRO_DOCS_TOOL = {
|
|
74
|
+
"type": "function",
|
|
75
|
+
"function": {
|
|
76
|
+
"name": "lookup_kairo_docs",
|
|
77
|
+
"description": (
|
|
78
|
+
"Look up Kairo API documentation. Use this when the user asks about "
|
|
79
|
+
"the Kairo API, how to use the Kairo API, Kairo endpoints, API keys, "
|
|
80
|
+
"or wants to write code that integrates with Kairo/Kairon Labs. "
|
|
81
|
+
"This returns accurate, internal documentation."
|
|
82
|
+
),
|
|
83
|
+
"parameters": {
|
|
84
|
+
"type": "object",
|
|
85
|
+
"properties": {
|
|
86
|
+
"topic": {
|
|
87
|
+
"type": "string",
|
|
88
|
+
"description": "What to look up, e.g. 'chat completions', 'authentication', 'python sdk', 'streaming'",
|
|
89
|
+
}
|
|
90
|
+
},
|
|
91
|
+
"required": ["topic"],
|
|
92
|
+
},
|
|
93
|
+
},
|
|
94
|
+
}
|
|
95
|
+
TOOLS = [WEB_SEARCH_TOOL, DEEP_SEARCH_TOOL, KAIRO_DOCS_TOOL]
|
|
96
|
+
|
|
97
|
+
# Substrings (matched case-insensitively) that mark a message as code-related.
_CODE_SIGNALS = frozenset([
    'code', 'function', 'api', 'endpoint', 'implement', 'debug', 'error',
    'fix', 'python', 'javascript', 'typescript', 'sql', 'html', 'css',
    'write a script', 'write a program', 'how to use', 'integrate', 'bug',
    'class', 'method', 'variable', 'compile', 'runtime', 'import', 'package',
    'npm', 'pip', 'docker', 'server', 'database', 'query', 'regex',
])
# Substrings (matched case-insensitively) that mark a factual/explanatory question.
_FACTUAL_SIGNALS = frozenset([
    'what is', 'who is', 'when did', 'how many', 'explain', 'define',
    'compare', 'difference between', 'how does', 'why does', 'list',
])


def _adaptive_temperature(message: str, model: str, default: float = 0.7) -> float:
    """Select a sampling temperature based on query type. Lower = more precise.

    The message is lower-cased and scanned for the substrings in
    ``_CODE_SIGNALS`` first, then ``_FACTUAL_SIGNALS``. Matching is plain
    substring containment, so a signal also matches inside a longer word.

    Args:
        message: The user message to classify. ``None`` is treated as empty.
        model: Model identifier; ``"nyx-lite"`` gets lower temperatures.
        default: The temperature the caller would otherwise use. It acts as
            an upper bound: the adaptive value only ever lowers it, so an
            explicitly requested low temperature is never raised.

    Returns:
        The temperature to use for this request.
    """
    msg_lower = (message or "").lower()
    is_lite = model == "nyx-lite"

    if any(s in msg_lower for s in _CODE_SIGNALS):
        target = 0.2 if is_lite else 0.3
    elif any(s in msg_lower for s in _FACTUAL_SIGNALS):
        target = 0.3 if is_lite else 0.4
    elif is_lite:
        # General default — slightly lower for the smaller model
        target = 0.5
    else:
        return default

    # Call sites pass the caller-supplied temperature as ``default``; never
    # return something hotter than what was asked for.
    if default is None:
        return target
    return min(target, default)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _needs_chain_of_thought(message: str) -> bool:
    """Report whether the message contains any complex-query signal.

    The message is lower-cased and checked for substring containment of
    each entry in ``_COMPLEX_SIGNALS``.
    """
    lowered = message.lower()
    for signal in _COMPLEX_SIGNALS:
        if signal in lowered:
            return True
    return False
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _inject_chain_of_thought(message: str, model: str) -> str:
    """Prefix a step-by-step reasoning scaffold to complex queries.

    Only applies when ``model`` is ``"nyx-lite"`` and
    ``_needs_chain_of_thought`` flags the message; in every other case the
    message is returned untouched.
    """
    # Short-circuit keeps the signal scan from running for other models.
    wants_scaffold = model == "nyx-lite" and _needs_chain_of_thought(message)
    if not wants_scaffold:
        return message

    return f"""Think through this step by step:
1. Understand what is being asked
2. Consider the key factors
3. Reason through the options
4. Provide your answer

Question: {message}"""
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _simplify_tools_for_model(tools: list[dict], model: str) -> list[dict]:
    """Simplify tool definitions for small models.

    For any model other than ``"nyx-lite"`` the input list is returned
    as-is. For ``"nyx-lite"`` a new list of new tool dicts is built with:

    - a short description containing a usage example (for the three known
      tools; other tools keep their own description),
    - only the required parameters,
    - each parameter description cut to 80 characters.

    Tools that omit ``parameters`` or ``description`` are accepted and
    treated as having no parameters / an empty description.

    Args:
        tools: Tool definitions, each with a ``"function"`` dict that has
            at least a ``"name"``.
        model: Model identifier.

    Returns:
        The original list, or the simplified copies for ``"nyx-lite"``.
    """
    if model != "nyx-lite":
        return tools

    # Compact replacements, each carrying a concrete call example.
    short_descriptions = {
        "web_search": 'Search web for current info. Example: {"query": "Python 3.12 new features"}',
        "deep_search": 'Search AND read page content. Example: {"query": "FastAPI OAuth2 tutorial"}',
        "lookup_kairo_docs": 'Look up Kairo API docs. Example: {"topic": "authentication"}',
    }

    simplified = []
    for tool in tools:
        func = tool["function"]
        name = func["name"]
        params = func.get("parameters") or {}
        properties = params.get("properties") or {}
        # Copy so the simplified tool never aliases the source definition's list.
        required = list(params.get("required") or [])

        # Only include required parameters with shortened descriptions
        slim_properties = {
            key: {
                "type": properties[key].get("type", "string"),
                "description": properties[key].get("description", "")[:80],
            }
            for key in required
            if key in properties
        }

        simplified.append({
            "type": "function",
            "function": {
                "name": name,
                "description": short_descriptions.get(name, func.get("description", "")),
                "parameters": {
                    "type": "object",
                    "properties": slim_properties,
                    "required": required,
                },
            },
        })

    logger.debug("Simplified %d tools for small model", len(simplified))
    return simplified
|
|
37
197
|
|
|
38
198
|
|
|
39
199
|
def _estimate_tokens(text: str) -> int:
|
|
@@ -142,13 +302,20 @@ class ChatService:
|
|
|
142
302
|
# Build context-aware history
|
|
143
303
|
history = self._build_history(conv, model)
|
|
144
304
|
|
|
305
|
+
# Adaptive temperature: override default 0.7 based on query type
|
|
306
|
+
temperature = _adaptive_temperature(message, model, temperature)
|
|
307
|
+
|
|
145
308
|
logger.info(
|
|
146
|
-
"Streaming conv=%s model=%s history_msgs=%d",
|
|
147
|
-
conv.id, model, len(history),
|
|
309
|
+
"Streaming conv=%s model=%s history_msgs=%d temp=%.2f",
|
|
310
|
+
conv.id, model, len(history), temperature,
|
|
148
311
|
)
|
|
149
312
|
|
|
150
313
|
# Stream from LLM with tool calling support
|
|
314
|
+
# Use simplified tools for small models
|
|
315
|
+
model_tools = _simplify_tools_for_model(TOOLS, model)
|
|
316
|
+
|
|
151
317
|
full_response = ""
|
|
318
|
+
tool_result_text = None
|
|
152
319
|
usage_data = None
|
|
153
320
|
try:
|
|
154
321
|
tool_calls_result = None
|
|
@@ -157,7 +324,7 @@ class ChatService:
|
|
|
157
324
|
model=model,
|
|
158
325
|
temperature=temperature,
|
|
159
326
|
max_tokens=max_tokens,
|
|
160
|
-
tools=
|
|
327
|
+
tools=model_tools,
|
|
161
328
|
):
|
|
162
329
|
if isinstance(chunk, dict):
|
|
163
330
|
if chunk.get("type") == "fallback":
|
|
@@ -182,26 +349,50 @@ class ChatService:
|
|
|
182
349
|
continue
|
|
183
350
|
results = await web_search(args.get("query", message))
|
|
184
351
|
result_text = format_search_results(results)
|
|
352
|
+
tool_result_text = result_text
|
|
185
353
|
logger.info("Tool call web_search(%s) returned %d results", args.get("query"), len(results))
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
"
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
"
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
354
|
+
elif call["name"] == "deep_search":
|
|
355
|
+
yield _sse({"type": "status", "content": "Reading documentation..."})
|
|
356
|
+
try:
|
|
357
|
+
args = json.loads(call["arguments"])
|
|
358
|
+
except json.JSONDecodeError:
|
|
359
|
+
logger.warning("Invalid tool call arguments: %s", call["arguments"])
|
|
360
|
+
continue
|
|
361
|
+
result_text = await deep_search(args.get("query", message))
|
|
362
|
+
tool_result_text = result_text
|
|
363
|
+
logger.info("Tool call deep_search(%s) returned %d chars", args.get("query"), len(result_text))
|
|
364
|
+
elif call["name"] == "lookup_kairo_docs":
|
|
365
|
+
yield _sse({"type": "status", "content": "Looking up Kairo docs..."})
|
|
366
|
+
try:
|
|
367
|
+
args = json.loads(call["arguments"])
|
|
368
|
+
except json.JSONDecodeError:
|
|
369
|
+
logger.warning("Invalid tool call arguments: %s", call["arguments"])
|
|
370
|
+
continue
|
|
371
|
+
result_text = lookup_kairo_docs(args.get("topic", message))
|
|
372
|
+
tool_result_text = result_text
|
|
373
|
+
logger.info("Tool call lookup_kairo_docs(%s) returned %d chars", args.get("topic"), len(result_text))
|
|
374
|
+
else:
|
|
375
|
+
logger.warning("Unknown tool call: %s", call["name"])
|
|
376
|
+
continue
|
|
377
|
+
|
|
378
|
+
# Append tool call + result to history for second LLM call
|
|
379
|
+
history.append({
|
|
380
|
+
"role": "assistant",
|
|
381
|
+
"content": None,
|
|
382
|
+
"tool_calls": [{
|
|
383
|
+
"id": call["id"],
|
|
384
|
+
"type": "function",
|
|
385
|
+
"function": {
|
|
386
|
+
"name": call["name"],
|
|
387
|
+
"arguments": call["arguments"],
|
|
388
|
+
},
|
|
389
|
+
}],
|
|
390
|
+
})
|
|
391
|
+
history.append({
|
|
392
|
+
"role": "tool",
|
|
393
|
+
"tool_call_id": call["id"],
|
|
394
|
+
"content": result_text or "No results found.",
|
|
395
|
+
})
|
|
205
396
|
|
|
206
397
|
# Second LLM call — no tools, generate final response using search results
|
|
207
398
|
async for chunk in self.llm_service.stream_chat(
|
|
@@ -224,6 +415,10 @@ class ChatService:
|
|
|
224
415
|
# Post-process: strip duplicate trailing code blocks
|
|
225
416
|
full_response = _strip_duplicate_trailing_code(full_response)
|
|
226
417
|
|
|
418
|
+
# Post-process: validate against tool results
|
|
419
|
+
if tool_result_text:
|
|
420
|
+
full_response = validate_response(full_response, tool_result_text)
|
|
421
|
+
|
|
227
422
|
# Save assistant response
|
|
228
423
|
if full_response:
|
|
229
424
|
await self.conversation_service.add_message(conv.id, "assistant", full_response)
|
|
@@ -254,6 +449,46 @@ class ChatService:
|
|
|
254
449
|
|
|
255
450
|
yield "data: [DONE]\n\n"
|
|
256
451
|
|
|
452
|
+
# Short reminder injected before the last user message so the model
|
|
453
|
+
# doesn't forget it has tools available in longer conversations.
|
|
454
|
+
_TOOL_REMINDER = (
|
|
455
|
+
"[You have tools: web_search (quick facts), "
|
|
456
|
+
"deep_search (reads actual web pages — use for APIs, docs, code), "
|
|
457
|
+
"and lookup_kairo_docs (Kairo API docs). "
|
|
458
|
+
"Use deep_search for any technical or documentation question. "
|
|
459
|
+
"Use lookup_kairo_docs for anything about the Kairo API.]"
|
|
460
|
+
)
|
|
461
|
+
|
|
462
|
+
def _score_message_importance(self, msg, index: int, total: int) -> float:
    """Score message importance for context retention.

    Higher scores = more important to keep in context. The score is the
    sum of a recency term and fixed bonuses for content features, all
    detected by case-insensitive substring checks.

    Args:
        msg: Object with a ``content`` attribute; ``None`` content is
            treated as empty text.
        index: Position of the message in the conversation.
        total: Number of messages in the conversation.

    Returns:
        The importance score as a float.
    """
    raw = msg.content or ""
    content = raw.lower()

    # Recency bonus - newer messages score higher; max(total, 1) guards
    # against division by zero.
    score = (index / max(total, 1)) * 3

    # Code content bonus - small models need code context
    if '```' in raw:
        score += 2.0

    # Error/fix context is valuable for debugging continuity
    if any(w in content for w in ('error', 'fix', 'bug', 'issue', 'problem', 'traceback')):
        score += 1.5

    # User preferences/decisions should be remembered
    if any(w in content for w in ('i want', 'i need', 'please', 'should be', 'must', "don't")):
        score += 1.0

    # Technical specifications
    if any(w in content for w in ('file:', 'path:', 'url:', 'config', 'setting', 'version')):
        score += 1.0

    return score
|
|
491
|
+
|
|
257
492
|
def _build_history(self, conv, model: str) -> list[dict[str, str]]:
|
|
258
493
|
context_limit = settings.CONTEXT_LIMITS.get(model, 6000)
|
|
259
494
|
history: list[dict[str, str]] = []
|
|
@@ -281,29 +516,91 @@ class ChatService:
|
|
|
281
516
|
history.append({"role": "system", "content": summary_msg})
|
|
282
517
|
token_count += _estimate_tokens(summary_msg)
|
|
283
518
|
|
|
284
|
-
# Add recent messages, working backwards to prioritize the latest
|
|
285
519
|
messages = list(conv.messages)
|
|
286
|
-
|
|
287
|
-
for
|
|
288
|
-
|
|
289
|
-
|
|
520
|
+
|
|
521
|
+
# Get the last user message for few-shot and format injection
|
|
522
|
+
last_user_content = ""
|
|
523
|
+
for m in reversed(messages):
|
|
524
|
+
if m.role == "user":
|
|
525
|
+
last_user_content = m.content
|
|
290
526
|
break
|
|
291
|
-
to_include.append({"role": msg.role, "content": msg.content})
|
|
292
|
-
token_count += msg_tokens
|
|
293
527
|
|
|
294
|
-
#
|
|
295
|
-
|
|
528
|
+
# Inject few-shot examples for small models (based on query type)
|
|
529
|
+
few_shot = get_few_shot_examples(last_user_content, model)
|
|
530
|
+
if few_shot:
|
|
531
|
+
history.append({"role": "system", "content": few_shot})
|
|
532
|
+
token_count += _estimate_tokens(few_shot)
|
|
533
|
+
|
|
534
|
+
# Inject output format instructions for small models
|
|
535
|
+
format_instructions = get_output_format_instructions(last_user_content, model)
|
|
536
|
+
if format_instructions:
|
|
537
|
+
history.append({"role": "system", "content": format_instructions})
|
|
538
|
+
token_count += _estimate_tokens(format_instructions)
|
|
539
|
+
|
|
540
|
+
# Reserve budget for tool reminder (~40 tokens)
|
|
541
|
+
reminder_tokens = _estimate_tokens(self._TOOL_REMINDER)
|
|
542
|
+
message_budget = context_limit - token_count - reminder_tokens
|
|
543
|
+
|
|
544
|
+
# IMPORTANCE-WEIGHTED CONTEXT RETENTION
|
|
545
|
+
# Always include last 4 messages (current context)
|
|
546
|
+
must_include_count = min(4, len(messages))
|
|
547
|
+
must_include_indices = set(range(len(messages) - must_include_count, len(messages)))
|
|
548
|
+
|
|
549
|
+
# Score remaining messages by importance
|
|
550
|
+
scored_messages = [
|
|
551
|
+
(i, msg, self._score_message_importance(msg, i, len(messages)))
|
|
552
|
+
for i, msg in enumerate(messages)
|
|
553
|
+
if i not in must_include_indices
|
|
554
|
+
]
|
|
555
|
+
scored_messages.sort(key=lambda x: x[2], reverse=True)
|
|
296
556
|
|
|
297
|
-
#
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
557
|
+
# Calculate budget used by must-include messages
|
|
558
|
+
selected_indices = must_include_indices.copy()
|
|
559
|
+
budget_used = sum(
|
|
560
|
+
_estimate_tokens(messages[i].content) for i in must_include_indices
|
|
561
|
+
)
|
|
562
|
+
|
|
563
|
+
# Add high-importance messages until budget exhausted
|
|
564
|
+
for i, msg, score in scored_messages:
|
|
565
|
+
msg_tokens = _estimate_tokens(msg.content)
|
|
566
|
+
if budget_used + msg_tokens > message_budget:
|
|
567
|
+
continue
|
|
568
|
+
selected_indices.add(i)
|
|
569
|
+
budget_used += msg_tokens
|
|
301
570
|
|
|
302
|
-
|
|
571
|
+
# Build message list in chronological order
|
|
572
|
+
all_msgs = []
|
|
573
|
+
for i in sorted(selected_indices):
|
|
574
|
+
msg = messages[i]
|
|
575
|
+
content = msg.content
|
|
303
576
|
|
|
577
|
+
# Apply chain-of-thought to the last user message for small models
|
|
578
|
+
if i == len(messages) - 1 and msg.role == "user":
|
|
579
|
+
content = _inject_chain_of_thought(content, model)
|
|
580
|
+
|
|
581
|
+
all_msgs.append({"role": msg.role, "content": content})
|
|
582
|
+
|
|
583
|
+
# Ensure we at least include the very last message
|
|
584
|
+
if not all_msgs and messages:
|
|
585
|
+
last = messages[-1]
|
|
586
|
+
content = _inject_chain_of_thought(last.content, model) if last.role == "user" else last.content
|
|
587
|
+
all_msgs = [{"role": last.role, "content": content}]
|
|
588
|
+
|
|
589
|
+
# Inject tool reminder right before the final user message
|
|
590
|
+
# so it's fresh in the model's attention
|
|
591
|
+
if len(all_msgs) >= 1:
|
|
592
|
+
history.extend(all_msgs[:-1])
|
|
593
|
+
history.append({"role": "system", "content": self._TOOL_REMINDER})
|
|
594
|
+
history.append(all_msgs[-1])
|
|
595
|
+
else:
|
|
596
|
+
history.extend(all_msgs)
|
|
597
|
+
|
|
598
|
+
total_tokens = token_count + sum(
|
|
599
|
+
_estimate_tokens(m["content"]) for m in history if m.get("content")
|
|
600
|
+
)
|
|
304
601
|
logger.debug(
|
|
305
602
|
"Built history: %d msgs, ~%d tokens (limit %d)",
|
|
306
|
-
len(history),
|
|
603
|
+
len(history), total_tokens, context_limit,
|
|
307
604
|
)
|
|
308
605
|
return history
|
|
309
606
|
|
|
@@ -327,14 +624,16 @@ class ChatService:
|
|
|
327
624
|
{
|
|
328
625
|
"role": "system",
|
|
329
626
|
"content": (
|
|
330
|
-
"
|
|
331
|
-
"
|
|
332
|
-
"
|
|
333
|
-
"
|
|
334
|
-
"the
|
|
627
|
+
"Summarize this conversation in a structured format. "
|
|
628
|
+
"Use this exact format:\n"
|
|
629
|
+
"Topics: [comma-separated list of topics discussed]\n"
|
|
630
|
+
"Key facts: [any specific names, file paths, variables, configs, or technical details mentioned]\n"
|
|
631
|
+
"Decisions: [any decisions or preferences the user stated]\n"
|
|
632
|
+
"Context: [1-2 sentences of overall context needed to continue]\n"
|
|
633
|
+
"Be concise. Preserve technical details exactly."
|
|
335
634
|
),
|
|
336
635
|
},
|
|
337
|
-
{"role": "user", "content": f"Summarize
|
|
636
|
+
{"role": "user", "content": f"Summarize:\n\n{old_text}"},
|
|
338
637
|
]
|
|
339
638
|
|
|
340
639
|
logger.info(
|
|
@@ -398,7 +697,14 @@ class ChatService:
|
|
|
398
697
|
last_user_msg = m.content
|
|
399
698
|
break
|
|
400
699
|
|
|
700
|
+
# Adaptive temperature based on the last user message
|
|
701
|
+
temperature = _adaptive_temperature(last_user_msg, model, temperature)
|
|
702
|
+
|
|
703
|
+
# Use simplified tools for small models
|
|
704
|
+
model_tools = _simplify_tools_for_model(TOOLS, model)
|
|
705
|
+
|
|
401
706
|
full_response = ""
|
|
707
|
+
tool_result_text = None
|
|
402
708
|
usage_data = None
|
|
403
709
|
try:
|
|
404
710
|
tool_calls_result = None
|
|
@@ -407,7 +713,7 @@ class ChatService:
|
|
|
407
713
|
model=model,
|
|
408
714
|
temperature=temperature,
|
|
409
715
|
max_tokens=max_tokens,
|
|
410
|
-
tools=
|
|
716
|
+
tools=model_tools,
|
|
411
717
|
):
|
|
412
718
|
if isinstance(chunk, dict):
|
|
413
719
|
if chunk.get("type") == "fallback":
|
|
@@ -431,24 +737,49 @@ class ChatService:
|
|
|
431
737
|
continue
|
|
432
738
|
results = await web_search(args.get("query", last_user_msg))
|
|
433
739
|
result_text = format_search_results(results)
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
"
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
})
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
"
|
|
451
|
-
|
|
740
|
+
tool_result_text = result_text
|
|
741
|
+
elif call["name"] == "deep_search":
|
|
742
|
+
yield _sse({"type": "status", "content": "Reading documentation..."})
|
|
743
|
+
try:
|
|
744
|
+
args = json.loads(call["arguments"])
|
|
745
|
+
except json.JSONDecodeError:
|
|
746
|
+
logger.warning("Invalid tool call arguments: %s", call["arguments"])
|
|
747
|
+
continue
|
|
748
|
+
result_text = await deep_search(args.get("query", last_user_msg))
|
|
749
|
+
tool_result_text = result_text
|
|
750
|
+
logger.info("Tool call deep_search(%s) returned %d chars", args.get("query"), len(result_text))
|
|
751
|
+
elif call["name"] == "lookup_kairo_docs":
|
|
752
|
+
yield _sse({"type": "status", "content": "Looking up Kairo docs..."})
|
|
753
|
+
try:
|
|
754
|
+
args = json.loads(call["arguments"])
|
|
755
|
+
except json.JSONDecodeError:
|
|
756
|
+
logger.warning("Invalid tool call arguments: %s", call["arguments"])
|
|
757
|
+
continue
|
|
758
|
+
result_text = lookup_kairo_docs(args.get("topic", last_user_msg))
|
|
759
|
+
tool_result_text = result_text
|
|
760
|
+
logger.info("Tool call lookup_kairo_docs(%s) returned %d chars", args.get("topic"), len(result_text))
|
|
761
|
+
else:
|
|
762
|
+
logger.warning("Unknown tool call: %s", call["name"])
|
|
763
|
+
continue
|
|
764
|
+
|
|
765
|
+
# Append tool call + result to history for second LLM call
|
|
766
|
+
history.append({
|
|
767
|
+
"role": "assistant",
|
|
768
|
+
"content": None,
|
|
769
|
+
"tool_calls": [{
|
|
770
|
+
"id": call["id"],
|
|
771
|
+
"type": "function",
|
|
772
|
+
"function": {
|
|
773
|
+
"name": call["name"],
|
|
774
|
+
"arguments": call["arguments"],
|
|
775
|
+
},
|
|
776
|
+
}],
|
|
777
|
+
})
|
|
778
|
+
history.append({
|
|
779
|
+
"role": "tool",
|
|
780
|
+
"tool_call_id": call["id"],
|
|
781
|
+
"content": result_text or "No results found.",
|
|
782
|
+
})
|
|
452
783
|
|
|
453
784
|
async for chunk in self.llm_service.stream_chat(
|
|
454
785
|
messages=history,
|
|
@@ -469,6 +800,10 @@ class ChatService:
|
|
|
469
800
|
|
|
470
801
|
full_response = _strip_duplicate_trailing_code(full_response)
|
|
471
802
|
|
|
803
|
+
# Post-process: validate against tool results
|
|
804
|
+
if tool_result_text:
|
|
805
|
+
full_response = validate_response(full_response, tool_result_text)
|
|
806
|
+
|
|
472
807
|
if full_response:
|
|
473
808
|
await self.conversation_service.add_message(conv.id, "assistant", full_response)
|
|
474
809
|
|