kairo_code-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144)
  1. image-service/main.py +178 -0
  2. infra/chat/app/main.py +84 -0
  3. kairo/backend/__init__.py +0 -0
  4. kairo/backend/api/__init__.py +0 -0
  5. kairo/backend/api/admin/__init__.py +23 -0
  6. kairo/backend/api/admin/audit.py +54 -0
  7. kairo/backend/api/admin/content.py +142 -0
  8. kairo/backend/api/admin/incidents.py +148 -0
  9. kairo/backend/api/admin/stats.py +125 -0
  10. kairo/backend/api/admin/system.py +87 -0
  11. kairo/backend/api/admin/users.py +279 -0
  12. kairo/backend/api/agents.py +94 -0
  13. kairo/backend/api/api_keys.py +85 -0
  14. kairo/backend/api/auth.py +116 -0
  15. kairo/backend/api/billing.py +41 -0
  16. kairo/backend/api/chat.py +72 -0
  17. kairo/backend/api/conversations.py +125 -0
  18. kairo/backend/api/device_auth.py +100 -0
  19. kairo/backend/api/files.py +83 -0
  20. kairo/backend/api/health.py +36 -0
  21. kairo/backend/api/images.py +80 -0
  22. kairo/backend/api/openai_compat.py +225 -0
  23. kairo/backend/api/projects.py +102 -0
  24. kairo/backend/api/usage.py +32 -0
  25. kairo/backend/api/webhooks.py +79 -0
  26. kairo/backend/app.py +297 -0
  27. kairo/backend/config.py +179 -0
  28. kairo/backend/core/__init__.py +0 -0
  29. kairo/backend/core/admin_auth.py +24 -0
  30. kairo/backend/core/api_key_auth.py +55 -0
  31. kairo/backend/core/database.py +28 -0
  32. kairo/backend/core/dependencies.py +70 -0
  33. kairo/backend/core/logging.py +23 -0
  34. kairo/backend/core/rate_limit.py +73 -0
  35. kairo/backend/core/security.py +29 -0
  36. kairo/backend/models/__init__.py +19 -0
  37. kairo/backend/models/agent.py +30 -0
  38. kairo/backend/models/api_key.py +25 -0
  39. kairo/backend/models/api_usage.py +29 -0
  40. kairo/backend/models/audit_log.py +26 -0
  41. kairo/backend/models/conversation.py +48 -0
  42. kairo/backend/models/device_code.py +30 -0
  43. kairo/backend/models/feature_flag.py +21 -0
  44. kairo/backend/models/image_generation.py +24 -0
  45. kairo/backend/models/incident.py +28 -0
  46. kairo/backend/models/project.py +28 -0
  47. kairo/backend/models/uptime_record.py +24 -0
  48. kairo/backend/models/usage.py +24 -0
  49. kairo/backend/models/user.py +49 -0
  50. kairo/backend/schemas/__init__.py +0 -0
  51. kairo/backend/schemas/admin/__init__.py +0 -0
  52. kairo/backend/schemas/admin/audit.py +28 -0
  53. kairo/backend/schemas/admin/content.py +53 -0
  54. kairo/backend/schemas/admin/stats.py +77 -0
  55. kairo/backend/schemas/admin/system.py +44 -0
  56. kairo/backend/schemas/admin/users.py +48 -0
  57. kairo/backend/schemas/agent.py +42 -0
  58. kairo/backend/schemas/api_key.py +30 -0
  59. kairo/backend/schemas/auth.py +57 -0
  60. kairo/backend/schemas/chat.py +26 -0
  61. kairo/backend/schemas/conversation.py +39 -0
  62. kairo/backend/schemas/device_auth.py +40 -0
  63. kairo/backend/schemas/image.py +15 -0
  64. kairo/backend/schemas/openai_compat.py +76 -0
  65. kairo/backend/schemas/project.py +21 -0
  66. kairo/backend/schemas/status.py +81 -0
  67. kairo/backend/schemas/usage.py +15 -0
  68. kairo/backend/services/__init__.py +0 -0
  69. kairo/backend/services/admin/__init__.py +0 -0
  70. kairo/backend/services/admin/audit_service.py +78 -0
  71. kairo/backend/services/admin/content_service.py +119 -0
  72. kairo/backend/services/admin/incident_service.py +94 -0
  73. kairo/backend/services/admin/stats_service.py +281 -0
  74. kairo/backend/services/admin/system_service.py +126 -0
  75. kairo/backend/services/admin/user_service.py +157 -0
  76. kairo/backend/services/agent_service.py +107 -0
  77. kairo/backend/services/api_key_service.py +66 -0
  78. kairo/backend/services/api_usage_service.py +126 -0
  79. kairo/backend/services/auth_service.py +101 -0
  80. kairo/backend/services/chat_service.py +501 -0
  81. kairo/backend/services/conversation_service.py +264 -0
  82. kairo/backend/services/device_auth_service.py +193 -0
  83. kairo/backend/services/email_service.py +55 -0
  84. kairo/backend/services/image_service.py +181 -0
  85. kairo/backend/services/llm_service.py +186 -0
  86. kairo/backend/services/project_service.py +109 -0
  87. kairo/backend/services/status_service.py +167 -0
  88. kairo/backend/services/stripe_service.py +78 -0
  89. kairo/backend/services/usage_service.py +150 -0
  90. kairo/backend/services/web_search_service.py +96 -0
  91. kairo/migrations/env.py +60 -0
  92. kairo/migrations/versions/001_initial.py +55 -0
  93. kairo/migrations/versions/002_usage_tracking_and_indexes.py +66 -0
  94. kairo/migrations/versions/003_username_to_email.py +21 -0
  95. kairo/migrations/versions/004_add_plans_and_verification.py +67 -0
  96. kairo/migrations/versions/005_add_projects.py +52 -0
  97. kairo/migrations/versions/006_add_image_generation.py +63 -0
  98. kairo/migrations/versions/007_add_admin_portal.py +107 -0
  99. kairo/migrations/versions/008_add_device_code_auth.py +76 -0
  100. kairo/migrations/versions/009_add_status_page.py +65 -0
  101. kairo/tools/extract_claude_data.py +465 -0
  102. kairo/tools/filter_claude_data.py +303 -0
  103. kairo/tools/generate_curated_data.py +157 -0
  104. kairo/tools/mix_training_data.py +295 -0
  105. kairo_code/__init__.py +3 -0
  106. kairo_code/agents/__init__.py +25 -0
  107. kairo_code/agents/architect.py +98 -0
  108. kairo_code/agents/audit.py +100 -0
  109. kairo_code/agents/base.py +463 -0
  110. kairo_code/agents/coder.py +155 -0
  111. kairo_code/agents/database.py +77 -0
  112. kairo_code/agents/docs.py +88 -0
  113. kairo_code/agents/explorer.py +62 -0
  114. kairo_code/agents/guardian.py +80 -0
  115. kairo_code/agents/planner.py +66 -0
  116. kairo_code/agents/reviewer.py +91 -0
  117. kairo_code/agents/security.py +94 -0
  118. kairo_code/agents/terraform.py +88 -0
  119. kairo_code/agents/testing.py +97 -0
  120. kairo_code/agents/uiux.py +88 -0
  121. kairo_code/auth.py +232 -0
  122. kairo_code/config.py +172 -0
  123. kairo_code/conversation.py +173 -0
  124. kairo_code/heartbeat.py +63 -0
  125. kairo_code/llm.py +291 -0
  126. kairo_code/logging_config.py +156 -0
  127. kairo_code/main.py +818 -0
  128. kairo_code/router.py +217 -0
  129. kairo_code/sandbox.py +248 -0
  130. kairo_code/settings.py +183 -0
  131. kairo_code/tools/__init__.py +51 -0
  132. kairo_code/tools/analysis.py +509 -0
  133. kairo_code/tools/base.py +417 -0
  134. kairo_code/tools/code.py +58 -0
  135. kairo_code/tools/definitions.py +617 -0
  136. kairo_code/tools/files.py +315 -0
  137. kairo_code/tools/review.py +390 -0
  138. kairo_code/tools/search.py +185 -0
  139. kairo_code/ui.py +418 -0
  140. kairo_code-0.1.0.dist-info/METADATA +13 -0
  141. kairo_code-0.1.0.dist-info/RECORD +144 -0
  142. kairo_code-0.1.0.dist-info/WHEEL +5 -0
  143. kairo_code-0.1.0.dist-info/entry_points.txt +2 -0
  144. kairo_code-0.1.0.dist-info/top_level.txt +4 -0
kairo/backend/services/chat_service.py
@@ -0,0 +1,501 @@
+ import json
+ import logging
+ from collections.abc import AsyncGenerator
+ from datetime import date, datetime, timezone
+
+ from sqlalchemy.ext.asyncio import AsyncSession
+ from sqlalchemy.orm import selectinload
+
+ from backend.config import settings
+ from backend.services.conversation_service import ConversationService
+ from backend.services.llm_service import LLMService
+ from backend.services.web_search_service import (
+     web_search,
+     format_search_results,
+ )
+
+ logger = logging.getLogger(__name__)
+
+ WEB_SEARCH_TOOL = {
+     "type": "function",
+     "function": {
+         "name": "web_search",
+         "description": "Search the web for current information, recent events, or to verify facts you are unsure about.",
+         "parameters": {
+             "type": "object",
+             "properties": {
+                 "query": {
+                     "type": "string",
+                     "description": "The search query",
+                 }
+             },
+             "required": ["query"],
+         },
+     },
+ }
+ TOOLS = [WEB_SEARCH_TOOL]
+
+
+ def _estimate_tokens(text: str) -> int:
+     """Rough token estimate: ~4 chars per token for English text."""
+     return len(text) // 4
+
+
+ def _strip_duplicate_trailing_code(text: str) -> str:
+     """Remove raw code duplicated at the end of a response.
+
+     Quantized models sometimes output a fenced code block and then repeat the
+     same code as unformatted text at the bottom. This detects the pattern and
+     strips the trailing duplicate.
+     """
+     import re as _re
+
+     # Find all fenced code blocks
+     fence_pattern = _re.compile(r"```[\w]*\n(.*?)```", _re.DOTALL)
+     blocks = fence_pattern.findall(text)
+     if not blocks:
+         return text
+
+     # Check if the text after the last fence contains a duplicate of any block
+     last_fence_end = text.rfind("```")
+     if last_fence_end < 0:
+         return text
+     # Move past the closing ```
+     tail_start = last_fence_end + 3
+     tail = text[tail_start:].strip()
+     if not tail or len(tail) < 20:
+         return text
+
+     for block in blocks:
+         block_stripped = block.strip()
+         if not block_stripped:
+             continue
+         # Check if the tail is substantially the same as a code block
+         # Use normalized comparison (collapse whitespace)
+         norm_block = " ".join(block_stripped.split())
+         norm_tail = " ".join(tail.split())
+         # Tail matches or is a prefix/suffix of the block
+         if norm_tail in norm_block or norm_block in norm_tail:
+             logger.info("Stripped duplicate trailing code (%d chars)", len(tail))
+             return text[:tail_start].rstrip()
+         # Also check with high overlap ratio for near-duplicates
+         if len(norm_tail) > 50 and len(norm_block) > 50:
+             shorter = min(len(norm_tail), len(norm_block))
+             # Compare the first N characters
+             if norm_tail[:shorter] == norm_block[:shorter]:
+                 logger.info("Stripped near-duplicate trailing code (%d chars)", len(tail))
+                 return text[:tail_start].rstrip()
+
+     return text
+
+
+ class ChatService:
+     def __init__(self, db: AsyncSession, llm_service: LLMService, user_id: str | None = None):
+         self.db = db
+         self.conversation_service = ConversationService(db, user_id=user_id)
+         self.llm_service = llm_service
+         self.user_id = user_id
+
+     async def stream_response(
+         self,
+         message: str,
+         model: str,
+         conversation_id: str | None = None,
+         temperature: float = 0.7,
+         max_tokens: int = 2048,
+         project_id: str | None = None,
+     ) -> AsyncGenerator[str, None]:
+         # Check usage limits before generating
+         if self.user_id:
+             from backend.services.usage_service import UsageService
+             usage_service = UsageService(self.db)
+             allowed, reason = await usage_service.check_limits(self.user_id)
+             if not allowed:
+                 yield _sse({"type": "error", "content": reason})
+                 return
+
+         # Get or create conversation
+         if conversation_id:
+             conv = await self.conversation_service.get(conversation_id)
+             if not conv:
+                 logger.warning("Conversation %s not found", conversation_id)
+                 yield _sse({"type": "error", "content": "Conversation not found"})
+                 return
+         else:
+             conv = await self.conversation_service.create(model=model, project_id=project_id)
+             logger.info("Created conversation %s with model %s", conv.id, model)
+
+         # Emit meta event with conversation ID
+         yield _sse({"type": "meta", "conversation_id": conv.id})
+
+         # Save user message
+         await self.conversation_service.add_message(conv.id, "user", message)
+
+         # Reload with messages (force fresh from DB)
+         conv = await self.conversation_service.get_fresh(conv.id)
+
+         # Check if we need to summarize older messages before building history
+         await self._maybe_summarize(conv, model)
+         # Reload after potential summary
+         conv = await self.conversation_service.get(conv.id)
+
+         # Build context-aware history
+         history = self._build_history(conv, model)
+
+         logger.info(
+             "Streaming conv=%s model=%s history_msgs=%d",
+             conv.id, model, len(history),
+         )
+
+         # Stream from LLM with tool calling support
+         full_response = ""
+         usage_data = None
+         try:
+             tool_calls_result = None
+             async for chunk in self.llm_service.stream_chat(
+                 messages=history,
+                 model=model,
+                 temperature=temperature,
+                 max_tokens=max_tokens,
+                 tools=TOOLS,
+             ):
+                 if isinstance(chunk, dict):
+                     if chunk.get("type") == "fallback":
+                         yield _sse({"type": "status", "content": "Using Nyx Lite — full model resuming shortly"})
+                     elif chunk.get("type") == "tool_calls":
+                         tool_calls_result = chunk["calls"]
+                     else:
+                         usage_data = chunk
+                 else:
+                     full_response += chunk
+                     yield _sse({"type": "content", "content": chunk})
+
+             # If the model requested tool calls, execute them
+             if tool_calls_result:
+                 for call in tool_calls_result:
+                     if call["name"] == "web_search":
+                         yield _sse({"type": "status", "content": "Searching the web..."})
+                         try:
+                             args = json.loads(call["arguments"])
+                         except json.JSONDecodeError:
+                             logger.warning("Invalid tool call arguments: %s", call["arguments"])
+                             continue
+                         results = await web_search(args.get("query", message))
+                         result_text = format_search_results(results)
+                         logger.info("Tool call web_search(%s) returned %d results", args.get("query"), len(results))
+
+                         # Append tool call + result to history for second LLM call
+                         history.append({
+                             "role": "assistant",
+                             "content": None,
+                             "tool_calls": [{
+                                 "id": call["id"],
+                                 "type": "function",
+                                 "function": {
+                                     "name": call["name"],
+                                     "arguments": call["arguments"],
+                                 },
+                             }],
+                         })
+                         history.append({
+                             "role": "tool",
+                             "tool_call_id": call["id"],
+                             "content": result_text or "No results found.",
+                         })
+
+                 # Second LLM call — no tools, generate final response using search results
+                 async for chunk in self.llm_service.stream_chat(
+                     messages=history,
+                     model=model,
+                     temperature=temperature,
+                     max_tokens=max_tokens,
+                 ):
+                     if isinstance(chunk, dict):
+                         usage_data = chunk
+                     else:
+                         full_response += chunk
+                         yield _sse({"type": "content", "content": chunk})
+
+         except Exception as e:
+             logger.error("LLM streaming error for conv %s: %s", conv.id, e, exc_info=True)
+             yield _sse({"type": "error", "content": "Failed to generate response. Please try again."})
+             return
+
+         # Post-process: strip duplicate trailing code blocks
+         full_response = _strip_duplicate_trailing_code(full_response)
+
+         # Save assistant response
+         if full_response:
+             await self.conversation_service.add_message(conv.id, "assistant", full_response)
+
+         # Record usage
+         if self.user_id and usage_data:
+             try:
+                 from backend.services.usage_service import UsageService
+                 usage_service = UsageService(self.db)
+                 await usage_service.record_usage(
+                     user_id=self.user_id,
+                     conversation_id=conv.id,
+                     model=model,
+                     prompt_tokens=usage_data.get("prompt_tokens", 0),
+                     completion_tokens=usage_data.get("completion_tokens", 0),
+                 )
+             except Exception as e:
+                 logger.warning("Failed to record usage: %s", e)
+
+         # Auto-title from first exchange
+         if conv.title == "New Conversation" and full_response:
+             title = self._generate_title(message)
+             await self.conversation_service.rename(conv.id, title)
+
+         # Update timestamp
+         conv.updated_at = datetime.now(timezone.utc)
+         await self.db.commit()
+
+         yield "data: [DONE]\n\n"
+
+     def _build_history(self, conv, model: str) -> list[dict[str, str]]:
+         context_limit = settings.CONTEXT_LIMITS.get(model, 6000)
+         history: list[dict[str, str]] = []
+         token_count = 0
+
+         # System prompt with current date
+         system_prompt = settings.SYSTEM_PROMPTS.get(model, "")
+         if system_prompt:
+             today = date.today().strftime("%A, %B %d, %Y")
+             system_prompt += f"\n\nToday's date is {today}."
+             history.append({"role": "system", "content": system_prompt})
+             token_count += _estimate_tokens(system_prompt)
+
+         # If conversation belongs to a project with instructions, inject them
+         if conv.project and conv.project.instructions:
+             project_msg = f"[Project Instructions]\n{conv.project.instructions}"
+             history.append({"role": "system", "content": project_msg})
+             token_count += _estimate_tokens(project_msg)
+
+         # If we have a summary of older messages, inject it
+         if conv.summary:
+             summary_msg = (
+                 f"[Context from earlier in this conversation]\n{conv.summary}"
+             )
+             history.append({"role": "system", "content": summary_msg})
+             token_count += _estimate_tokens(summary_msg)
+
+         # Add recent messages, working backwards to prioritize the latest
+         messages = list(conv.messages)
+         to_include = []
+         for msg in reversed(messages):
+             msg_tokens = _estimate_tokens(msg.content)
+             if token_count + msg_tokens > context_limit:
+                 break
+             to_include.append({"role": msg.role, "content": msg.content})
+             token_count += msg_tokens
+
+         # Reverse back to chronological order
+         to_include.reverse()
+
+         # Ensure we at least include the very last message (the new user msg)
+         if not to_include and messages:
+             last = messages[-1]
+             to_include = [{"role": last.role, "content": last.content}]
+
+         history.extend(to_include)
+
+         logger.debug(
+             "Built history: %d msgs, ~%d tokens (limit %d)",
+             len(history), token_count, context_limit,
+         )
+         return history
+
+     async def _maybe_summarize(self, conv, model: str) -> None:
+         messages = list(conv.messages)
+         total_tokens = sum(_estimate_tokens(m.content) for m in messages)
+
+         if total_tokens < settings.SUMMARY_TRIGGER_TOKENS:
+             return
+
+         midpoint = len(messages) // 2
+         if midpoint < 2:
+             return
+
+         old_messages = messages[:midpoint]
+         old_text = "\n".join(
+             f"{m.role}: {m.content[:500]}" for m in old_messages
+         )
+
+         summary_prompt = [
+             {
+                 "role": "system",
+                 "content": (
+                     "You are a summarization assistant. Condense the following "
+                     "conversation into a brief summary (2-4 sentences) that "
+                     "captures the key topics, decisions, and context. "
+                     "Focus on information the AI would need to continue "
+                     "the conversation coherently."
+                 ),
+             },
+             {"role": "user", "content": f"Summarize this conversation:\n\n{old_text}"},
+         ]
+
+         logger.info(
+             "Summarizing %d old messages for conv %s (~%d tokens)",
+             len(old_messages), conv.id, sum(_estimate_tokens(m.content) for m in old_messages),
+         )
+
+         try:
+             summary = ""
+             async for token in self.llm_service.stream_chat(
+                 messages=summary_prompt,
+                 model=model,
+                 temperature=0.3,
+                 max_tokens=300,
+             ):
+                 if isinstance(token, dict):
+                     continue
+                 summary += token
+
+             if summary:
+                 conv.summary = summary
+                 await self.db.commit()
+                 logger.info("Saved summary for conv %s: %s", conv.id, summary[:100])
+         except Exception as e:
+             logger.warning("Failed to summarize conv %s: %s", conv.id, e)
+
+     async def regenerate_response(
+         self,
+         conversation_id: str,
+         temperature: float = 0.7,
+         max_tokens: int = 2048,
+     ) -> AsyncGenerator[str, None]:
+         """Delete last assistant message and re-stream a new response."""
+         conv = await self.conversation_service.get(conversation_id)
+         if not conv:
+             yield _sse({"type": "error", "content": "Conversation not found"})
+             return
+
+         # Delete last assistant message
+         await self.conversation_service.delete_last_assistant_message(conversation_id)
+
+         # Reload conversation
+         conv = await self.conversation_service.get(conversation_id)
+         if not conv or not conv.messages:
+             yield _sse({"type": "error", "content": "No messages to regenerate from"})
+             return
+
+         model = conv.model
+         yield _sse({"type": "meta", "conversation_id": conv.id})
+
+         # Build history and stream
+         await self._maybe_summarize(conv, model)
+         conv = await self.conversation_service.get(conv.id)
+         history = self._build_history(conv, model)
+
+         # Find the last user message (for fallback query if tool args parse fails)
+         messages_sorted = sorted(conv.messages, key=lambda m: m.created_at)
+         last_user_msg = ""
+         for m in reversed(messages_sorted):
+             if m.role == "user":
+                 last_user_msg = m.content
+                 break
+
+         full_response = ""
+         usage_data = None
+         try:
+             tool_calls_result = None
+             async for chunk in self.llm_service.stream_chat(
+                 messages=history,
+                 model=model,
+                 temperature=temperature,
+                 max_tokens=max_tokens,
+                 tools=TOOLS,
+             ):
+                 if isinstance(chunk, dict):
+                     if chunk.get("type") == "fallback":
+                         yield _sse({"type": "status", "content": "Using Nyx Lite — full model resuming shortly"})
+                     elif chunk.get("type") == "tool_calls":
+                         tool_calls_result = chunk["calls"]
+                     else:
+                         usage_data = chunk
+                 else:
+                     full_response += chunk
+                     yield _sse({"type": "content", "content": chunk})
+
+             if tool_calls_result:
+                 for call in tool_calls_result:
+                     if call["name"] == "web_search":
+                         yield _sse({"type": "status", "content": "Searching the web..."})
+                         try:
+                             args = json.loads(call["arguments"])
+                         except json.JSONDecodeError:
+                             logger.warning("Invalid tool call arguments: %s", call["arguments"])
+                             continue
+                         results = await web_search(args.get("query", last_user_msg))
+                         result_text = format_search_results(results)
+
+                         history.append({
+                             "role": "assistant",
+                             "content": None,
+                             "tool_calls": [{
+                                 "id": call["id"],
+                                 "type": "function",
+                                 "function": {
+                                     "name": call["name"],
+                                     "arguments": call["arguments"],
+                                 },
+                             }],
+                         })
+                         history.append({
+                             "role": "tool",
+                             "tool_call_id": call["id"],
+                             "content": result_text or "No results found.",
+                         })
+
+                 async for chunk in self.llm_service.stream_chat(
+                     messages=history,
+                     model=model,
+                     temperature=temperature,
+                     max_tokens=max_tokens,
+                 ):
+                     if isinstance(chunk, dict):
+                         usage_data = chunk
+                     else:
+                         full_response += chunk
+                         yield _sse({"type": "content", "content": chunk})
+
+         except Exception as e:
+             logger.error("LLM streaming error for conv %s: %s", conv.id, e, exc_info=True)
+             yield _sse({"type": "error", "content": "Failed to generate response. Please try again."})
+             return
+
+         full_response = _strip_duplicate_trailing_code(full_response)
+
+         if full_response:
+             await self.conversation_service.add_message(conv.id, "assistant", full_response)
+
+         if self.user_id and usage_data:
+             try:
+                 from backend.services.usage_service import UsageService
+                 usage_service = UsageService(self.db)
+                 await usage_service.record_usage(
+                     user_id=self.user_id,
+                     conversation_id=conv.id,
+                     model=model,
+                     prompt_tokens=usage_data.get("prompt_tokens", 0),
+                     completion_tokens=usage_data.get("completion_tokens", 0),
+                 )
+             except Exception as e:
+                 logger.warning("Failed to record usage: %s", e)
+
+         conv.updated_at = datetime.now(timezone.utc)
+         await self.db.commit()
+         yield "data: [DONE]\n\n"
+
+     def _generate_title(self, first_message: str) -> str:
+         title = first_message.strip()
+         if len(title) > 50:
+             title = title[:47] + "..."
+         return title
+
+
+ def _sse(data: dict) -> str:
+     return f"data: {json.dumps(data)}\n\n"
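
The streaming protocol implemented above: each event is a Server-Sent Events line of the form "data: {json}" carrying a "type" of "meta", "status", "content", or "error", and the stream ends with a literal "data: [DONE]" line. A minimal client-side sketch for consuming it follows; the endpoint path and request payload are assumptions for illustration (the real route lives in kairo/backend/api/chat.py, which this diff does not show), while the event handling mirrors what ChatService emits.

    import asyncio
    import json

    import httpx

    async def consume_chat_stream(base_url: str, message: str, model: str) -> str:
        """Collect streamed "content" chunks into the full assistant reply."""
        parts: list[str] = []
        async with httpx.AsyncClient(timeout=None) as client:
            # Hypothetical route and payload shape; adjust to the actual API.
            async with client.stream(
                "POST",
                f"{base_url}/api/chat",
                json={"message": message, "model": model},
            ) as response:
                async for line in response.aiter_lines():
                    if not line.startswith("data: "):
                        continue  # skip the blank separator lines between events
                    payload = line[len("data: "):]
                    if payload == "[DONE]":
                        break  # terminator emitted by ChatService
                    event = json.loads(payload)
                    if event["type"] == "content":
                        parts.append(event["content"])
                    elif event["type"] == "error":
                        raise RuntimeError(event["content"])
        return "".join(parts)

    if __name__ == "__main__":
        # "nyx-lite" is a placeholder model name, not taken from this diff.
        print(asyncio.run(consume_chat_stream("http://localhost:8000", "Hello", "nyx-lite")))

The duplicate-stripping heuristic can also be exercised directly, assuming the wheel's backend package imports in your environment (its modules pull in backend.config at import time):

    from backend.services.chat_service import _strip_duplicate_trailing_code

    raw = (
        "Here is the function:\n"
        "```python\n"
        "def add(a, b):\n"
        "    return a + b\n"
        "```\n"
        "def add(a, b):\n"
        "    return a + b"
    )
    # The unfenced repeat after the closing fence is detected and removed,
    # so the cleaned text ends at the closing fence.
    assert _strip_duplicate_trailing_code(raw).endswith("```")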