kairo_code-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- image-service/main.py +178 -0
- infra/chat/app/main.py +84 -0
- kairo/backend/__init__.py +0 -0
- kairo/backend/api/__init__.py +0 -0
- kairo/backend/api/admin/__init__.py +23 -0
- kairo/backend/api/admin/audit.py +54 -0
- kairo/backend/api/admin/content.py +142 -0
- kairo/backend/api/admin/incidents.py +148 -0
- kairo/backend/api/admin/stats.py +125 -0
- kairo/backend/api/admin/system.py +87 -0
- kairo/backend/api/admin/users.py +279 -0
- kairo/backend/api/agents.py +94 -0
- kairo/backend/api/api_keys.py +85 -0
- kairo/backend/api/auth.py +116 -0
- kairo/backend/api/billing.py +41 -0
- kairo/backend/api/chat.py +72 -0
- kairo/backend/api/conversations.py +125 -0
- kairo/backend/api/device_auth.py +100 -0
- kairo/backend/api/files.py +83 -0
- kairo/backend/api/health.py +36 -0
- kairo/backend/api/images.py +80 -0
- kairo/backend/api/openai_compat.py +225 -0
- kairo/backend/api/projects.py +102 -0
- kairo/backend/api/usage.py +32 -0
- kairo/backend/api/webhooks.py +79 -0
- kairo/backend/app.py +297 -0
- kairo/backend/config.py +179 -0
- kairo/backend/core/__init__.py +0 -0
- kairo/backend/core/admin_auth.py +24 -0
- kairo/backend/core/api_key_auth.py +55 -0
- kairo/backend/core/database.py +28 -0
- kairo/backend/core/dependencies.py +70 -0
- kairo/backend/core/logging.py +23 -0
- kairo/backend/core/rate_limit.py +73 -0
- kairo/backend/core/security.py +29 -0
- kairo/backend/models/__init__.py +19 -0
- kairo/backend/models/agent.py +30 -0
- kairo/backend/models/api_key.py +25 -0
- kairo/backend/models/api_usage.py +29 -0
- kairo/backend/models/audit_log.py +26 -0
- kairo/backend/models/conversation.py +48 -0
- kairo/backend/models/device_code.py +30 -0
- kairo/backend/models/feature_flag.py +21 -0
- kairo/backend/models/image_generation.py +24 -0
- kairo/backend/models/incident.py +28 -0
- kairo/backend/models/project.py +28 -0
- kairo/backend/models/uptime_record.py +24 -0
- kairo/backend/models/usage.py +24 -0
- kairo/backend/models/user.py +49 -0
- kairo/backend/schemas/__init__.py +0 -0
- kairo/backend/schemas/admin/__init__.py +0 -0
- kairo/backend/schemas/admin/audit.py +28 -0
- kairo/backend/schemas/admin/content.py +53 -0
- kairo/backend/schemas/admin/stats.py +77 -0
- kairo/backend/schemas/admin/system.py +44 -0
- kairo/backend/schemas/admin/users.py +48 -0
- kairo/backend/schemas/agent.py +42 -0
- kairo/backend/schemas/api_key.py +30 -0
- kairo/backend/schemas/auth.py +57 -0
- kairo/backend/schemas/chat.py +26 -0
- kairo/backend/schemas/conversation.py +39 -0
- kairo/backend/schemas/device_auth.py +40 -0
- kairo/backend/schemas/image.py +15 -0
- kairo/backend/schemas/openai_compat.py +76 -0
- kairo/backend/schemas/project.py +21 -0
- kairo/backend/schemas/status.py +81 -0
- kairo/backend/schemas/usage.py +15 -0
- kairo/backend/services/__init__.py +0 -0
- kairo/backend/services/admin/__init__.py +0 -0
- kairo/backend/services/admin/audit_service.py +78 -0
- kairo/backend/services/admin/content_service.py +119 -0
- kairo/backend/services/admin/incident_service.py +94 -0
- kairo/backend/services/admin/stats_service.py +281 -0
- kairo/backend/services/admin/system_service.py +126 -0
- kairo/backend/services/admin/user_service.py +157 -0
- kairo/backend/services/agent_service.py +107 -0
- kairo/backend/services/api_key_service.py +66 -0
- kairo/backend/services/api_usage_service.py +126 -0
- kairo/backend/services/auth_service.py +101 -0
- kairo/backend/services/chat_service.py +501 -0
- kairo/backend/services/conversation_service.py +264 -0
- kairo/backend/services/device_auth_service.py +193 -0
- kairo/backend/services/email_service.py +55 -0
- kairo/backend/services/image_service.py +181 -0
- kairo/backend/services/llm_service.py +186 -0
- kairo/backend/services/project_service.py +109 -0
- kairo/backend/services/status_service.py +167 -0
- kairo/backend/services/stripe_service.py +78 -0
- kairo/backend/services/usage_service.py +150 -0
- kairo/backend/services/web_search_service.py +96 -0
- kairo/migrations/env.py +60 -0
- kairo/migrations/versions/001_initial.py +55 -0
- kairo/migrations/versions/002_usage_tracking_and_indexes.py +66 -0
- kairo/migrations/versions/003_username_to_email.py +21 -0
- kairo/migrations/versions/004_add_plans_and_verification.py +67 -0
- kairo/migrations/versions/005_add_projects.py +52 -0
- kairo/migrations/versions/006_add_image_generation.py +63 -0
- kairo/migrations/versions/007_add_admin_portal.py +107 -0
- kairo/migrations/versions/008_add_device_code_auth.py +76 -0
- kairo/migrations/versions/009_add_status_page.py +65 -0
- kairo/tools/extract_claude_data.py +465 -0
- kairo/tools/filter_claude_data.py +303 -0
- kairo/tools/generate_curated_data.py +157 -0
- kairo/tools/mix_training_data.py +295 -0
- kairo_code/__init__.py +3 -0
- kairo_code/agents/__init__.py +25 -0
- kairo_code/agents/architect.py +98 -0
- kairo_code/agents/audit.py +100 -0
- kairo_code/agents/base.py +463 -0
- kairo_code/agents/coder.py +155 -0
- kairo_code/agents/database.py +77 -0
- kairo_code/agents/docs.py +88 -0
- kairo_code/agents/explorer.py +62 -0
- kairo_code/agents/guardian.py +80 -0
- kairo_code/agents/planner.py +66 -0
- kairo_code/agents/reviewer.py +91 -0
- kairo_code/agents/security.py +94 -0
- kairo_code/agents/terraform.py +88 -0
- kairo_code/agents/testing.py +97 -0
- kairo_code/agents/uiux.py +88 -0
- kairo_code/auth.py +232 -0
- kairo_code/config.py +172 -0
- kairo_code/conversation.py +173 -0
- kairo_code/heartbeat.py +63 -0
- kairo_code/llm.py +291 -0
- kairo_code/logging_config.py +156 -0
- kairo_code/main.py +818 -0
- kairo_code/router.py +217 -0
- kairo_code/sandbox.py +248 -0
- kairo_code/settings.py +183 -0
- kairo_code/tools/__init__.py +51 -0
- kairo_code/tools/analysis.py +509 -0
- kairo_code/tools/base.py +417 -0
- kairo_code/tools/code.py +58 -0
- kairo_code/tools/definitions.py +617 -0
- kairo_code/tools/files.py +315 -0
- kairo_code/tools/review.py +390 -0
- kairo_code/tools/search.py +185 -0
- kairo_code/ui.py +418 -0
- kairo_code-0.1.0.dist-info/METADATA +13 -0
- kairo_code-0.1.0.dist-info/RECORD +144 -0
- kairo_code-0.1.0.dist-info/WHEEL +5 -0
- kairo_code-0.1.0.dist-info/entry_points.txt +2 -0
- kairo_code-0.1.0.dist-info/top_level.txt +4 -0
kairo/backend/services/chat_service.py
@@ -0,0 +1,501 @@
+import json
+import logging
+from collections.abc import AsyncGenerator
+from datetime import date, datetime, timezone
+
+from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.orm import selectinload
+
+from backend.config import settings
+from backend.services.conversation_service import ConversationService
+from backend.services.llm_service import LLMService
+from backend.services.web_search_service import (
+    web_search,
+    format_search_results,
+)
+
+logger = logging.getLogger(__name__)
+
+WEB_SEARCH_TOOL = {
+    "type": "function",
+    "function": {
+        "name": "web_search",
+        "description": "Search the web for current information, recent events, or to verify facts you are unsure about.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "query": {
+                    "type": "string",
+                    "description": "The search query",
+                }
+            },
+            "required": ["query"],
+        },
+    },
+}
+TOOLS = [WEB_SEARCH_TOOL]
+
+
+def _estimate_tokens(text: str) -> int:
+    """Rough token estimate: ~4 chars per token for English text."""
+    return len(text) // 4
+
+
+def _strip_duplicate_trailing_code(text: str) -> str:
+    """Remove raw code duplicated at the end of a response.
+
+    Quantized models sometimes output a fenced code block and then repeat the
+    same code as unformatted text at the bottom. This detects the pattern and
+    strips the trailing duplicate.
+    """
+    import re as _re
+
+    # Find all fenced code blocks
+    fence_pattern = _re.compile(r"```[\w]*\n(.*?)```", _re.DOTALL)
+    blocks = fence_pattern.findall(text)
+    if not blocks:
+        return text
+
+    # Check if the text after the last fence contains a duplicate of any block
+    last_fence_end = text.rfind("```")
+    if last_fence_end < 0:
+        return text
+    # Move past the closing ```
+    tail_start = last_fence_end + 3
+    tail = text[tail_start:].strip()
+    if not tail or len(tail) < 20:
+        return text
+
+    for block in blocks:
+        block_stripped = block.strip()
+        if not block_stripped:
+            continue
+        # Check if the tail is substantially the same as a code block
+        # Use normalized comparison (collapse whitespace)
+        norm_block = " ".join(block_stripped.split())
+        norm_tail = " ".join(tail.split())
+        # Tail matches or is a prefix/suffix of the block
+        if norm_tail in norm_block or norm_block in norm_tail:
+            logger.info("Stripped duplicate trailing code (%d chars)", len(tail))
+            return text[:tail_start].rstrip()
+        # Also check with high overlap ratio for near-duplicates
+        if len(norm_tail) > 50 and len(norm_block) > 50:
+            shorter = min(len(norm_tail), len(norm_block))
+            # Compare the first N characters
+            if norm_tail[:shorter] == norm_block[:shorter]:
+                logger.info("Stripped near-duplicate trailing code (%d chars)", len(tail))
+                return text[:tail_start].rstrip()
+
+    return text
+
+
+class ChatService:
+    def __init__(self, db: AsyncSession, llm_service: LLMService, user_id: str | None = None):
+        self.db = db
+        self.conversation_service = ConversationService(db, user_id=user_id)
+        self.llm_service = llm_service
+        self.user_id = user_id
+
+    async def stream_response(
+        self,
+        message: str,
+        model: str,
+        conversation_id: str | None = None,
+        temperature: float = 0.7,
+        max_tokens: int = 2048,
+        project_id: str | None = None,
+    ) -> AsyncGenerator[str, None]:
+        # Check usage limits before generating
+        if self.user_id:
+            from backend.services.usage_service import UsageService
+            usage_service = UsageService(self.db)
+            allowed, reason = await usage_service.check_limits(self.user_id)
+            if not allowed:
+                yield _sse({"type": "error", "content": reason})
+                return
+
+        # Get or create conversation
+        if conversation_id:
+            conv = await self.conversation_service.get(conversation_id)
+            if not conv:
+                logger.warning("Conversation %s not found", conversation_id)
+                yield _sse({"type": "error", "content": "Conversation not found"})
+                return
+        else:
+            conv = await self.conversation_service.create(model=model, project_id=project_id)
+            logger.info("Created conversation %s with model %s", conv.id, model)
+
+        # Emit meta event with conversation ID
+        yield _sse({"type": "meta", "conversation_id": conv.id})
+
+        # Save user message
+        await self.conversation_service.add_message(conv.id, "user", message)
+
+        # Reload with messages (force fresh from DB)
+        conv = await self.conversation_service.get_fresh(conv.id)
+
+        # Check if we need to summarize older messages before building history
+        await self._maybe_summarize(conv, model)
+        # Reload after potential summary
+        conv = await self.conversation_service.get(conv.id)
+
+        # Build context-aware history
+        history = self._build_history(conv, model)
+
+        logger.info(
+            "Streaming conv=%s model=%s history_msgs=%d",
+            conv.id, model, len(history),
+        )
+
+        # Stream from LLM with tool calling support
+        full_response = ""
+        usage_data = None
+        try:
+            tool_calls_result = None
+            async for chunk in self.llm_service.stream_chat(
+                messages=history,
+                model=model,
+                temperature=temperature,
+                max_tokens=max_tokens,
+                tools=TOOLS,
+            ):
+                if isinstance(chunk, dict):
+                    if chunk.get("type") == "fallback":
+                        yield _sse({"type": "status", "content": "Using Nyx Lite — full model resuming shortly"})
+                    elif chunk.get("type") == "tool_calls":
+                        tool_calls_result = chunk["calls"]
+                    else:
+                        usage_data = chunk
+                else:
+                    full_response += chunk
+                    yield _sse({"type": "content", "content": chunk})
+
+            # If the model requested tool calls, execute them
+            if tool_calls_result:
+                for call in tool_calls_result:
+                    if call["name"] == "web_search":
+                        yield _sse({"type": "status", "content": "Searching the web..."})
+                        try:
+                            args = json.loads(call["arguments"])
+                        except json.JSONDecodeError:
+                            logger.warning("Invalid tool call arguments: %s", call["arguments"])
+                            continue
+                        results = await web_search(args.get("query", message))
+                        result_text = format_search_results(results)
+                        logger.info("Tool call web_search(%s) returned %d results", args.get("query"), len(results))
+
+                        # Append tool call + result to history for second LLM call
+                        history.append({
+                            "role": "assistant",
+                            "content": None,
+                            "tool_calls": [{
+                                "id": call["id"],
+                                "type": "function",
+                                "function": {
+                                    "name": call["name"],
+                                    "arguments": call["arguments"],
+                                },
+                            }],
+                        })
+                        history.append({
+                            "role": "tool",
+                            "tool_call_id": call["id"],
+                            "content": result_text or "No results found.",
+                        })
+
+                # Second LLM call — no tools, generate final response using search results
+                async for chunk in self.llm_service.stream_chat(
+                    messages=history,
+                    model=model,
+                    temperature=temperature,
+                    max_tokens=max_tokens,
+                ):
+                    if isinstance(chunk, dict):
+                        usage_data = chunk
+                    else:
+                        full_response += chunk
+                        yield _sse({"type": "content", "content": chunk})
+
+        except Exception as e:
+            logger.error("LLM streaming error for conv %s: %s", conv.id, e, exc_info=True)
+            yield _sse({"type": "error", "content": "Failed to generate response. Please try again."})
+            return
+
+        # Post-process: strip duplicate trailing code blocks
+        full_response = _strip_duplicate_trailing_code(full_response)
+
+        # Save assistant response
+        if full_response:
+            await self.conversation_service.add_message(conv.id, "assistant", full_response)
+
+        # Record usage
+        if self.user_id and usage_data:
+            try:
+                from backend.services.usage_service import UsageService
+                usage_service = UsageService(self.db)
+                await usage_service.record_usage(
+                    user_id=self.user_id,
+                    conversation_id=conv.id,
+                    model=model,
+                    prompt_tokens=usage_data.get("prompt_tokens", 0),
+                    completion_tokens=usage_data.get("completion_tokens", 0),
+                )
+            except Exception as e:
+                logger.warning("Failed to record usage: %s", e)
+
+        # Auto-title from first exchange
+        if conv.title == "New Conversation" and full_response:
+            title = self._generate_title(message)
+            await self.conversation_service.rename(conv.id, title)
+
+        # Update timestamp
+        conv.updated_at = datetime.now(timezone.utc)
+        await self.db.commit()
+
+        yield "data: [DONE]\n\n"
+
+    def _build_history(self, conv, model: str) -> list[dict[str, str]]:
+        context_limit = settings.CONTEXT_LIMITS.get(model, 6000)
+        history: list[dict[str, str]] = []
+        token_count = 0
+
+        # System prompt with current date
+        system_prompt = settings.SYSTEM_PROMPTS.get(model, "")
+        if system_prompt:
+            today = date.today().strftime("%A, %B %d, %Y")
+            system_prompt += f"\n\nToday's date is {today}."
+            history.append({"role": "system", "content": system_prompt})
+            token_count += _estimate_tokens(system_prompt)
+
+        # If conversation belongs to a project with instructions, inject them
+        if conv.project and conv.project.instructions:
+            project_msg = f"[Project Instructions]\n{conv.project.instructions}"
+            history.append({"role": "system", "content": project_msg})
+            token_count += _estimate_tokens(project_msg)
+
+        # If we have a summary of older messages, inject it
+        if conv.summary:
+            summary_msg = (
+                f"[Context from earlier in this conversation]\n{conv.summary}"
+            )
+            history.append({"role": "system", "content": summary_msg})
+            token_count += _estimate_tokens(summary_msg)
+
+        # Add recent messages, working backwards to prioritize the latest
+        messages = list(conv.messages)
+        to_include = []
+        for msg in reversed(messages):
+            msg_tokens = _estimate_tokens(msg.content)
+            if token_count + msg_tokens > context_limit:
+                break
+            to_include.append({"role": msg.role, "content": msg.content})
+            token_count += msg_tokens
+
+        # Reverse back to chronological order
+        to_include.reverse()
+
+        # Ensure we at least include the very last message (the new user msg)
+        if not to_include and messages:
+            last = messages[-1]
+            to_include = [{"role": last.role, "content": last.content}]
+
+        history.extend(to_include)
+
+        logger.debug(
+            "Built history: %d msgs, ~%d tokens (limit %d)",
+            len(history), token_count, context_limit,
+        )
+        return history
+
+    async def _maybe_summarize(self, conv, model: str) -> None:
+        messages = list(conv.messages)
+        total_tokens = sum(_estimate_tokens(m.content) for m in messages)
+
+        if total_tokens < settings.SUMMARY_TRIGGER_TOKENS:
+            return
+
+        midpoint = len(messages) // 2
+        if midpoint < 2:
+            return
+
+        old_messages = messages[:midpoint]
+        old_text = "\n".join(
+            f"{m.role}: {m.content[:500]}" for m in old_messages
+        )
+
+        summary_prompt = [
+            {
+                "role": "system",
+                "content": (
+                    "You are a summarization assistant. Condense the following "
+                    "conversation into a brief summary (2-4 sentences) that "
+                    "captures the key topics, decisions, and context. "
+                    "Focus on information the AI would need to continue "
+                    "the conversation coherently."
+                ),
+            },
+            {"role": "user", "content": f"Summarize this conversation:\n\n{old_text}"},
+        ]
+
+        logger.info(
+            "Summarizing %d old messages for conv %s (~%d tokens)",
+            len(old_messages), conv.id, sum(_estimate_tokens(m.content) for m in old_messages),
+        )
+
+        try:
+            summary = ""
+            async for token in self.llm_service.stream_chat(
+                messages=summary_prompt,
+                model=model,
+                temperature=0.3,
+                max_tokens=300,
+            ):
+                if isinstance(token, dict):
+                    continue
+                summary += token
+
+            if summary:
+                conv.summary = summary
+                await self.db.commit()
+                logger.info("Saved summary for conv %s: %s", conv.id, summary[:100])
+        except Exception as e:
+            logger.warning("Failed to summarize conv %s: %s", conv.id, e)
+
+    async def regenerate_response(
+        self,
+        conversation_id: str,
+        temperature: float = 0.7,
+        max_tokens: int = 2048,
+    ) -> AsyncGenerator[str, None]:
+        """Delete last assistant message and re-stream a new response."""
+        conv = await self.conversation_service.get(conversation_id)
+        if not conv:
+            yield _sse({"type": "error", "content": "Conversation not found"})
+            return
+
+        # Delete last assistant message
+        await self.conversation_service.delete_last_assistant_message(conversation_id)
+
+        # Reload conversation
+        conv = await self.conversation_service.get(conversation_id)
+        if not conv or not conv.messages:
+            yield _sse({"type": "error", "content": "No messages to regenerate from"})
+            return
+
+        model = conv.model
+        yield _sse({"type": "meta", "conversation_id": conv.id})
+
+        # Build history and stream
+        await self._maybe_summarize(conv, model)
+        conv = await self.conversation_service.get(conv.id)
+        history = self._build_history(conv, model)
+
+        # Find the last user message (for fallback query if tool args parse fails)
+        messages_sorted = sorted(conv.messages, key=lambda m: m.created_at)
+        last_user_msg = ""
+        for m in reversed(messages_sorted):
+            if m.role == "user":
+                last_user_msg = m.content
+                break
+
+        full_response = ""
+        usage_data = None
+        try:
+            tool_calls_result = None
+            async for chunk in self.llm_service.stream_chat(
+                messages=history,
+                model=model,
+                temperature=temperature,
+                max_tokens=max_tokens,
+                tools=TOOLS,
+            ):
+                if isinstance(chunk, dict):
+                    if chunk.get("type") == "fallback":
+                        yield _sse({"type": "status", "content": "Using Nyx Lite — full model resuming shortly"})
+                    elif chunk.get("type") == "tool_calls":
+                        tool_calls_result = chunk["calls"]
+                    else:
+                        usage_data = chunk
+                else:
+                    full_response += chunk
+                    yield _sse({"type": "content", "content": chunk})
+
+            if tool_calls_result:
+                for call in tool_calls_result:
+                    if call["name"] == "web_search":
+                        yield _sse({"type": "status", "content": "Searching the web..."})
+                        try:
+                            args = json.loads(call["arguments"])
+                        except json.JSONDecodeError:
+                            logger.warning("Invalid tool call arguments: %s", call["arguments"])
+                            continue
+                        results = await web_search(args.get("query", last_user_msg))
+                        result_text = format_search_results(results)
+
+                        history.append({
+                            "role": "assistant",
+                            "content": None,
+                            "tool_calls": [{
+                                "id": call["id"],
+                                "type": "function",
+                                "function": {
+                                    "name": call["name"],
+                                    "arguments": call["arguments"],
+                                },
+                            }],
+                        })
+                        history.append({
+                            "role": "tool",
+                            "tool_call_id": call["id"],
+                            "content": result_text or "No results found.",
+                        })
+
+                async for chunk in self.llm_service.stream_chat(
+                    messages=history,
+                    model=model,
+                    temperature=temperature,
+                    max_tokens=max_tokens,
+                ):
+                    if isinstance(chunk, dict):
+                        usage_data = chunk
+                    else:
+                        full_response += chunk
+                        yield _sse({"type": "content", "content": chunk})
+
+        except Exception as e:
+            logger.error("LLM streaming error for conv %s: %s", conv.id, e, exc_info=True)
+            yield _sse({"type": "error", "content": "Failed to generate response. Please try again."})
+            return
+
+        full_response = _strip_duplicate_trailing_code(full_response)
+
+        if full_response:
+            await self.conversation_service.add_message(conv.id, "assistant", full_response)
+
+        if self.user_id and usage_data:
+            try:
+                from backend.services.usage_service import UsageService
+                usage_service = UsageService(self.db)
+                await usage_service.record_usage(
+                    user_id=self.user_id,
+                    conversation_id=conv.id,
+                    model=model,
+                    prompt_tokens=usage_data.get("prompt_tokens", 0),
+                    completion_tokens=usage_data.get("completion_tokens", 0),
+                )
+            except Exception as e:
+                logger.warning("Failed to record usage: %s", e)
+
+        conv.updated_at = datetime.now(timezone.utc)
+        await self.db.commit()
+        yield "data: [DONE]\n\n"
+
+    def _generate_title(self, first_message: str) -> str:
+        title = first_message.strip()
+        if len(title) > 50:
+            title = title[:47] + "..."
+        return title
+
+
+def _sse(data: dict) -> str:
+    return f"data: {json.dumps(data)}\n\n"