@agentunion/kite 1.0.7 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +208 -0
- package/README.md +48 -0
- package/cli.js +1 -1
- package/extensions/agents/__init__.py +1 -0
- package/extensions/agents/assistant/__init__.py +1 -0
- package/extensions/agents/assistant/entry.py +329 -0
- package/extensions/agents/assistant/module.md +22 -0
- package/extensions/agents/assistant/server.py +197 -0
- package/extensions/channels/__init__.py +1 -0
- package/extensions/channels/acp_channel/__init__.py +1 -0
- package/extensions/channels/acp_channel/entry.py +329 -0
- package/extensions/channels/acp_channel/module.md +22 -0
- package/extensions/channels/acp_channel/server.py +197 -0
- package/extensions/event_hub_bench/entry.py +624 -379
- package/extensions/event_hub_bench/module.md +2 -1
- package/extensions/services/backup/__init__.py +1 -0
- package/extensions/services/backup/entry.py +508 -0
- package/extensions/services/backup/module.md +22 -0
- package/extensions/services/model_service/__init__.py +1 -0
- package/extensions/services/model_service/entry.py +508 -0
- package/extensions/services/model_service/module.md +22 -0
- package/extensions/services/watchdog/entry.py +468 -102
- package/extensions/services/watchdog/module.md +3 -0
- package/extensions/services/watchdog/monitor.py +170 -69
- package/extensions/services/web/__init__.py +1 -0
- package/extensions/services/web/config.yaml +149 -0
- package/extensions/services/web/entry.py +390 -0
- package/extensions/services/web/module.md +24 -0
- package/extensions/services/web/routes/__init__.py +1 -0
- package/extensions/services/web/routes/routes_call.py +189 -0
- package/extensions/services/web/routes/routes_config.py +512 -0
- package/extensions/services/web/routes/routes_contacts.py +98 -0
- package/extensions/services/web/routes/routes_devlog.py +99 -0
- package/extensions/services/web/routes/routes_phone.py +81 -0
- package/extensions/services/web/routes/routes_sms.py +48 -0
- package/extensions/services/web/routes/routes_stats.py +17 -0
- package/extensions/services/web/routes/routes_voicechat.py +554 -0
- package/extensions/services/web/routes/schemas.py +216 -0
- package/extensions/services/web/server.py +375 -0
- package/extensions/services/web/static/css/style.css +1064 -0
- package/extensions/services/web/static/index.html +1445 -0
- package/extensions/services/web/static/js/app.js +4671 -0
- package/extensions/services/web/vendor/__init__.py +1 -0
- package/extensions/services/web/vendor/bluetooth/audio.py +348 -0
- package/extensions/services/web/vendor/bluetooth/contacts.py +251 -0
- package/extensions/services/web/vendor/bluetooth/manager.py +395 -0
- package/extensions/services/web/vendor/bluetooth/sms.py +290 -0
- package/extensions/services/web/vendor/bluetooth/telephony.py +274 -0
- package/extensions/services/web/vendor/config.py +139 -0
- package/extensions/services/web/vendor/conversation/asr.py +936 -0
- package/extensions/services/web/vendor/conversation/engine.py +548 -0
- package/extensions/services/web/vendor/conversation/llm.py +534 -0
- package/extensions/services/web/vendor/conversation/mcp_tools.py +190 -0
- package/extensions/services/web/vendor/conversation/tts.py +322 -0
- package/extensions/services/web/vendor/conversation/vad.py +138 -0
- package/extensions/services/web/vendor/storage/__init__.py +1 -0
- package/extensions/services/web/vendor/storage/identity.py +312 -0
- package/extensions/services/web/vendor/storage/store.py +507 -0
- package/extensions/services/web/vendor/task/manager.py +864 -0
- package/extensions/services/web/vendor/task/models.py +45 -0
- package/extensions/services/web/vendor/task/webhook.py +263 -0
- package/extensions/services/web/vendor/tools/registry.py +321 -0
- package/kernel/__init__.py +0 -0
- package/kernel/entry.py +407 -0
- package/{core/event_hub/hub.py → kernel/event_hub.py} +62 -74
- package/kernel/module.md +33 -0
- package/{core/registry/store.py → kernel/registry_store.py} +23 -8
- package/kernel/rpc_router.py +388 -0
- package/kernel/server.py +267 -0
- package/launcher/__init__.py +10 -0
- package/launcher/__main__.py +6 -0
- package/launcher/count_lines.py +258 -0
- package/launcher/entry.py +1778 -0
- package/launcher/logging_setup.py +289 -0
- package/{core/launcher → launcher}/module_scanner.py +11 -6
- package/launcher/process_manager.py +880 -0
- package/main.py +11 -210
- package/package.json +6 -9
- package/__init__.py +0 -1
- package/__main__.py +0 -15
- package/core/event_hub/BENCHMARK.md +0 -94
- package/core/event_hub/bench.py +0 -459
- package/core/event_hub/bench_extreme.py +0 -308
- package/core/event_hub/bench_perf.py +0 -350
- package/core/event_hub/entry.py +0 -157
- package/core/event_hub/module.md +0 -20
- package/core/event_hub/server.py +0 -206
- package/core/launcher/entry.py +0 -1158
- package/core/launcher/process_manager.py +0 -470
- package/core/registry/entry.py +0 -110
- package/core/registry/module.md +0 -30
- package/core/registry/server.py +0 -289
- package/extensions/services/watchdog/server.py +0 -167
- /package/{core → extensions/services/web/vendor/bluetooth}/__init__.py +0 -0
- /package/{core/event_hub → extensions/services/web/vendor/conversation}/__init__.py +0 -0
- /package/{core/launcher → extensions/services/web/vendor/task}/__init__.py +0 -0
- /package/{core/registry → extensions/services/web/vendor/tools}/__init__.py +0 -0
- /package/{core/event_hub → kernel}/dedup.py +0 -0
- /package/{core/event_hub → kernel}/router.py +0 -0
- /package/{core/launcher → launcher}/module.md +0 -0
|
@@ -0,0 +1,548 @@
|
|
|
1
|
+
"""ConversationEngine -- main orchestrator for AI phone calls."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import logging
|
|
7
|
+
import time
|
|
8
|
+
from datetime import datetime, timezone
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from .. import config as cfg
|
|
12
|
+
from .asr import ASRProvider, create_asr_provider
|
|
13
|
+
from .llm import (
|
|
14
|
+
LLMProvider,
|
|
15
|
+
create_llm_provider,
|
|
16
|
+
generate_summary,
|
|
17
|
+
should_end_call,
|
|
18
|
+
)
|
|
19
|
+
from .tts import TTSProvider, create_tts_provider
|
|
20
|
+
from .vad import VADDetector, create_vad_detector
|
|
21
|
+
from ..storage import store
|
|
22
|
+
from ..storage import identity
|
|
23
|
+
from .registry import get_registry
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class ConversationEngine:
    """Orchestrates a single phone call: ASR -> LLM -> TTS loop.

    Parameters
    ----------
    audio_pipeline:
        An object providing ``read_audio() -> bytes`` and
        ``write_audio(data: bytes) -> None`` async methods for the
        underlying Bluetooth / telephony layer.
    task_info:
        Dict describing the call task -- should include keys like
        ``task_id``, ``phone_number``, ``purpose``, ``system_prompt``,
        ``play_text`` (optional pre-scripted opener), etc.
    webhook_client:
        An object (or ``None``) with an ``async send(event, payload)``
        method for notifying the task caller about events.
    """

    # Upper bound on consecutive LLM tool-call rounds within one user turn.
    # The last permitted round is generated without tools so the model has
    # to answer in text instead of keeping the caller waiting in silence.
    _MAX_TOOL_ROUNDS = 8

    # Spoken, in order, each time the remote party stays silent.
    _NO_RESPONSE_PROMPTS = (
        "您好?请问您还在吗?",
        "不好意思,我没有听到您的声音,请问您还在线吗?",
        "抱歉打扰了,如果您还在的话请说话。",
    )

    def __init__(
        self,
        audio_pipeline: Any,
        task_info: dict[str, Any],
        webhook_client: Any | None = None,
    ) -> None:
        """Create the providers, read config shortcuts and resolve enabled tools."""
        # Sub-components
        self.asr: ASRProvider = create_asr_provider()
        self.llm: LLMProvider = create_llm_provider()
        self.tts: TTSProvider = create_tts_provider()
        self.vad: VADDetector = create_vad_detector()

        # External references
        self.audio = audio_pipeline
        self.task_info = task_info
        self.webhook = webhook_client

        # Conversation state
        self.messages: list[dict[str, Any]] = []
        self.transcript: list[dict[str, Any]] = []
        self.running: bool = False

        # Timing
        self._start_time: float = 0.0
        self._no_response_count: int = 0

        # Strong references to fire-and-forget persistence tasks; the event
        # loop itself only keeps weak references to tasks.
        self._background_tasks: set[Any] = set()

        # Config shortcuts
        self._max_duration = cfg.get("call.max_duration_seconds", 300)
        self._no_response_timeout = cfg.get("call.no_response_timeout", 15)
        self._no_response_max_retries = cfg.get("call.no_response_max_retries", 3)
        self._active_provider = cfg.get("llm.active_provider", "openai")

        # Tool registry integration
        self._registry = get_registry()
        user_phone = task_info.get("user_phone", "")
        contact_phone = task_info.get("phone_number", "")
        have_both_numbers = bool(user_phone and contact_phone)
        self._is_owner = (
            identity.is_owner(user_phone, contact_phone) if have_both_numbers else False
        )

        # Resolve enabled tools through 4-level hierarchy:
        # global -> user -> contact -> this call.
        global_tools_cfg = identity.load_tools_config(cfg.data_dir() / "tools.yaml")
        user_tools_cfg = (
            identity.load_tools_config(identity.user_dir(user_phone) / "tools.yaml")
            if user_phone
            else None
        )
        contact_tools_cfg = (
            identity.load_tools_config(
                identity.contact_dir(user_phone, contact_phone) / "tools.yaml"
            )
            if have_both_numbers
            else None
        )

        # ``or []`` also covers keys that are present but explicitly None.
        call_enabled = task_info.get("tools_enabled") or []
        call_disabled = task_info.get("tools_disabled") or []
        call_tools_cfg = None
        if call_enabled or call_disabled:
            call_tools_cfg = {"enabled": call_enabled, "disabled": call_disabled}

        self._enabled_tools = self._registry.resolve_enabled(
            global_tools_cfg, user_tools_cfg, contact_tools_cfg, call_tools_cfg,
            self._is_owner,
        )

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    async def start(self) -> None:
        """Run the main conversation loop until completion or timeout.

        Cancellation and unexpected errors are logged and swallowed, so this
        coroutine returns normally in every case; the ASR stream is stopped
        on the way out.
        """
        self.running = True
        self._start_time = time.time()
        task_id = self.task_info.get("task_id", "unknown")

        logger.info("ConversationEngine: starting call %s", task_id)

        try:
            # Initialise ASR stream
            language = cfg.get("asr.whisper.language", "zh")
            await self.asr.start_stream(language)

            # Build the system message
            await self._init_system_prompt()

            # Play opening: a pre-scripted text wins over an LLM-generated one.
            if self.task_info.get("play_text"):
                await self._speak(self.task_info["play_text"])
            else:
                opening = await self._generate_opening()
                await self._speak(opening)

            # Main loop
            while self.running:
                # Check max duration
                if self._check_timeout():
                    await self._speak("抱歉,通话时间到了,感谢您的配合,再见。")
                    break

                # Listen for speech
                text = await self._listen()

                if not text:
                    if not await self._handle_no_response():
                        break
                    continue

                # Reset no-response counter on valid input
                self._no_response_count = 0

                # Record the human turn
                self._record_turn("human", text)

                # Generate AI response (may include tool calls)
                response = await self._generate_response(text)

                # Handle tool-call loops, bounded so a model that keeps
                # asking for tools cannot stall the call forever.
                rounds = 0
                while response.get("tool_calls"):
                    rounds += 1
                    final_round = rounds >= self._MAX_TOOL_ROUNDS
                    if final_round:
                        logger.warning(
                            "ConversationEngine: tool-call limit (%d) reached in call %s",
                            self._MAX_TOOL_ROUNDS, task_id,
                        )
                    tool_calls = response["tool_calls"]
                    tool_results = await self._execute_tools(tool_calls)
                    response = await self._continue_with_tool_results(
                        tool_calls, tool_results, allow_tools=not final_round,
                    )

                # Speak the text response
                if response.get("content"):
                    self._record_turn("ai", response["content"])
                    await self._speak(response["content"])

                # Check if the conversation should end
                if await self._should_end_call():
                    farewell = await self._generate_farewell()
                    self._record_turn("ai", farewell)
                    await self._speak(farewell)
                    break

        except asyncio.CancelledError:
            # Swallowed as before: start() returns normally after a cancel.
            logger.info("ConversationEngine: call %s cancelled", task_id)
        except Exception:
            logger.exception("ConversationEngine: unhandled error in call %s", task_id)
        finally:
            self.running = False
            try:
                await self.asr.stop_stream()
            except Exception:
                # Cleanup must not escape from a method that reports every
                # other failure through the log.
                logger.exception(
                    "ConversationEngine: failed to stop ASR stream for call %s", task_id
                )
            logger.info("ConversationEngine: call %s ended", task_id)

    async def stop(self) -> None:
        """Signal the engine to stop at the next opportunity."""
        self.running = False

    async def inject_message(self, message: str) -> None:
        """Inject a message from the task caller into the conversation context."""
        self.messages.append({
            "role": "system",
            "content": f"[来自任务调用者的消息]: {message}",
        })
        logger.info("ConversationEngine: injected caller message: %s", message[:80])

    async def generate_call_summary(self) -> str:
        """Generate a summary of the call and persist it when ``call_dir`` is set.

        Returns whatever ``generate_summary`` produced; it is only saved when
        it is non-empty and ``task_info["call_dir"]`` is present.
        """
        summary = await generate_summary(self.llm, self.messages)

        # Save to identity directory
        call_path = self.task_info.get("call_dir")
        if call_path and summary:
            from pathlib import Path
            await identity.save_call_summary(Path(call_path), summary)

        return summary

    # ------------------------------------------------------------------
    # System prompt
    # ------------------------------------------------------------------

    async def _init_system_prompt(self) -> None:
        """Build the initial system message from task info, config, and identity context."""
        info = self.task_info
        purpose = info.get("purpose", "")
        custom_prompt = info.get("system_prompt", "")
        phone_number = info.get("phone_number", "")
        contact_name = info.get("contact_name", "")
        direction = info.get("direction", "outgoing")

        opening_hint = cfg.get("call.opening_message", "")

        parts: list[str] = [
            "你是一个AI电话助手。你正在进行一通电话。",
            f"通话方向: {'拨出' if direction == 'outgoing' else '接入'}",
        ]
        if phone_number:
            parts.append(f"对方号码: {phone_number}")
        if contact_name:
            parts.append(f"对方姓名: {contact_name}")
        if purpose:
            parts.append(f"通话目的: {purpose}")
        if custom_prompt:
            parts.append(f"\n额外指令:\n{custom_prompt}")
        if opening_hint:
            parts.append(f"开场白参考: {opening_hint}")

        parts.append(
            "\n注意事项:\n"
            "- 用自然、礼貌的中文对话\n"
            "- 保持简洁,每次回复不超过两三句话\n"
            "- 如需确认信息可以使用工具\n"
            "- 完成任务后礼貌结束通话"
        )

        # Load identity context (user profile, contact profile, recent summaries)
        identity_context = await self._load_identity_context()
        if identity_context:
            parts.append(identity_context)

        # Load data README (panoramic overview). Best effort: a missing or
        # unreadable file must not prevent the call, but it should be visible
        # in the log rather than silently ignored.
        data_readme = cfg.data_dir() / "README.md"
        readme_content = ""
        try:
            if data_readme.exists():
                readme_content = data_readme.read_text(encoding="utf-8").strip()
        except Exception:
            logger.warning("Failed to read data README %s", data_readme, exc_info=True)
        if readme_content:
            parts.append(f"\n# 系统全景\n{readme_content}")

        # Append tools summary
        tools_summary = self._registry.build_tools_summary(self._enabled_tools)
        if tools_summary:
            parts.append(f"\n{tools_summary}")

        self.messages.append({"role": "system", "content": "\n".join(parts)})

    async def _load_identity_context(self) -> str:
        """Load identity context files for the system prompt.

        Returns an empty string when either phone number is missing or when
        any of the identity loaders raises.
        """
        user_phone = self.task_info.get("user_phone", "")
        contact_phone = self.task_info.get("phone_number", "")

        if not user_phone or not contact_phone:
            return ""

        try:
            user_ctx = await identity.load_user_context(user_phone)
            contact_ctx = await identity.load_contact_context(user_phone, contact_phone)
            summaries = await identity.load_recent_summaries(user_phone, contact_phone)
        except Exception as exc:
            logger.warning("Failed to load identity context: %s", exc)
            return ""

        parts: list[str] = []
        if user_ctx:
            parts.append(f"\n# 用户信息\n{user_ctx}")
        if contact_ctx:
            parts.append(f"\n# 联系人信息\n{contact_ctx}")
        if summaries:
            parts.append(f"\n# 近期通话记录\n{summaries}")

        return "\n".join(parts)

    # ------------------------------------------------------------------
    # Opening / farewell
    # ------------------------------------------------------------------

    async def _generate_scripted_line(self, prompt: str, fallback: str) -> str:
        """Ask the LLM for a one-off line and store it as an assistant message.

        ``prompt`` is added to the history only for the duration of the LLM
        call. ``fallback`` is used when the LLM returns no usable content
        (key missing, ``None`` or empty string). Exceptions from the LLM
        propagate, but the temporary prompt is removed first.
        """
        placeholder = {"role": "user", "content": prompt}
        self.messages.append(placeholder)
        try:
            result = await self.llm.generate(self.messages)
        finally:
            # Remove the placeholder by identity rather than with a blind
            # pop(): inject_message() may have appended to the history while
            # the LLM call was in flight.
            for index in range(len(self.messages) - 1, -1, -1):
                if self.messages[index] is placeholder:
                    del self.messages[index]
                    break

        line = result.get("content") or fallback
        self.messages.append({"role": "assistant", "content": line})
        return line

    async def _generate_opening(self) -> str:
        """Ask the LLM to produce an appropriate opening line."""
        return await self._generate_scripted_line(
            "[系统] 请生成一句合适的开场白。", "你好!"
        )

    async def _generate_farewell(self) -> str:
        """Ask the LLM to produce a farewell message."""
        return await self._generate_scripted_line(
            "[系统] 通话即将结束,请生成一句结束语。", "感谢您的时间,再见!"
        )

    # ------------------------------------------------------------------
    # Listening (ASR + VAD)
    # ------------------------------------------------------------------

    async def _listen(self) -> str | None:
        """Listen for speech using VAD to detect utterance boundaries.

        Returns the transcribed text or ``None`` on timeout / no speech.
        The ASR stream opened here is always stopped before returning,
        including when feeding audio or fetching the result raises.
        """
        self.vad.reset()
        await self.asr.start_stream(cfg.get("asr.whisper.language", "zh"))
        speech_detected = False
        timeout = self._no_response_timeout
        # Monotonic clock: the deadline must not move with wall-clock changes.
        deadline = time.monotonic() + timeout

        try:
            while self.running and time.monotonic() < deadline:
                try:
                    # Short read timeout so ``running`` and the deadline are
                    # re-checked at least twice a second.
                    chunk = await asyncio.wait_for(
                        self.audio.read_audio(),
                        timeout=0.5,
                    )
                except asyncio.TimeoutError:
                    continue
                except Exception:
                    logger.debug("_listen: audio read error", exc_info=True)
                    break

                if not chunk:
                    continue

                # Feed to VAD
                event = self.vad.feed(chunk)

                if event == "speech_start":
                    speech_detected = True
                    # Extend deadline while user is speaking
                    deadline = time.monotonic() + timeout

                if speech_detected:
                    await self.asr.feed_audio(chunk)

                if event == "speech_end" and speech_detected:
                    break

                # If speech is ongoing, keep extending the deadline
                if self.vad.is_speaking:
                    deadline = time.monotonic() + timeout

            if not speech_detected:
                return None

            text = await self.asr.get_result()
        finally:
            await self.asr.stop_stream()

        text = text.strip() if text else None
        if text:
            logger.info("User said: %s", text)
        return text

    # ------------------------------------------------------------------
    # Speaking (TTS)
    # ------------------------------------------------------------------

    async def _speak(self, text: str) -> None:
        """Synthesize *text* via TTS and play through the audio pipeline.

        Empty text is ignored. Synthesis and playback errors are logged and
        not re-raised.
        """
        if not text:
            return
        logger.info("AI says: %s", text)
        try:
            audio_data = await self.tts.synthesize(text)
            if audio_data and self.audio:
                await self.audio.write_audio(audio_data)
        except Exception:
            logger.exception("_speak: TTS / playback error")

    # ------------------------------------------------------------------
    # LLM response generation
    # ------------------------------------------------------------------

    def _append_assistant_message(self, result: dict[str, Any]) -> None:
        """Record an LLM result in the message history as an assistant message."""
        assistant_msg: dict[str, Any] = {"role": "assistant"}
        if result.get("content"):
            assistant_msg["content"] = result["content"]
        if result.get("tool_calls"):
            # Use raw_tool_calls (original API format) for message history if available
            assistant_msg["tool_calls"] = result.get("raw_tool_calls") or result["tool_calls"]
        self.messages.append(assistant_msg)

    async def _generate_response(self, user_text: str) -> dict[str, Any]:
        """Append the user message and ask the LLM for a response."""
        self.messages.append({"role": "user", "content": user_text})
        tools = self._registry.get_tools_for_provider(
            self._active_provider, self._enabled_tools
        )
        result = await self.llm.generate(self.messages, tools=tools)
        self._append_assistant_message(result)
        return result

    async def _continue_with_tool_results(
        self,
        tool_calls: list[dict[str, Any]],
        tool_results: list[dict[str, Any]],
        *,
        allow_tools: bool = True,
    ) -> dict[str, Any]:
        """Feed tool results back to the LLM and get the next response.

        With ``allow_tools=False`` the LLM is called without a tool list and
        any tool calls in its reply are dropped, so the returned response
        never requests another tool round.
        """
        for tc, result in zip(tool_calls, tool_results):
            self.messages.append({
                "role": "tool",
                "tool_call_id": tc.get("id", ""),
                "name": tc.get("name", ""),
                "content": result.get("content", ""),
            })

        if allow_tools:
            tools = self._registry.get_tools_for_provider(
                self._active_provider, self._enabled_tools
            )
            response = await self.llm.generate(self.messages, tools=tools)
        else:
            response = await self.llm.generate(self.messages)
            response = {
                key: value
                for key, value in response.items()
                if key not in ("tool_calls", "raw_tool_calls")
            }

        self._append_assistant_message(response)
        return response

    # ------------------------------------------------------------------
    # Tool execution
    # ------------------------------------------------------------------

    async def _execute_tools(
        self, tool_calls: list[dict[str, Any]]
    ) -> list[dict[str, Any]]:
        """Execute a batch of tool calls and return their results.

        Calls run sequentially, and the result list has one entry per call in
        the same order. A failing tool yields ``"Error: ..."`` content instead
        of raising.
        """
        results: list[dict[str, Any]] = []
        for tc in tool_calls:
            name = tc.get("name", "")
            args = tc.get("arguments", {})
            logger.info("Executing tool: %s(%s)", name, args)

            try:
                output = await self._execute_single_tool(name, args)
            except Exception as exc:
                logger.exception("Tool execution failed: %s", name)
                results.append({"content": f"Error: {exc}"})
            else:
                results.append({"content": output})

        return results

    async def _execute_single_tool(self, name: str, args: dict[str, Any]) -> str:
        """Dispatch a single tool call to the registry handler.

        Returns ``"Unknown tool: <name>"`` when the registry has no handler.
        """
        # NOTE(review): the lookup does not consult self._enabled_tools --
        # confirm the registry (or the handler context) enforces the
        # enabled/disabled resolution for tools the model names on its own.
        handler = self._registry.get_handler(name)
        if handler is None:
            return f"Unknown tool: {name}"

        context = self._registry.build_context(name, self, self._is_owner)
        return await handler(args, context)

    # ------------------------------------------------------------------
    # No-response handling
    # ------------------------------------------------------------------

    async def _handle_no_response(self) -> bool:
        """Handle the case where the user did not respond.

        Returns ``True`` to continue the loop, ``False`` to end the call.
        """
        self._no_response_count += 1
        if self._no_response_count >= self._no_response_max_retries:
            await self._speak("您好?因为长时间没有回应,我先挂断了,再见。")
            return False

        # Escalate through the prompts and stay on the last one.
        prompts = self._NO_RESPONSE_PROMPTS
        idx = min(self._no_response_count - 1, len(prompts) - 1)
        await self._speak(prompts[idx])
        return True

    # ------------------------------------------------------------------
    # End-of-call detection
    # ------------------------------------------------------------------

    async def _should_end_call(self) -> bool:
        """Determine whether the call should end naturally."""
        return await should_end_call(self.llm, self.messages)

    # ------------------------------------------------------------------
    # Timeout
    # ------------------------------------------------------------------

    def _check_timeout(self) -> bool:
        """Return ``True`` if the call has exceeded the maximum duration.

        A non-positive maximum duration disables the limit.
        """
        if self._max_duration <= 0:
            return False
        elapsed = time.time() - self._start_time
        return elapsed >= self._max_duration

    # ------------------------------------------------------------------
    # Transcript recording
    # ------------------------------------------------------------------

    def _on_save_done(self, task: Any) -> None:
        """Release a finished persistence task and log its failure, if any."""
        self._background_tasks.discard(task)
        if task.cancelled():
            return
        exc = task.exception()
        if exc is not None:
            logger.warning("Failed to persist call message: %s", exc)

    def _record_turn(self, role: str, text: str) -> None:
        """Record a conversation turn in the transcript and persist it.

        The turn is appended to ``self.transcript`` immediately. When
        ``task_info["call_dir"]`` is set, it is also saved in the background
        without being awaited.
        """
        now = datetime.now(timezone.utc)
        stamp = now.isoformat()
        self.transcript.append({
            "role": role,
            "text": text,
            "timestamp": stamp,
            "elapsed": round(time.time() - self._start_time, 1),
        })

        # Persist to identity directory (fire-and-forget)
        call_path = self.task_info.get("call_dir")
        if not call_path:
            return

        from pathlib import Path
        msg_entry = {
            # Anything that is not the human side is stored as "assistant".
            "role": "human" if role == "human" else "assistant",
            "timestamp": stamp,
            "text": text,
        }
        task = asyncio.ensure_future(
            identity.save_call_message(Path(call_path), msg_entry)
        )
        # Keep a strong reference until the task completes so it cannot be
        # garbage-collected mid-flight, and surface failures in the log.
        self._background_tasks.add(task)
        task.add_done_callback(self._on_save_done)
|