@agentunion/kite 1.0.6 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. package/cli.js +127 -25
  2. package/core/event_hub/entry.py +384 -61
  3. package/core/event_hub/hub.py +8 -0
  4. package/core/event_hub/module.md +0 -1
  5. package/core/event_hub/server.py +169 -38
  6. package/core/kite_log.py +241 -0
  7. package/core/launcher/entry.py +1306 -425
  8. package/core/launcher/module_scanner.py +10 -9
  9. package/core/launcher/process_manager.py +555 -121
  10. package/core/registry/entry.py +335 -30
  11. package/core/registry/server.py +339 -256
  12. package/core/registry/store.py +13 -2
  13. package/extensions/agents/__init__.py +1 -0
  14. package/extensions/agents/assistant/__init__.py +1 -0
  15. package/extensions/agents/assistant/entry.py +380 -0
  16. package/extensions/agents/assistant/module.md +22 -0
  17. package/extensions/agents/assistant/server.py +236 -0
  18. package/extensions/channels/__init__.py +1 -0
  19. package/extensions/channels/acp_channel/__init__.py +1 -0
  20. package/extensions/channels/acp_channel/entry.py +380 -0
  21. package/extensions/channels/acp_channel/module.md +22 -0
  22. package/extensions/channels/acp_channel/server.py +236 -0
  23. package/{core → extensions}/event_hub_bench/entry.py +664 -371
  24. package/{core → extensions}/event_hub_bench/module.md +4 -2
  25. package/extensions/services/backup/__init__.py +1 -0
  26. package/extensions/services/backup/entry.py +380 -0
  27. package/extensions/services/backup/module.md +22 -0
  28. package/extensions/services/backup/server.py +244 -0
  29. package/extensions/services/model_service/__init__.py +1 -0
  30. package/extensions/services/model_service/entry.py +380 -0
  31. package/extensions/services/model_service/module.md +22 -0
  32. package/extensions/services/model_service/server.py +236 -0
  33. package/extensions/services/watchdog/entry.py +460 -143
  34. package/extensions/services/watchdog/module.md +3 -0
  35. package/extensions/services/watchdog/monitor.py +128 -13
  36. package/extensions/services/watchdog/server.py +75 -13
  37. package/extensions/services/web/__init__.py +1 -0
  38. package/extensions/services/web/config.yaml +149 -0
  39. package/extensions/services/web/entry.py +487 -0
  40. package/extensions/services/web/module.md +24 -0
  41. package/extensions/services/web/routes/__init__.py +1 -0
  42. package/extensions/services/web/routes/routes_call.py +189 -0
  43. package/extensions/services/web/routes/routes_config.py +512 -0
  44. package/extensions/services/web/routes/routes_contacts.py +98 -0
  45. package/extensions/services/web/routes/routes_devlog.py +99 -0
  46. package/extensions/services/web/routes/routes_phone.py +81 -0
  47. package/extensions/services/web/routes/routes_sms.py +48 -0
  48. package/extensions/services/web/routes/routes_stats.py +17 -0
  49. package/extensions/services/web/routes/routes_voicechat.py +554 -0
  50. package/extensions/services/web/routes/schemas.py +216 -0
  51. package/extensions/services/web/server.py +332 -0
  52. package/extensions/services/web/static/css/style.css +1064 -0
  53. package/extensions/services/web/static/index.html +1445 -0
  54. package/extensions/services/web/static/js/app.js +4671 -0
  55. package/extensions/services/web/vendor/__init__.py +1 -0
  56. package/extensions/services/web/vendor/bluetooth/audio.py +348 -0
  57. package/extensions/services/web/vendor/bluetooth/contacts.py +251 -0
  58. package/extensions/services/web/vendor/bluetooth/manager.py +395 -0
  59. package/extensions/services/web/vendor/bluetooth/sms.py +290 -0
  60. package/extensions/services/web/vendor/bluetooth/telephony.py +274 -0
  61. package/extensions/services/web/vendor/config.py +139 -0
  62. package/extensions/services/web/vendor/conversation/__init__.py +0 -0
  63. package/extensions/services/web/vendor/conversation/asr.py +936 -0
  64. package/extensions/services/web/vendor/conversation/engine.py +548 -0
  65. package/extensions/services/web/vendor/conversation/llm.py +534 -0
  66. package/extensions/services/web/vendor/conversation/mcp_tools.py +190 -0
  67. package/extensions/services/web/vendor/conversation/tts.py +322 -0
  68. package/extensions/services/web/vendor/conversation/vad.py +138 -0
  69. package/extensions/services/web/vendor/storage/__init__.py +1 -0
  70. package/extensions/services/web/vendor/storage/identity.py +312 -0
  71. package/extensions/services/web/vendor/storage/store.py +507 -0
  72. package/extensions/services/web/vendor/task/__init__.py +0 -0
  73. package/extensions/services/web/vendor/task/manager.py +864 -0
  74. package/extensions/services/web/vendor/task/models.py +45 -0
  75. package/extensions/services/web/vendor/task/webhook.py +263 -0
  76. package/extensions/services/web/vendor/tools/__init__.py +0 -0
  77. package/extensions/services/web/vendor/tools/registry.py +321 -0
  78. package/main.py +344 -4
  79. package/package.json +11 -2
  80. package/core/__pycache__/__init__.cpython-313.pyc +0 -0
  81. package/core/__pycache__/data_dir.cpython-313.pyc +0 -0
  82. package/core/data_dir.py +0 -62
  83. package/core/event_hub/__pycache__/__init__.cpython-313.pyc +0 -0
  84. package/core/event_hub/__pycache__/bench.cpython-313.pyc +0 -0
  85. package/core/event_hub/__pycache__/bench_perf.cpython-313.pyc +0 -0
  86. package/core/event_hub/__pycache__/dedup.cpython-313.pyc +0 -0
  87. package/core/event_hub/__pycache__/entry.cpython-313.pyc +0 -0
  88. package/core/event_hub/__pycache__/hub.cpython-313.pyc +0 -0
  89. package/core/event_hub/__pycache__/router.cpython-313.pyc +0 -0
  90. package/core/event_hub/__pycache__/server.cpython-313.pyc +0 -0
  91. package/core/event_hub/bench_results/2026-02-28_13-26-48.json +0 -51
  92. package/core/event_hub/bench_results/2026-02-28_13-44-45.json +0 -51
  93. package/core/event_hub/bench_results/2026-02-28_13-45-39.json +0 -51
  94. package/core/launcher/__pycache__/__init__.cpython-313.pyc +0 -0
  95. package/core/launcher/__pycache__/entry.cpython-313.pyc +0 -0
  96. package/core/launcher/__pycache__/module_scanner.cpython-313.pyc +0 -0
  97. package/core/launcher/__pycache__/process_manager.cpython-313.pyc +0 -0
  98. package/core/launcher/data/log/lifecycle.jsonl +0 -1158
  99. package/core/launcher/data/token.txt +0 -1
  100. package/core/registry/__pycache__/__init__.cpython-313.pyc +0 -0
  101. package/core/registry/__pycache__/entry.cpython-313.pyc +0 -0
  102. package/core/registry/__pycache__/server.cpython-313.pyc +0 -0
  103. package/core/registry/__pycache__/store.cpython-313.pyc +0 -0
  104. package/core/registry/data/port.txt +0 -1
  105. package/core/registry/data/port_484.txt +0 -1
  106. package/extensions/__pycache__/__init__.cpython-313.pyc +0 -0
  107. package/extensions/services/__pycache__/__init__.cpython-313.pyc +0 -0
  108. package/extensions/services/watchdog/__pycache__/__init__.cpython-313.pyc +0 -0
  109. package/extensions/services/watchdog/__pycache__/entry.cpython-313.pyc +0 -0
  110. package/extensions/services/watchdog/__pycache__/monitor.cpython-313.pyc +0 -0
  111. package/extensions/services/watchdog/__pycache__/server.cpython-313.pyc +0 -0
  112. /package/{core/event_hub/bench_results/.gitkeep → extensions/services/web/vendor/bluetooth/__init__.py} +0 -0
@@ -0,0 +1,554 @@
1
+ """Voice Chat WebSocket endpoint — browser-based ASR → LLM → TTS pipeline."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import json
7
+ import logging
8
+ import re
9
+ import struct
10
+ from pathlib import Path
11
+ from typing import Any
12
+
13
+ from fastapi import APIRouter, Request, WebSocket, WebSocketDisconnect
14
+
15
+ from vendor import config as cfg
16
+ from vendor.conversation.asr import create_asr_provider
17
+ from vendor.conversation.llm import create_llm_provider
18
+ from vendor.conversation.tts import create_tts_provider
19
+ from vendor.storage import identity
20
+ from vendor.tools.registry import get_registry
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+ router = APIRouter()
25
+
26
+
27
+ # ---------------------------------------------------------------------------
28
+ # Lightweight energy-based VAD (no C extension needed)
29
+ # ---------------------------------------------------------------------------
30
+
31
class EnergyVAD:
    """Simple RMS-energy voice activity detector for S16LE 16 kHz mono PCM.

    Audio is consumed in fixed 20 ms frames.  ``feed`` reports
    ``"speech_start"`` once enough consecutive frames exceed the threshold and
    ``"speech_end"`` once enough consecutive frames fall below it while in
    speech.  While idle, sub-threshold frames feed a smoothed noise-floor
    estimate; after a warm-up period the effective threshold becomes
    ``max(base_threshold, noise_floor * margin)``.
    """

    FRAME_MS = 20
    FRAME_BYTES = 640  # 20 ms at 16 kHz S16LE mono

    def __init__(
        self,
        energy_threshold: float = 300.0,
        silence_ms: int = 800,
        min_speech_ms: int = 250,
    ) -> None:
        """Configure the detector.

        Args:
            energy_threshold: Base RMS level a frame must exceed to count as speech.
            silence_ms: Trailing silence (rounded down to whole frames) that ends speech.
            min_speech_ms: Continuous speech (at least one frame) required to start.
        """
        self._base_threshold = energy_threshold
        self._threshold = energy_threshold
        self._silence_frames = silence_ms // self.FRAME_MS
        self._speech_frames = max(1, min_speech_ms // self.FRAME_MS)
        self._in_speech = False
        self._speech_count = 0
        self._silence_count = 0
        self._buf = bytearray()
        self._last_rms = 0.0
        # Adaptive noise-floor tracking.
        self._noise_floor = 0.0
        self._noise_alpha = 0.05   # smoothing factor of the noise-floor average
        self._noise_margin = 3.0   # effective threshold = noise floor * margin
        self._noise_samples = 0
        self._noise_warmup = 15    # idle frames observed before adapting the threshold
        logger.info(
            "EnergyVAD: base_threshold=%.0f, silence_frames=%d (%dms), speech_frames=%d (%dms), adaptive=on (margin=%.1fx)",
            self._base_threshold, self._silence_frames, silence_ms,
            self._speech_frames, min_speech_ms, self._noise_margin,
        )

    def feed(self, chunk: bytes, external_rms: float | None = None) -> str | None:
        """Buffer *chunk* and run the detector over every complete frame.

        Args:
            chunk: Raw PCM bytes; any trailing partial frame stays buffered.
            external_rms: When given, this value is used as the RMS of every
                complete frame instead of measuring the buffered samples.

        Returns:
            The last event (``"speech_start"`` / ``"speech_end"``) produced
            while draining the buffer, or ``None`` if no frame produced one.
        """
        self._buf.extend(chunk)
        size = self.FRAME_BYTES
        last_event: str | None = None
        while len(self._buf) >= size:
            if external_rms is None:
                frame = bytes(self._buf[:size])
                # Consume in place rather than copying the remainder per frame.
                del self._buf[:size]
                event = self._process(frame)
            else:
                del self._buf[:size]
                event = self._process_rms(external_rms)
            if event is not None:
                last_event = event
        return last_event

    def reset(self) -> None:
        """Return to the idle state, dropping buffered audio and the noise estimate."""
        self._in_speech = False
        self._speech_count = 0
        self._silence_count = 0
        self._buf = bytearray()
        self._noise_samples = 0
        self._noise_floor = 0.0
        self._threshold = self._base_threshold

    @property
    def last_rms(self) -> float:
        """RMS value used for the most recently processed frame."""
        return self._last_rms

    def _rms(self, frame: bytes) -> float:
        """Return the root-mean-square of little-endian signed 16-bit samples."""
        n = len(frame) // 2
        if n == 0:
            return 0.0
        samples = struct.unpack(f"<{n}h", frame)
        return (sum(s * s for s in samples) / n) ** 0.5

    def _process(self, frame: bytes) -> str | None:
        """Measure *frame* and advance the state machine."""
        return self._process_rms(self._rms(frame))

    def _process_rms(self, rms: float) -> str | None:
        """Advance the state machine by one frame whose energy is *rms*."""
        self._last_rms = rms

        # Only idle, sub-threshold frames contribute to the noise estimate.
        if not self._in_speech and rms < self._threshold:
            self._noise_samples += 1
            if self._noise_samples == 1:
                self._noise_floor = rms
            else:
                self._noise_floor += self._noise_alpha * (rms - self._noise_floor)
            if self._noise_samples >= self._noise_warmup:
                self._threshold = max(self._base_threshold,
                                      self._noise_floor * self._noise_margin)

        if rms > self._threshold:
            self._silence_count = 0
            self._speech_count += 1
            if not self._in_speech and self._speech_count >= self._speech_frames:
                self._in_speech = True
                logger.debug("VAD: speech_start (rms=%.0f, threshold=%.0f, noise=%.0f)",
                             rms, self._threshold, self._noise_floor)
                return "speech_start"
            return None

        # A below-threshold frame breaks any run of speech frames.
        self._speech_count = 0
        if self._in_speech:
            self._silence_count += 1
            if self._silence_count >= self._silence_frames:
                self._in_speech = False
                self._silence_count = 0
                logger.debug(
                    "VAD: speech_end (rms=%.0f, threshold=%.0f, noise=%.0f)",
                    rms, self._threshold, self._noise_floor,
                )
                return "speech_end"
        return None
143
+
144
+
145
# System prompt used when the client's "start" message carries no (or an
# empty) "system_prompt" value.
DEFAULT_SYSTEM_PROMPT = (
    "你是一个AI语音助手。用户正在通过语音与你对话。\n"
    "- 用自然、简洁的中文回复\n"
    "- 每次回复控制在1-3句话,适合语音播放\n"
    "- 语气友好自然,像朋友聊天"
)
151
+
152
+ _SENTENCE_RE = re.compile(r'(?<=[。!?!?\n])\s*')
153
+
154
+
155
+ def _split_sentences(text: str) -> list[str]:
156
+ parts = _SENTENCE_RE.split(text)
157
+ sentences: list[str] = []
158
+ for p in parts:
159
+ p = p.strip()
160
+ if not p:
161
+ continue
162
+ if sentences and len(p) < 5:
163
+ sentences[-1] += p
164
+ else:
165
+ sentences.append(p)
166
+ return sentences if sentences else [text]
167
+
168
+
169
+ async def _send_json(ws: WebSocket, data: dict) -> bool:
170
+ try:
171
+ await ws.send_json(data)
172
+ return True
173
+ except Exception:
174
+ return False
175
+
176
+
177
+ async def _send_bytes(ws: WebSocket, data: bytes) -> bool:
178
+ try:
179
+ await ws.send_bytes(data)
180
+ return True
181
+ except Exception:
182
+ return False
183
+
184
+
185
async def _send_tts_segments(ws: WebSocket, tts, text: str, language: str) -> bool:
    """Synthesize *text* sentence by sentence and stream the audio to *ws*.

    Message sequence: ``speaking``; then, for every sentence that produced
    audio, its bytes in 16 KiB binary messages followed by ``segment_end``;
    finally ``audio_done``.  A synthesis failure for one sentence is logged
    and reported to the client as an ``error`` message, after which the
    remaining sentences are still attempted.

    Returns:
        ``False`` as soon as any send fails (including the error notification
        and the final ``audio_done``), ``True`` otherwise.
    """
    if not await _send_json(ws, {"type": "speaking"}):
        return False

    chunk_size = 16 * 1024

    for sentence in _split_sentences(text):
        try:
            audio_data = await tts.synthesize(sentence, language)
            if audio_data:
                for offset in range(0, len(audio_data), chunk_size):
                    if not await _send_bytes(ws, audio_data[offset:offset + chunk_size]):
                        return False
                if not await _send_json(ws, {"type": "segment_end"}):
                    return False
        except Exception as e:
            logger.exception("VoiceChat: TTS segment error for: %s", sentence)
            # A failed notification means the socket is gone; stop synthesizing.
            if not await _send_json(ws, {"type": "error", "message": f"TTS错误: {e}"}):
                return False

    return await _send_json(ws, {"type": "audio_done"})
207
+
208
+
209
@router.post("/api/voicechat/dump-log")
async def dump_log(request: Request):
    """Save voicechat status log to data/voicechat_log.txt for diagnostics.

    Expects a JSON object body with a non-empty string under ``"log"``.  The
    file is overwritten on every call.

    Returns:
        ``{"ok": True}`` after the file was written, ``{"ok": False}`` when
        the body is not valid JSON, not an object, or has no usable ``log``.
    """
    try:
        body = await request.json()
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass: malformed request body.
        return {"ok": False}

    log_text = body.get("log", "") if isinstance(body, dict) else ""
    if not log_text or not isinstance(log_text, str):
        return {"ok": False}

    log_path = cfg.data_dir() / "voicechat_log.txt"
    log_path.parent.mkdir(parents=True, exist_ok=True)
    log_path.write_text(log_text, encoding="utf-8")
    # Report the encoded size; len(log_text) would count characters, not bytes.
    logger.info(
        "VoiceChat: status log saved to %s (%d bytes)",
        log_path, len(log_text.encode("utf-8")),
    )
    return {"ok": True}
221
+
222
+
223
@router.websocket("/ws/voice-chat")
async def voice_chat(ws: WebSocket):
    """Full-duplex voice chat: browser mic → VAD+ASR → LLM → TTS → browser speaker.

    Flow:
      1. Accept the socket and wait for a JSON text message of type ``start``
         (optional keys read here: ``system_prompt``, ``phone_number``,
         ``ai_first``).
      2. Create the ASR/LLM/TTS providers and the VAD, resolve the enabled
         tools, and build the system message (prompt plus tools summary).
      3. Send ``ready``; when ``ai_first`` is set, let the LLM speak first.
      4. Run conversation turns until the client disconnects or an error
         occurs.

    The ASR stream is stopped on exit regardless of how the session ended.
    """
    await ws.accept()
    logger.info("VoiceChat: WebSocket connected")

    asr = None
    ctx: dict[str, Any] = {"pending_prompt": None, "tools": None, "tool_ctx": None}

    try:
        # Handshake: ignore every message until a well-formed "start" arrives.
        while True:
            msg = json.loads(await ws.receive_text())
            if isinstance(msg, dict) and msg.get("type") == "start":
                break
        system_prompt = str(msg.get("system_prompt") or "").strip() or DEFAULT_SYSTEM_PROMPT

        asr = create_asr_provider()
        llm = create_llm_provider()
        tts = create_tts_provider()
        vad = EnergyVAD(
            energy_threshold=float(cfg.get("vad.energy_threshold", 300)),
            silence_ms=int(cfg.get("vad.silence_threshold_ms", 800)),
            min_speech_ms=int(cfg.get("vad.min_speech_ms", 250)),
        )

        phone_number = msg.get("phone_number", "")
        user_phone = identity.get_user_phone()
        is_owner = identity.is_owner(user_phone, phone_number) if (user_phone and phone_number) else False

        registry = get_registry()
        active_provider = cfg.get("llm.active_provider", "openai")

        # Tool configuration layers: global, per-user, per-contact.
        global_tools_cfg = identity.load_tools_config(cfg.data_dir() / "tools.yaml")
        user_tools_cfg = identity.load_tools_config(
            identity.user_dir(user_phone) / "tools.yaml"
        ) if user_phone else None
        contact_tools_cfg = identity.load_tools_config(
            identity.contact_dir(user_phone, phone_number) / "tools.yaml"
        ) if (user_phone and phone_number) else None

        enabled_tools = registry.resolve_enabled(
            global_tools_cfg, user_tools_cfg, contact_tools_cfg, None, is_owner,
        )
        provider_tools = registry.get_tools_for_provider(active_provider, enabled_tools) if enabled_tools else None

        tool_ctx = {
            "task_info": {
                "task_id": f"voicechat-{id(ws)}",
                "phone_number": phone_number,
                "user_phone": user_phone,
            },
            "webhook": None,
            "engine": None,
            "data_dir": cfg.data_dir(),
            "root_dir": cfg.root_dir(),
            "is_owner": is_owner,
        }

        # Shared with _conversation_turn through ctx.
        ctx["tools"] = provider_tools
        ctx["tool_ctx"] = tool_ctx
        ctx["enabled_tools"] = enabled_tools
        ctx["registry"] = registry
        ctx["active_provider"] = active_provider

        logger.info(
            "VoiceChat: tools resolved — phone=%s, is_owner=%s, enabled=%s",
            phone_number or "(none)", is_owner, enabled_tools,
        )

        language = cfg.get("asr.language", "zh")

        tools_summary = registry.build_tools_summary(enabled_tools)
        full_prompt = system_prompt
        if tools_summary:
            full_prompt += "\n\n" + tools_summary

        messages: list[dict[str, Any]] = [{"role": "system", "content": full_prompt}]
        running = True

        await _send_json(ws, {"type": "ready"})
        logger.info("VoiceChat: session started")

        ai_first = msg.get("ai_first", False)
        if ai_first and system_prompt:
            logger.info("VoiceChat: ai_first mode — generating opening")
            await _send_json(ws, {"type": "thinking"})
            # The synthetic user turn is only used for this request; it is not
            # added to the conversation history.
            opening_messages = messages + [
                {"role": "user", "content": "请根据系统提示词中的任务目标,主动开始对话。直接说第一句话即可,不要解释。"}
            ]
            try:
                result = await llm.generate(opening_messages, tools=provider_tools)
                # "content" may be present but None; treat that as empty text.
                ai_text = (result.get("content") or "").strip()
            except Exception:
                logger.exception("VoiceChat: AI-first LLM error")
                ai_text = ""

            if ai_text:
                messages.append({"role": "assistant", "content": ai_text})
                await _send_json(ws, {"type": "ai_text", "text": ai_text})
                await _send_json(ws, {"type": "debug_messages", "messages": opening_messages + [{"role": "assistant", "content": ai_text}]})
                if not await _send_tts_segments(ws, tts, ai_text, language):
                    running = False

        # The loop is left through an exception (disconnect, stop, or error).
        while running:
            await _conversation_turn(ws, asr, llm, tts, vad, messages, language, ctx)

    except WebSocketDisconnect:
        logger.info("VoiceChat: client disconnected")
    except Exception as e:
        logger.exception("VoiceChat: unexpected error")
        await _send_json(ws, {"type": "error", "message": str(e)})
    finally:
        if asr:
            try:
                await asr.stop_stream()
            except Exception:
                # Best-effort cleanup: keep going, but leave a trace for debugging.
                logger.debug("VoiceChat: ASR stop_stream failed during cleanup", exc_info=True)
        logger.info("VoiceChat: session ended")
348
+
349
+
350
def _assistant_message(result: dict[str, Any]) -> dict[str, Any]:
    """Build the assistant history entry for one LLM *result*."""
    entry: dict[str, Any] = {"role": "assistant"}
    if result.get("content"):
        entry["content"] = result["content"]
    if result.get("tool_calls"):
        # Prefer the raw tool-call payload when the result carries one.
        entry["tool_calls"] = result.get("raw_tool_calls") or result["tool_calls"]
    return entry


def _apply_vad_update(vad, msg: dict[str, Any]) -> None:
    """Apply a live ``update_vad`` control message; ignore it if values are invalid."""
    try:
        threshold = float(msg.get("energy_threshold", vad._base_threshold))
        silence_ms = int(msg.get("silence_threshold_ms", vad._silence_frames * 20))
        min_speech_ms = int(msg.get("min_speech_ms", vad._speech_frames * 20))
    except (TypeError, ValueError):
        logger.warning("VoiceChat: ignoring invalid update_vad message: %r", msg)
        return
    vad._base_threshold = threshold
    # Keep the adaptive margin over the current noise floor in effect.
    vad._threshold = max(threshold, vad._noise_floor * vad._noise_margin)
    vad._silence_frames = silence_ms // 20
    vad._speech_frames = max(1, min_speech_ms // 20)
    logger.info(
        "VoiceChat: VAD params updated live — base_threshold=%.0f, effective=%.0f, silence=%dms, min_speech=%dms",
        threshold, vad._threshold, silence_ms, min_speech_ms,
    )


async def _listen_for_utterance(ws, asr, vad, ctx) -> None:
    """Consume client messages until the VAD reports the end of an utterance.

    Text messages are control messages (``stop``, ``update_system_prompt``,
    ``update_vad``).  Binary messages are audio: when longer than four bytes,
    the first four bytes are read as a little-endian float32 RMS value that is
    handed to the VAD, and the rest is the audio chunk.

    Raises:
        WebSocketDisconnect: on a ``stop`` message or when the socket closed.
    """
    speech_started = False
    last_interim = ""
    vad_report_counter = 0

    while True:
        try:
            raw = await ws.receive()
        except RuntimeError:
            # receive() is expected to fail this way once the socket has
            # already delivered its disconnect message.
            raise WebSocketDisconnect() from None

        if raw.get("type") == "websocket.disconnect":
            raise WebSocketDisconnect()

        text = raw.get("text")
        if text is not None:
            try:
                msg = json.loads(text)
            except ValueError:
                logger.warning("VoiceChat: ignoring malformed control message")
                continue
            if not isinstance(msg, dict):
                continue
            msg_type = msg.get("type")
            if msg_type == "stop":
                raise WebSocketDisconnect()
            if msg_type == "update_system_prompt":
                new_prompt = str(msg.get("system_prompt") or "").strip()
                if new_prompt:
                    # Applied at the next user utterance, not immediately.
                    ctx["pending_prompt"] = new_prompt
                    logger.info("VoiceChat: system prompt updated (pending)")
            elif msg_type == "update_vad":
                _apply_vad_update(vad, msg)
            continue

        raw_bytes = raw.get("bytes")
        if raw_bytes is None:
            continue

        if len(raw_bytes) > 4:
            pre_agc_rms = struct.unpack('<f', raw_bytes[:4])[0]
            chunk = raw_bytes[4:]
        else:
            pre_agc_rms = None
            chunk = raw_bytes

        event = vad.feed(chunk, external_rms=pre_agc_rms)

        # Report VAD state on every fourth audio message.
        vad_report_counter += 1
        if vad_report_counter % 4 == 0:
            await _send_json(ws, {
                "type": "vad_info",
                "rms": round(vad.last_rms, 1),
                "threshold": round(vad._threshold, 1),
                "noise_floor": round(vad._noise_floor, 1),
                "in_speech": vad._in_speech,
                "silence_count": vad._silence_count,
                "silence_target": vad._silence_frames,
            })

        if event == "speech_start" and not speech_started:
            speech_started = True
            last_interim = ""
            await _send_json(ws, {"type": "speech_start"})

        if speech_started:
            await asr.feed_audio(chunk)

            interim = await asr.get_interim_result()
            if interim and interim != last_interim:
                last_interim = interim
                logger.debug("VoiceChat: interim → %s", interim)
                await _send_json(ws, {"type": "interim", "text": interim})

        if event == "speech_end" and speech_started:
            await _send_json(ws, {"type": "speech_end"})
            return


async def _execute_tool_call(ws, registry, tool_ctx_base, tc) -> dict[str, Any]:
    """Run one tool call, report it to the client, and return its ``tool`` history entry."""
    tc_name = tc.get("name", "")
    tc_args = tc.get("arguments", {})
    tc_id = tc.get("id", "")

    await _send_json(ws, {
        "type": "tool_call",
        "id": tc_id,
        "name": tc_name,
        "arguments": tc_args,
    })
    logger.info("VoiceChat: tool call: %s(%s)", tc_name, tc_args)

    handler = registry.get_handler(tc_name)
    if handler:
        task_info = tool_ctx_base.get("task_info", {})
        perm_files = registry.get_permission_files(
            tc_name,
            task_info.get("user_phone", ""),
            task_info.get("phone_number", ""),
        )
        handler_ctx = {
            **tool_ctx_base,
            "permission_files": perm_files,
        }
        try:
            tool_result = await handler(tc_args, handler_ctx)
        except Exception as exc:
            logger.exception("VoiceChat: tool execution failed: %s", tc_name)
            tool_result = f"Error: {exc}"
    else:
        tool_result = f"Unknown tool: {tc_name}"

    # The result is truncated for display/logging and stored as message
    # content, so make sure it is text whatever the handler returned.
    if not isinstance(tool_result, str):
        tool_result = str(tool_result)

    await _send_json(ws, {
        "type": "tool_result",
        "id": tc_id,
        "name": tc_name,
        "result": tool_result[:500],
    })
    logger.info("VoiceChat: tool result: %s → %s", tc_name, tool_result[:100])

    return {
        "role": "tool",
        "tool_call_id": tc_id,
        "name": tc_name,
        "content": tool_result,
    }


async def _conversation_turn(ws, asr, llm, tts, vad, messages, language, ctx):
    """Execute one listen → ASR → LLM → TTS turn.

    Args:
        ws: Client WebSocket.
        asr, llm, tts: Providers created for this session.
        vad: Voice activity detector; reset at the start of the turn.
        messages: Conversation history, mutated in place (index 0 is the
            system message).
        language: Language passed to ASR and TTS.
        ctx: Session state; keys read here are ``pending_prompt``, ``tools``,
            ``registry``, ``tool_ctx`` and ``enabled_tools``.

    Returns early (without speaking) when ASR yields no text or the LLM call
    fails; the caller is expected to start the next turn.

    Raises:
        WebSocketDisconnect: when the client sends ``stop`` or disconnects.
    """
    await _send_json(ws, {"type": "listening"})
    vad.reset()

    # Restart the recognizer so every turn starts from a clean stream.
    await asr.stop_stream()
    await asr.start_stream(language)

    await _listen_for_utterance(ws, asr, vad, ctx)

    user_text = ((await asr.get_result()) or "").strip()
    if not user_text:
        logger.info("VoiceChat: empty ASR result, resuming listening")
        return

    logger.info("VoiceChat: user said: %s", user_text)
    await _send_json(ws, {"type": "user_text", "text": user_text})
    messages.append({"role": "user", "content": user_text})

    provider_tools = ctx.get("tools")
    registry = ctx.get("registry")
    tool_ctx_base = ctx.get("tool_ctx") or {}

    pending_prompt = ctx.get("pending_prompt")
    if pending_prompt:
        # Re-attach the tools summary that the session appended to the
        # original system prompt; otherwise a live update would drop it.
        enabled_tools = ctx.get("enabled_tools")
        tools_summary = registry.build_tools_summary(enabled_tools) if (registry and enabled_tools) else ""
        if tools_summary:
            pending_prompt = pending_prompt + "\n\n" + tools_summary
        messages[0]["content"] = pending_prompt
        ctx["pending_prompt"] = None
        logger.info("VoiceChat: applied updated system prompt")

    await _send_json(ws, {"type": "thinking"})

    try:
        result = await llm.generate(messages, tools=provider_tools)
    except Exception as e:
        logger.exception("VoiceChat: LLM error")
        await _send_json(ws, {"type": "error", "message": f"LLM错误: {e}"})
        return
    messages.append(_assistant_message(result))

    # Keep executing tools and re-querying until the model stops asking.
    # NOTE(review): the number of rounds is unbounded — consider a cap.
    while result.get("tool_calls") and registry:
        for tc in result["tool_calls"]:
            messages.append(await _execute_tool_call(ws, registry, tool_ctx_base, tc))

        try:
            result = await llm.generate(messages, tools=provider_tools)
        except Exception as e:
            logger.exception("VoiceChat: LLM error after tool call")
            await _send_json(ws, {"type": "error", "message": f"LLM错误: {e}"})
            return
        messages.append(_assistant_message(result))

    ai_text = (result.get("content") or "").strip()
    if not ai_text:
        # Spoken fallback when the model produced no text.
        ai_text = "抱歉,我没有理解你的意思,能再说一遍吗?"

    logger.info("VoiceChat: AI reply: %s", ai_text)
    await _send_json(ws, {"type": "ai_text", "text": ai_text})

    await _send_json(ws, {"type": "debug_messages", "messages": list(messages)})

    await _send_tts_segments(ws, tts, ai_text, language)

    try:
        await asr.stop_stream()
        await asr.start_stream(language)
    except Exception:
        logger.debug("VoiceChat: ASR re-init failed", exc_info=True)