kuavo-humanoid-sdk 1.2.1b3319__20250917133850-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kuavo-humanoid-sdk might be problematic. Click here for more details.
- kuavo_humanoid_sdk/__init__.py +6 -0
- kuavo_humanoid_sdk/common/logger.py +45 -0
- kuavo_humanoid_sdk/interfaces/__init__.py +4 -0
- kuavo_humanoid_sdk/interfaces/data_types.py +288 -0
- kuavo_humanoid_sdk/interfaces/end_effector.py +62 -0
- kuavo_humanoid_sdk/interfaces/robot.py +22 -0
- kuavo_humanoid_sdk/interfaces/robot_info.py +56 -0
- kuavo_humanoid_sdk/kuavo/__init__.py +16 -0
- kuavo_humanoid_sdk/kuavo/core/audio.py +32 -0
- kuavo_humanoid_sdk/kuavo/core/core.py +666 -0
- kuavo_humanoid_sdk/kuavo/core/dex_hand_control.py +114 -0
- kuavo_humanoid_sdk/kuavo/core/leju_claw_control.py +67 -0
- kuavo_humanoid_sdk/kuavo/core/llm_doubao.py +608 -0
- kuavo_humanoid_sdk/kuavo/core/microphone.py +192 -0
- kuavo_humanoid_sdk/kuavo/core/navigation.py +70 -0
- kuavo_humanoid_sdk/kuavo/core/ros/audio.py +110 -0
- kuavo_humanoid_sdk/kuavo/core/ros/camera.py +105 -0
- kuavo_humanoid_sdk/kuavo/core/ros/control.py +1524 -0
- kuavo_humanoid_sdk/kuavo/core/ros/microphone.py +38 -0
- kuavo_humanoid_sdk/kuavo/core/ros/navigation.py +217 -0
- kuavo_humanoid_sdk/kuavo/core/ros/observation.py +94 -0
- kuavo_humanoid_sdk/kuavo/core/ros/param.py +201 -0
- kuavo_humanoid_sdk/kuavo/core/ros/sat_utils.py +103 -0
- kuavo_humanoid_sdk/kuavo/core/ros/state.py +652 -0
- kuavo_humanoid_sdk/kuavo/core/ros/tools.py +220 -0
- kuavo_humanoid_sdk/kuavo/core/ros/vision.py +234 -0
- kuavo_humanoid_sdk/kuavo/core/ros_env.py +238 -0
- kuavo_humanoid_sdk/kuavo/core/sdk_deprecated.py +41 -0
- kuavo_humanoid_sdk/kuavo/demo_climbstair.py +249 -0
- kuavo_humanoid_sdk/kuavo/dexterous_hand.py +238 -0
- kuavo_humanoid_sdk/kuavo/leju_claw.py +235 -0
- kuavo_humanoid_sdk/kuavo/logger_client.py +80 -0
- kuavo_humanoid_sdk/kuavo/robot.py +561 -0
- kuavo_humanoid_sdk/kuavo/robot_arm.py +411 -0
- kuavo_humanoid_sdk/kuavo/robot_audio.py +39 -0
- kuavo_humanoid_sdk/kuavo/robot_blockly.py +1154 -0
- kuavo_humanoid_sdk/kuavo/robot_climbstair.py +1607 -0
- kuavo_humanoid_sdk/kuavo/robot_head.py +95 -0
- kuavo_humanoid_sdk/kuavo/robot_info.py +134 -0
- kuavo_humanoid_sdk/kuavo/robot_microphone.py +19 -0
- kuavo_humanoid_sdk/kuavo/robot_navigation.py +135 -0
- kuavo_humanoid_sdk/kuavo/robot_observation.py +64 -0
- kuavo_humanoid_sdk/kuavo/robot_speech.py +24 -0
- kuavo_humanoid_sdk/kuavo/robot_state.py +310 -0
- kuavo_humanoid_sdk/kuavo/robot_tool.py +109 -0
- kuavo_humanoid_sdk/kuavo/robot_vision.py +81 -0
- kuavo_humanoid_sdk/kuavo_strategy/__init__.py +2 -0
- kuavo_humanoid_sdk/kuavo_strategy/grasp_box/grasp_box_strategy.py +1325 -0
- kuavo_humanoid_sdk/kuavo_strategy/kuavo_strategy.py +106 -0
- kuavo_humanoid_sdk/kuavo_strategy_v2/common/data_type.py +340 -0
- kuavo_humanoid_sdk/kuavo_strategy_v2/common/events/base_event.py +215 -0
- kuavo_humanoid_sdk/kuavo_strategy_v2/common/robot_sdk.py +25 -0
- kuavo_humanoid_sdk/kuavo_strategy_v2/pick_place_box/case.py +331 -0
- kuavo_humanoid_sdk/kuavo_strategy_v2/pick_place_box/strategy.py +504 -0
- kuavo_humanoid_sdk/kuavo_strategy_v2/utils/logger_setup.py +40 -0
- kuavo_humanoid_sdk/kuavo_strategy_v2/utils/utils.py +88 -0
- kuavo_humanoid_sdk/msg/__init__.py +4 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/__init__.py +7 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_AprilTagDetection.py +306 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_AprilTagDetectionArray.py +437 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_AudioReceiverData.py +122 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_FTsensorData.py +260 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_JoySticks.py +191 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_Metadata.py +199 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_MmDetectionMsg.py +264 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_RobotActionState.py +112 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_TFArray.py +323 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_TaskPoint.py +175 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/__init__.py +62 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_armCollisionCheckInfo.py +160 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_armHandPose.py +161 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_armHandPoseFree.py +171 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_armPoseWithTimeStamp.py +168 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_armTargetPoses.py +171 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_bezierCurveCubicPoint.py +178 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_dexhandCommand.py +229 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_dexhandTouchState.py +256 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_endEffectorData.py +227 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_footPose.py +123 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_footPose6D.py +123 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_footPose6DTargetTrajectories.py +320 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_footPoseTargetTrajectories.py +301 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_footPoseWithVision.py +136 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_footPoseWithVisionArray.py +231 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_footPoses.py +149 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_footPoses6D.py +149 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_fullBodyTargetTrajectories.py +258 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_gaitTimeName.py +147 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_gestureInfo.py +218 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_gestureTask.py +149 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_handPose.py +136 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_headBodyPose.py +145 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_ikSolveError.py +171 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_ikSolveParam.py +140 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_imuData.py +165 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_jointBezierTrajectory.py +201 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_jointCmd.py +390 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_jointData.py +205 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_kuavoModeSchedule.py +224 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_lejuClawCommand.py +320 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_lejuClawState.py +341 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_motorParam.py +122 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_picoPoseInfo.py +143 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_picoPoseInfoList.py +220 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_planArmState.py +120 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_qv.py +121 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_robotArmQVVD.py +177 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_robotBodyMatrices.py +332 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_robotHandPosition.py +225 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_robotHeadMotionData.py +128 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_robotState.py +222 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_sensorsData.py +655 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_switchGaitByName.py +200 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_tagDataArray.py +216 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_touchSensorStatus.py +162 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_twoArmHandPose.py +273 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_twoArmHandPoseCmd.py +316 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_twoArmHandPoseCmdFree.py +338 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_twoArmHandPoseFree.py +299 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_yoloDetection.py +251 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_yoloOutputData.py +168 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_CreatePath.py +581 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_ExecuteArmAction.py +281 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_GetAllMaps.py +241 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_GetCurrentMap.py +225 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_GetTargetPartPoseInCamera.py +298 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_InitialPoseWithTaskPoint.py +281 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_LoadMap.py +281 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_NavigateToTaskPoint.py +281 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_RepublishTFs.py +373 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_SetInitialPose.py +394 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_SetJoyTopic.py +282 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_SetLEDMode.py +468 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_SetLEDMode_free.py +289 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_SpeechSynthesis.py +270 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_TaskPointOperation.py +536 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/__init__.py +43 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_adjustZeroPoint.py +277 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_changeArmCtrlMode.py +275 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_changeArmCtrlModeKuavo.py +236 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_changeMotorParam.py +299 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_changeTorsoCtrlMode.py +274 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_controlLejuClaw.py +408 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_enableHandTouchSensor.py +304 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_fkSrv.py +395 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_footPose6DTargetTrajectoriesSrv.py +426 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_footPoseTargetTrajectoriesSrv.py +409 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_gestureExecute.py +339 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_gestureExecuteState.py +257 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_gestureList.py +418 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_getCurrentGaitName.py +253 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_getMotorParam.py +299 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_getMotorZeroPoints.py +286 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_handForceLevel.py +330 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_jointMoveTo.py +302 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_planArmTrajectoryBezierCurve.py +422 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_planArmTrajectoryCubicSpline.py +490 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_playmusic.py +268 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_setHwIntialState.py +304 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_setMmCtrlFrame.py +273 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_setMotorEncoderRoundService.py +283 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_setTagId.py +275 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_singleStepControl.py +444 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_twoArmHandPoseCmdFreeSrv.py +716 -0
- kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_twoArmHandPoseCmdSrv.py +664 -0
- kuavo_humanoid_sdk/msg/motion_capture_ik/__init__.py +7 -0
- kuavo_humanoid_sdk/msg/ocs2_msgs/__init__.py +7 -0
- kuavo_humanoid_sdk/msg/ocs2_msgs/msg/__init__.py +12 -0
- kuavo_humanoid_sdk/msg/ocs2_msgs/msg/_constraint.py +142 -0
- kuavo_humanoid_sdk/msg/ocs2_msgs/msg/_controller_data.py +121 -0
- kuavo_humanoid_sdk/msg/ocs2_msgs/msg/_lagrangian_metrics.py +148 -0
- kuavo_humanoid_sdk/msg/ocs2_msgs/msg/_mode_schedule.py +150 -0
- kuavo_humanoid_sdk/msg/ocs2_msgs/msg/_mpc_flattened_controller.py +666 -0
- kuavo_humanoid_sdk/msg/ocs2_msgs/msg/_mpc_input.py +122 -0
- kuavo_humanoid_sdk/msg/ocs2_msgs/msg/_mpc_observation.py +209 -0
- kuavo_humanoid_sdk/msg/ocs2_msgs/msg/_mpc_performance_indices.py +140 -0
- kuavo_humanoid_sdk/msg/ocs2_msgs/msg/_mpc_solver_data.py +886 -0
- kuavo_humanoid_sdk/msg/ocs2_msgs/msg/_mpc_state.py +122 -0
- kuavo_humanoid_sdk/msg/ocs2_msgs/msg/_mpc_target_trajectories.py +239 -0
- kuavo_humanoid_sdk/msg/ocs2_msgs/msg/_multiplier.py +148 -0
- kuavo_humanoid_sdk/msg/ocs2_msgs/srv/__init__.py +1 -0
- kuavo_humanoid_sdk/msg/ocs2_msgs/srv/_reset.py +376 -0
- kuavo_humanoid_sdk-1.2.1b3319.dist-info/METADATA +297 -0
- kuavo_humanoid_sdk-1.2.1b3319.dist-info/RECORD +186 -0
- kuavo_humanoid_sdk-1.2.1b3319.dist-info/WHEEL +6 -0
- kuavo_humanoid_sdk-1.2.1b3319.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,608 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import rospy
|
|
3
|
+
from kuavo_humanoid_sdk.common.logger import SDKLogger
|
|
4
|
+
from kuavo_humanoid_sdk.kuavo.core.ros.microphone import Microphone
|
|
5
|
+
from kuavo_humanoid_sdk.kuavo.core.ros.audio import Audio
|
|
6
|
+
from kuavo_humanoid_sdk.kuavo.core.llm_doubao_lib import *
|
|
7
|
+
from kuavo_humanoid_sdk.kuavo.core.llm_doubao_lib.audio_manager import DialogSession
|
|
8
|
+
from kuavo_humanoid_sdk.kuavo.core.llm_doubao_lib.realtime_dialog_client import RealtimeDialogClient
|
|
9
|
+
from kuavo_humanoid_sdk.kuavo.core.llm_doubao_lib import config
|
|
10
|
+
import os
|
|
11
|
+
import asyncio
|
|
12
|
+
import threading
|
|
13
|
+
import queue
|
|
14
|
+
import time
|
|
15
|
+
import struct
|
|
16
|
+
import uuid
|
|
17
|
+
from typing import Optional, Dict, Any
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ROSDialogSession(DialogSession):
    """DialogSession variant that routes all audio through ROS topics.

    Replaces the PyAudio playback path of the parent class: audio received
    from the realtime-dialog server is converted to 16-bit PCM samples,
    resampled from 24 kHz to 16 kHz, and published via the ``Audio``
    interface. Oversized chunks are queued in ``audio_chunk_buffer`` and
    drained by a daemon thread so the ROS side is not flooded.

    NOTE(review): this file uses both ``SDKLogger.warning`` and
    ``SDKLogger.warn`` — confirm both names exist on SDKLogger.
    """

    def __init__(self, ws_config: Dict[str, Any], audio_interface: Audio, enable_signal_handler: bool = False):
        """Create a session bound to a ROS audio sink.

        Args:
            ws_config: WebSocket configuration dict passed to RealtimeDialogClient.
            audio_interface: ROS Audio wrapper used to publish playback chunks.
            enable_signal_handler: install a SIGINT handler (only works in the
                main thread; failures are logged and ignored).
        """
        # Initialize session ID and client without calling parent __init__
        # (the parent would set up PyAudio devices we deliberately avoid).
        self.session_id = str(uuid.uuid4())
        self.client = RealtimeDialogClient(config=ws_config, session_id=self.session_id)

        # Store reference to audio interface for ROS audio publishing
        self.audio_interface = audio_interface

        # Initialize session state variables
        self.is_running = True
        self.is_session_finished = False
        self.is_user_querying = False
        self.is_sending_chat_tts_text = False
        self.audio_buffer = b''

        # Audio chunk buffer for handling large audio chunks; guarded by
        # buffer_lock since it is shared with the drain thread.
        self.audio_chunk_buffer = []
        self.buffer_lock = threading.Lock()

        # Skip PyAudio initialization completely — these attributes exist
        # only so code written against the parent class does not break.
        self.audio_device = None
        self.audio_queue = None
        self.output_stream = None
        self.input_stream = None
        self.player_thread = None
        self.is_recording = True
        self.is_playing = False  # We don't use PyAudio playing

        # Start audio chunk processing (drain) thread; daemon so it cannot
        # keep the process alive on shutdown.
        self.chunk_processor_thread = threading.Thread(target=self._process_audio_chunks)
        self.chunk_processor_thread.daemon = True
        self.chunk_processor_thread.start()

        # Only set signal handler if requested and in main thread —
        # signal.signal raises ValueError outside the main thread.
        if enable_signal_handler:
            try:
                import signal
                signal.signal(signal.SIGINT, self._keyboard_signal)
            except ValueError as e:
                SDKLogger.warning(f"Warning: Cannot set signal handler (not in main thread): {e}")

    def cleanup(self):
        """Clean up resources (no PyAudio to clean up).

        Flips the run/record/play flags so the drain thread exits, empties
        the pending chunk buffer, and joins the drain thread (2 s timeout).
        """
        self.is_running = False
        self.is_recording = False
        self.is_playing = False

        # Clear audio buffer
        with self.buffer_lock:
            self.audio_chunk_buffer.clear()

        # Wait for chunk processor thread to finish
        if hasattr(self, 'chunk_processor_thread') and self.chunk_processor_thread.is_alive():
            self.chunk_processor_thread.join(timeout=2)

        # No audio device cleanup needed since we're using ROS

    def _keyboard_signal(self, sig, frame):
        """Handle keyboard interrupt signal by flagging all loops to stop."""
        SDKLogger.info(f"receive keyboard Ctrl+C")
        self.is_recording = False
        self.is_playing = False
        self.is_running = False

    def _process_audio_chunks(self):
        """Drain loop (runs on the daemon thread): publish buffered chunks to ROS.

        Pops one chunk per iteration under the lock, publishes it, and
        sleeps briefly between chunks to pace the ROS publisher.
        """
        while self.is_running:
            try:
                # Hold the lock only for the pop; publish outside the lock.
                with self.buffer_lock:
                    if self.audio_chunk_buffer:
                        chunk = self.audio_chunk_buffer.pop(0)
                    else:
                        chunk = None

                if chunk:
                    self._publish_audio_chunk_to_ros(chunk)
                    # Small delay to prevent overwhelming the ROS system
                    time.sleep(0.01)
                else:
                    # No chunks to process, wait a bit
                    time.sleep(0.05)

            except Exception as e:
                SDKLogger.error(f"[Speech] Error processing audio chunks: {e}")
                time.sleep(0.1)

    def _add_audio_chunk_to_buffer(self, audio_chunk):
        """Queue an audio chunk for the drain thread, dropping the oldest past 50."""
        with self.buffer_lock:
            self.audio_chunk_buffer.append(audio_chunk)
            # Limit buffer size to prevent memory issues
            if len(self.audio_chunk_buffer) > 50:
                SDKLogger.warn(f"[Speech] Audio buffer full, dropping oldest chunk")
                self.audio_chunk_buffer.pop(0)

    def _convert_audio_bytes_to_int_list(self, audio_bytes: bytes):
        """Convert audio bytes (PCM) to a list of int16 samples for ROS playback.

        Returns the sample list for small payloads. Payloads longer than
        8192 samples (after resampling) are split and queued via
        ``_add_audio_chunk_to_buffer`` and an empty list is returned.
        Returns ``[]`` on any error.
        """
        try:
            # SDKLogger.debug(f"[Speech] Converting audio bytes: length={len(audio_bytes)}")

            if len(audio_bytes) < 4:  # Float32 needs at least 4 bytes
                SDKLogger.warn(f"[Speech] Audio data too short: {len(audio_bytes)} bytes")
                return []

            # Try to detect audio format and convert accordingly
            audio_ints = self._convert_audio_with_format_detection(audio_bytes)

            if audio_ints:
                # Resample from 24kHz to 16kHz for ROS compatibility
                audio_ints = self._resample_audio(audio_ints, 24000, 16000)

                # Split large audio chunks into smaller ones for better ROS compatibility
                chunk_size = 8192
                if len(audio_ints) > chunk_size:
                    SDKLogger.debug(f"[Speech] Splitting large audio chunk ({len(audio_ints)} samples) into {len(audio_ints) // chunk_size + 1} smaller chunks")
                    # Split into multiple chunks and add to buffer
                    for i in range(0, len(audio_ints), chunk_size):
                        chunk = audio_ints[i:i + chunk_size]
                        if len(chunk) > 0:
                            self._add_audio_chunk_to_buffer(chunk)
                    return []  # Return empty since we've buffered the chunks
                else:
                    return audio_ints
            else:
                SDKLogger.warn(f"[Speech] No audio samples extracted from {len(audio_bytes)} bytes")
                return []

        except Exception as e:
            SDKLogger.error(f"[Speech] Error converting audio bytes to int list: {e}")
            return []

    def _convert_audio_with_format_detection(self, audio_bytes: bytes):
        """Convert raw bytes to int16 samples, auto-detecting the PCM format.

        Tries little-endian Float32 first (applying an auto-gain that maps the
        peak to ~90% of int16 range); if that yields too little variation,
        falls back to little-endian Int16. Returns ``[]`` if neither works.
        """
        import struct
        import numpy as np

        # Try Float32 format first (24kHz server format)
        try:
            if len(audio_bytes) % 4 == 0:  # Float32 should be divisible by 4
                float_samples = []
                for i in range(0, len(audio_bytes), 4):
                    if i + 3 < len(audio_bytes):
                        # Convert 4 bytes to float32 (little-endian)
                        sample_float = struct.unpack('<f', audio_bytes[i:i+4])[0]
                        float_samples.append(sample_float)

                if float_samples:
                    # Analyze float32 data range
                    min_float = min(float_samples)
                    max_float = max(float_samples)
                    abs_max = max(abs(min_float), abs(max_float))

                    # SDKLogger.debug(f"[Speech] Float32 analysis: count={len(float_samples)}, min={min_float:.6f}, max={max_float:.6f}, abs_max={abs_max:.6f}")

                    # Auto-detect gain based on actual float range
                    if abs_max > 0.001:  # Avoid division by zero
                        # Calculate gain to use full int16 range
                        # Leave some headroom (use 0.9 instead of 1.0)
                        target_range = 32767 * 0.9
                        auto_gain = target_range / abs_max

                        # SDKLogger.debug(f"[Speech] Auto-detected gain: {auto_gain:.2f}")

                        # Apply gain and convert to int16
                        samples = []
                        for sample_float in float_samples:
                            sample_int = int(sample_float * auto_gain)
                            sample_int = max(-32768, min(32767, sample_int))  # Clamp
                            samples.append(sample_int)

                        # Check final result
                        min_val = min(samples)
                        max_val = max(samples)
                        variation = max_val - min_val

                        # SDKLogger.debug(f"[Speech] Float32 conversion result: count={len(samples)}, variation={variation}")

                        if variation > 100:  # Good variation suggests valid conversion
                            # SDKLogger.debug(f"[Speech] Using Float32 format (24kHz) with auto-gain {auto_gain:.2f}")
                            return samples
                    else:
                        # NOTE(review): nesting reconstructed from a
                        # whitespace-stripped source — this else is attached
                        # to the abs_max guard (the message references
                        # abs_max); confirm against the upstream repository.
                        SDKLogger.warn(f"[Speech] Float32 data range too small (abs_max={abs_max:.6f})")
        except Exception as e:
            SDKLogger.error(f"[Speech] Float32 conversion failed: {e}")

        # Try Int16 format (fallback)
        try:
            if len(audio_bytes) % 2 == 0:  # Int16 should be divisible by 2
                samples = []
                for i in range(0, len(audio_bytes), 2):
                    if i + 1 < len(audio_bytes):
                        # Convert 2 bytes to signed 16-bit integer (little-endian)
                        sample = struct.unpack('<h', audio_bytes[i:i+2])[0]
                        samples.append(sample)

                if samples:
                    min_val = min(samples)
                    max_val = max(samples)
                    variation = max_val - min_val

                    SDKLogger.debug(f"[Speech] Int16 conversion: count={len(samples)}, variation={variation}")
                    SDKLogger.debug(f"[Speech] Using Int16 format")
                    return samples
        except Exception as e:
            SDKLogger.error(f"[Speech] Int16 conversion failed: {e}")

        SDKLogger.error(f"[Speech] Failed to convert audio data with any format")
        return []

    def _resample_audio(self, audio_samples, from_rate, to_rate):
        """Resample int samples from one rate to another.

        Uses ``scipy.signal.resample`` (FFT-based) when scipy is available;
        otherwise falls back to naive decimation when downsampling. Returns
        the input unchanged on equal rates or on error.
        """
        if from_rate == to_rate:
            return audio_samples

        try:
            import numpy as np
            from scipy import signal

            # Convert to numpy array
            audio_array = np.array(audio_samples, dtype=np.float32)

            # Calculate resampling ratio
            resample_ratio = to_rate / from_rate

            # Resample using scipy
            resampled_length = int(len(audio_array) * resample_ratio)
            resampled_audio = signal.resample(audio_array, resampled_length)

            # Convert back to int16 and clamp
            resampled_int = np.clip(resampled_audio, -32768, 32767).astype(np.int16)

            # SDKLogger.debug(f"[Speech] Resampled audio from {from_rate}Hz to {to_rate}Hz: {len(audio_samples)} -> {len(resampled_int)} samples")

            return resampled_int.tolist()

        except ImportError:
            SDKLogger.warn(f"[Speech] scipy not available, using simple decimation for resampling")
            # Simple decimation fallback (drops samples; no anti-alias filter)
            if from_rate > to_rate:
                step = int(from_rate // to_rate)
                return audio_samples[::step]
            else:
                return audio_samples
        except Exception as e:
            SDKLogger.error(f"[Speech] Error resampling audio: {e}")
            return audio_samples

    def _publish_audio_chunk_to_ros(self, audio_int_list, gain: int = 1):
        """Publish a single audio chunk to the ROS topic via the Audio interface.

        Empty chunks are ignored; publish failures are logged, not raised.
        """
        try:
            if not audio_int_list:
                return

            # Use the new publish_audio_chunk method from Audio class
            success = self.audio_interface.publish_audio_chunk(audio_int_list, gain=gain)

            if not success:
                SDKLogger.warn(f"[Speech] Failed to publish audio chunk with {len(audio_int_list)} samples")

        except Exception as e:
            SDKLogger.error(f"[Speech] Error publishing audio to ROS: {e}")

    def handle_server_response(self, response: Dict[str, Any]) -> None:
        """Dispatch one server message: audio goes to ROS, events update state.

        Event codes observed here (per the original comments — semantics
        inferred from handling, confirm against the Doubao realtime-dialog
        protocol docs): 451 ASR result, 550 TTS text delta, 450 user
        interruption (flush audio), 350 TTS start, 459 user query end,
        351 TTS done. Raises on SERVER_ERROR messages.
        """
        if response == {}:
            return

        # Handle audio data from server
        if response['message_type'] == 'SERVER_ACK' and isinstance(response.get('payload_msg'), bytes):
            # Drop server audio while we are injecting chat TTS text ourselves.
            if self.is_sending_chat_tts_text:
                return

            audio_data = response['payload_msg']
            self.audio_buffer += audio_data

            # SDKLogger.debug(f"[Speech] Received audio chunk: {len(audio_data)} bytes")

            # Play audio through ROS audio system instead of PyAudio
            try:
                audio_int_list = self._convert_audio_bytes_to_int_list(audio_data)
                if audio_int_list:
                    # For smaller chunks, publish immediately
                    self._publish_audio_chunk_to_ros(audio_int_list)
                # For larger chunks, they are automatically buffered in _convert_audio_bytes_to_int_list
            except Exception as e:
                SDKLogger.error(f"[Speech] Error playing server audio through ROS: {e}")

        elif response['message_type'] == 'SERVER_FULL_RESPONSE':
            # SDKLogger.info(f"服务器响应: {response}")
            event = response.get('event')
            payload_msg = response.get('payload_msg', {})

            # Log ASR results (user speech recognition)
            if event == 451:
                # Extract user speech text from ASR results
                results = payload_msg.get('results', [])
                if results and len(results) > 0:
                    result = results[0]
                    text = result.get('text', '')
                    is_interim = result.get('is_interim', True)

                    # Only log final results (not interim)
                    if not is_interim and text:
                        SDKLogger.info(f"[Speech] 用户说话: {text}")

            # Log TTS streaming text (AI response)
            elif event == 550:
                content = payload_msg.get('content', '')
                if content:
                    # Accumulate deltas; full text is logged on event 351.
                    if not hasattr(self, '_current_ai_response'):
                        self._current_ai_response = ""
                    self._current_ai_response += content
                    # SDKLogger.info(f"[Speech] AI回复: {content}")

            if event == 450:
                SDKLogger.info(f"清空缓存音频: {response['session_id']}")
                # Clear the audio buffer: user started talking, stop playback.
                with self.buffer_lock:
                    self.audio_chunk_buffer.clear()
                self.is_user_querying = True

            if event == 350 and self.is_sending_chat_tts_text and payload_msg.get("tts_type") == "chat_tts_text":
                # Clear the audio buffer
                with self.buffer_lock:
                    self.audio_chunk_buffer.clear()
                self.is_sending_chat_tts_text = False

            if event == 459:
                self.is_user_querying = False

            # Log complete AI response when TTS ends
            if event == 351:
                # TTS synthesis completed
                if hasattr(self, '_current_ai_response') and self._current_ai_response:
                    SDKLogger.info(f"[Speech] AI完整回复: {self._current_ai_response}")
                    self._current_ai_response = ""  # Reset for next response

        elif response['message_type'] == 'SERVER_ERROR':
            SDKLogger.error(f"服务器错误: {response['payload_msg']}")
            raise Exception("服务器错误")

    async def receive_loop(self):
        """Receive server responses until a session-finished event (152/153).

        Sets ``is_session_finished`` and exits on those events; swallows
        cancellation and logs any other exception.
        """
        try:
            while True:
                response = await self.client.receive_server_response()
                self.handle_server_response(response)
                if 'event' in response and (response['event'] == 152 or response['event'] == 153):
                    SDKLogger.info(f"receive session finished event: {response['event']}")
                    self.is_session_finished = True
                    break
        except asyncio.CancelledError:
            SDKLogger.info("接收任务已取消")
        except Exception as e:
            SDKLogger.error(f"接收消息错误: {e}")
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
class RobotLLMDoubaoCore:
|
|
382
|
+
|
|
383
|
+
def __init__(self, subscribe_topic: str = "/micphone_data"):
|
|
384
|
+
# Microphone interface
|
|
385
|
+
self.microphone = Microphone(subscribe_topic)
|
|
386
|
+
|
|
387
|
+
# ROS Audio interface for direct topic publishing
|
|
388
|
+
self.ros_audio = Audio()
|
|
389
|
+
|
|
390
|
+
# Audio parameters
|
|
391
|
+
self.SAMPLE_RATE = 16000
|
|
392
|
+
self.CHANNELS = 1
|
|
393
|
+
self.BIT_RESOLUTION = 16
|
|
394
|
+
self.BYTES_PER_SAMPLE = self.BIT_RESOLUTION // 8
|
|
395
|
+
|
|
396
|
+
# Dialog session management
|
|
397
|
+
self.dialog_session: Optional[ROSDialogSession] = None
|
|
398
|
+
self.is_running = False
|
|
399
|
+
self.event_loop = None
|
|
400
|
+
self.session_thread = None
|
|
401
|
+
self.ws_config = None
|
|
402
|
+
|
|
403
|
+
# Audio queue for ROS microphone data
|
|
404
|
+
self.audio_queue = queue.Queue()
|
|
405
|
+
|
|
406
|
+
SDKLogger.info("[Speech] RobotLLMDoubaoCore initialized")
|
|
407
|
+
|
|
408
|
+
def _setup_websocket_config(self, app_id: str, access_key: str):
|
|
409
|
+
"""Setup WebSocket configuration with provided credentials"""
|
|
410
|
+
self.ws_config = {
|
|
411
|
+
"base_url": "wss://openspeech.bytedance.com/api/v3/realtime/dialogue",
|
|
412
|
+
"headers": {
|
|
413
|
+
"X-Api-App-ID": app_id,
|
|
414
|
+
"X-Api-Access-Key": access_key,
|
|
415
|
+
"X-Api-Resource-Id": "volc.speech.dialog",
|
|
416
|
+
"X-Api-App-Key": "PlgvMymc7f3tQnJ6",
|
|
417
|
+
"X-Api-Connect-Id": config.ws_connect_config["headers"]["X-Api-Connect-Id"],
|
|
418
|
+
}
|
|
419
|
+
}
|
|
420
|
+
|
|
421
|
+
def _run_async_session(self):
|
|
422
|
+
"""Run dialog session in separate thread with its own event loop"""
|
|
423
|
+
self.event_loop = asyncio.new_event_loop()
|
|
424
|
+
asyncio.set_event_loop(self.event_loop)
|
|
425
|
+
|
|
426
|
+
try:
|
|
427
|
+
self.event_loop.run_until_complete(self._async_session_main())
|
|
428
|
+
except Exception as e:
|
|
429
|
+
SDKLogger.error(f"[Speech] Dialog session error: {e}")
|
|
430
|
+
finally:
|
|
431
|
+
self.event_loop.close()
|
|
432
|
+
|
|
433
|
+
async def _async_session_main(self):
    """Main async session handler.

    Runs the full lifecycle of one Doubao dialogue session:
    connect -> start_session -> spawn receive/audio tasks -> say_hello ->
    wait until stopped or finished -> cancel tasks -> finish_session ->
    finish_connection -> close.  Always calls ``dialog_session.cleanup()``
    on the way out, even on error.
    """
    try:
        # Establish WebSocket connection first (reconnect after test) —
        # verify_connection() closed its probe connection, so we open a
        # fresh one here.
        connection_success = await self.dialog_session.client.start_connection()
        if not connection_success:
            SDKLogger.error("[Speech] Failed to establish WebSocket connection in session")
            return

        await self.dialog_session.client.start_session()
        SDKLogger.info("[Speech] Speech session started successfully")

        # Start receiving responses from the dialog service.
        receive_task = asyncio.create_task(self.dialog_session.receive_loop())

        # Start forwarding ROS microphone data to the service.
        audio_task = asyncio.create_task(self._process_ros_microphone_data())

        # Send hello message to kick off the dialogue.
        await self.dialog_session.client.say_hello()

        # Poll until stop_speech_system() clears is_running or the
        # service marks the session finished.
        while self.is_running and not self.dialog_session.is_session_finished:
            await asyncio.sleep(0.1)

        # Clean up tasks.
        # NOTE(review): the cancelled tasks are never awaited, so their
        # CancelledError is swallowed by the loop at shutdown — confirm
        # this is intentional.
        receive_task.cancel()
        audio_task.cancel()

        # Finish session handshake with the service.
        # NOTE(review): this wait has no timeout — if the server never
        # sets is_session_finished, this coroutine hangs forever.
        await self.dialog_session.client.finish_session()
        while not self.dialog_session.is_session_finished:
            await asyncio.sleep(0.1)
        await self.dialog_session.client.finish_connection()
        await self.dialog_session.client.close()

        SDKLogger.info(f"[Speech] Dialog session ended, logid: {self.dialog_session.client.logid}")

    except Exception as e:
        SDKLogger.error(f"[Speech] Session error: {e}")
    finally:
        # Release session resources regardless of how we exited.
        if self.dialog_session:
            self.dialog_session.cleanup()
async def _process_ros_microphone_data(self):
    """Forward microphone frames from the ROS topic to the dialog service."""
    SDKLogger.info("[Speech] Starting ROS microphone data processing")

    while self.is_running:
        try:
            frame = self.microphone.get_data()

            if frame is not None and len(frame) > 0:
                # The dialog client expects raw bytes; serialize numpy
                # frames, pass anything else through unchanged.
                payload = frame.tobytes() if isinstance(frame, np.ndarray) else frame
                await self.dialog_session.client.task_request(payload)

            # Small delay to prevent CPU overload.
            await asyncio.sleep(0.01)

        except Exception as e:
            SDKLogger.warn(f"[Speech] Error processing ROS microphone data: {e}")
            await asyncio.sleep(0.1)
def verify_connection(self, app_id: str, access_key: str) -> bool:
    """Verify the Doubao credentials by opening a test WebSocket connection.

    Builds the WebSocket config, creates the ROS-integrated dialog session,
    and probes the connection on a throwaway event loop.  The probe
    connection is closed again; _async_session_main() reconnects for real.

    Args:
        app_id: Doubao application ID.
        access_key: Doubao access key.

    Returns:
        True if the test connection succeeded; False otherwise (the failed
        session is cleared so start_speech_system() refuses to run).
    """
    if not app_id or not access_key:
        SDKLogger.error("[Speech] App ID and Access Key are required")
        return False

    # Setup WebSocket configuration.
    self._setup_websocket_config(app_id, access_key)
    # Use custom ROS-integrated DialogSession with Audio interface.
    self.dialog_session = ROSDialogSession(self.ws_config, self.ros_audio, enable_signal_handler=False)

    # Test connection using a dedicated event loop.
    # (Fix: dropped the redundant in-function `import asyncio` — the module
    # already uses asyncio at module scope.)
    try:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

        try:
            connection_successful = loop.run_until_complete(self.dialog_session.client.start_connection())
            if connection_successful:
                # Close the test connection since we'll reconnect in _async_session_main
                loop.run_until_complete(self.dialog_session.client.close())
                SDKLogger.info("[Speech] WebSocket connected successfully")
                return True
            else:
                SDKLogger.error("[Speech] WebSocket connection failed")
                self.dialog_session = None  # Clear failed session
                return False
        finally:
            loop.close()

    except Exception as e:
        SDKLogger.error(f"[Speech] Failed to test WebSocket connection: {e}")
        self.dialog_session = None  # Clear failed session
        return False
def start_speech_system(self):
    """Start the speech dialog system with Doubao service.

    Requires a prior successful verify_connection().  Spawns a daemon
    thread running the async dialog session and waits briefly for the
    connection to establish.  No-op (with a warning) if already running.
    """
    if self.is_running:
        SDKLogger.warn("[Speech] Speech system is already running")
        # Fix: previously fell through and started a second session thread.
        return

    if self.dialog_session is None:
        SDKLogger.error("[Speech] Dialog session not initialized. Please call verify_connection() first with valid credentials.")
        # Fix: previously continued and launched a thread doomed to fail
        # on the missing session.
        return

    try:
        SDKLogger.info(f"[Speech] Starting speech system")

        # Start dialog session in separate daemon thread so it cannot
        # block interpreter shutdown.
        self.is_running = True
        self.session_thread = threading.Thread(target=self._run_async_session)
        self.session_thread.daemon = True
        self.session_thread.start()

        # Wait a bit for connection to establish.
        time.sleep(2)

        SDKLogger.info("[Speech] Speech system started successfully")

    except Exception as e:
        SDKLogger.error(f"[Speech] Failed to start speech system: {e}")
        self.is_running = False
def stop_speech_system(self):
    """Stop the Doubao speech system.

    Signals the session loop to exit, clears the dialog-session flags,
    and joins the session thread (5 s timeout).  No-op (with a warning)
    if the system is not running.
    """
    if not self.is_running:
        SDKLogger.warn("[Speech] Speech system is not running")
        # Fix: previously fell through and ran the teardown anyway.
        return

    try:
        SDKLogger.info("[Speech] Stopping speech system")

        # Signal the async session loop to stop.
        self.is_running = False

        # Stop dialog session activity flags.
        if self.dialog_session:
            self.dialog_session.is_running = False
            self.dialog_session.is_recording = False
            self.dialog_session.is_playing = False

        # Wait for session thread to finish (bounded, so we never hang).
        if self.session_thread and self.session_thread.is_alive():
            self.session_thread.join(timeout=5)

        SDKLogger.info("[Speech] Speech system stopped successfully")

    except Exception as e:
        SDKLogger.error(f"[Speech] Failed to stop speech system: {e}")
def is_system_running(self) -> bool:
    """Report whether the speech dialog system is currently active."""
    running = self.is_running
    return running
def get_session_status(self) -> dict:
    """Return a snapshot of the dialog-session state as a plain dict.

    Keys: is_running, has_session, session_finished, logid.
    """
    session = self.dialog_session
    finished = False
    logid = ""

    # Only probe session fields when a session actually exists.
    if session:
        finished = session.is_session_finished
        client = session.client
        if client:
            logid = client.logid

    return {
        "is_running": self.is_running,
        "has_session": session is not None,
        "session_finished": finished,
        "logid": logid,
    }