kuavo-humanoid-sdk 1.2.2b3208__20250922170818-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kuavo-humanoid-sdk might be problematic. Click here for more details.

Files changed (186)
  1. kuavo_humanoid_sdk/__init__.py +6 -0
  2. kuavo_humanoid_sdk/common/logger.py +45 -0
  3. kuavo_humanoid_sdk/interfaces/__init__.py +4 -0
  4. kuavo_humanoid_sdk/interfaces/data_types.py +288 -0
  5. kuavo_humanoid_sdk/interfaces/end_effector.py +62 -0
  6. kuavo_humanoid_sdk/interfaces/robot.py +22 -0
  7. kuavo_humanoid_sdk/interfaces/robot_info.py +56 -0
  8. kuavo_humanoid_sdk/kuavo/__init__.py +16 -0
  9. kuavo_humanoid_sdk/kuavo/core/audio.py +32 -0
  10. kuavo_humanoid_sdk/kuavo/core/core.py +666 -0
  11. kuavo_humanoid_sdk/kuavo/core/dex_hand_control.py +114 -0
  12. kuavo_humanoid_sdk/kuavo/core/leju_claw_control.py +67 -0
  13. kuavo_humanoid_sdk/kuavo/core/llm_doubao.py +608 -0
  14. kuavo_humanoid_sdk/kuavo/core/microphone.py +192 -0
  15. kuavo_humanoid_sdk/kuavo/core/navigation.py +70 -0
  16. kuavo_humanoid_sdk/kuavo/core/ros/audio.py +110 -0
  17. kuavo_humanoid_sdk/kuavo/core/ros/camera.py +105 -0
  18. kuavo_humanoid_sdk/kuavo/core/ros/control.py +1524 -0
  19. kuavo_humanoid_sdk/kuavo/core/ros/microphone.py +38 -0
  20. kuavo_humanoid_sdk/kuavo/core/ros/navigation.py +217 -0
  21. kuavo_humanoid_sdk/kuavo/core/ros/observation.py +94 -0
  22. kuavo_humanoid_sdk/kuavo/core/ros/param.py +201 -0
  23. kuavo_humanoid_sdk/kuavo/core/ros/sat_utils.py +103 -0
  24. kuavo_humanoid_sdk/kuavo/core/ros/state.py +652 -0
  25. kuavo_humanoid_sdk/kuavo/core/ros/tools.py +220 -0
  26. kuavo_humanoid_sdk/kuavo/core/ros/vision.py +234 -0
  27. kuavo_humanoid_sdk/kuavo/core/ros_env.py +238 -0
  28. kuavo_humanoid_sdk/kuavo/core/sdk_deprecated.py +41 -0
  29. kuavo_humanoid_sdk/kuavo/demo_climbstair.py +249 -0
  30. kuavo_humanoid_sdk/kuavo/dexterous_hand.py +238 -0
  31. kuavo_humanoid_sdk/kuavo/leju_claw.py +235 -0
  32. kuavo_humanoid_sdk/kuavo/logger_client.py +80 -0
  33. kuavo_humanoid_sdk/kuavo/robot.py +646 -0
  34. kuavo_humanoid_sdk/kuavo/robot_arm.py +411 -0
  35. kuavo_humanoid_sdk/kuavo/robot_audio.py +39 -0
  36. kuavo_humanoid_sdk/kuavo/robot_blockly.py +1154 -0
  37. kuavo_humanoid_sdk/kuavo/robot_climbstair.py +1607 -0
  38. kuavo_humanoid_sdk/kuavo/robot_head.py +95 -0
  39. kuavo_humanoid_sdk/kuavo/robot_info.py +134 -0
  40. kuavo_humanoid_sdk/kuavo/robot_microphone.py +19 -0
  41. kuavo_humanoid_sdk/kuavo/robot_navigation.py +135 -0
  42. kuavo_humanoid_sdk/kuavo/robot_observation.py +64 -0
  43. kuavo_humanoid_sdk/kuavo/robot_speech.py +24 -0
  44. kuavo_humanoid_sdk/kuavo/robot_state.py +310 -0
  45. kuavo_humanoid_sdk/kuavo/robot_tool.py +109 -0
  46. kuavo_humanoid_sdk/kuavo/robot_vision.py +81 -0
  47. kuavo_humanoid_sdk/kuavo_strategy/__init__.py +2 -0
  48. kuavo_humanoid_sdk/kuavo_strategy/grasp_box/grasp_box_strategy.py +1325 -0
  49. kuavo_humanoid_sdk/kuavo_strategy/kuavo_strategy.py +106 -0
  50. kuavo_humanoid_sdk/kuavo_strategy_v2/common/data_type.py +340 -0
  51. kuavo_humanoid_sdk/kuavo_strategy_v2/common/events/base_event.py +215 -0
  52. kuavo_humanoid_sdk/kuavo_strategy_v2/common/robot_sdk.py +25 -0
  53. kuavo_humanoid_sdk/kuavo_strategy_v2/pick_place_box/case.py +331 -0
  54. kuavo_humanoid_sdk/kuavo_strategy_v2/pick_place_box/strategy.py +504 -0
  55. kuavo_humanoid_sdk/kuavo_strategy_v2/utils/logger_setup.py +40 -0
  56. kuavo_humanoid_sdk/kuavo_strategy_v2/utils/utils.py +88 -0
  57. kuavo_humanoid_sdk/msg/__init__.py +4 -0
  58. kuavo_humanoid_sdk/msg/kuavo_msgs/__init__.py +7 -0
  59. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_AprilTagDetection.py +306 -0
  60. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_AprilTagDetectionArray.py +437 -0
  61. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_AudioReceiverData.py +122 -0
  62. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_FTsensorData.py +260 -0
  63. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_JoySticks.py +191 -0
  64. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_Metadata.py +199 -0
  65. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_MmDetectionMsg.py +264 -0
  66. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_RobotActionState.py +112 -0
  67. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_TFArray.py +323 -0
  68. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_TaskPoint.py +175 -0
  69. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/__init__.py +62 -0
  70. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_armCollisionCheckInfo.py +160 -0
  71. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_armHandPose.py +161 -0
  72. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_armHandPoseFree.py +171 -0
  73. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_armPoseWithTimeStamp.py +168 -0
  74. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_armTargetPoses.py +171 -0
  75. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_bezierCurveCubicPoint.py +178 -0
  76. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_dexhandCommand.py +229 -0
  77. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_dexhandTouchState.py +256 -0
  78. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_endEffectorData.py +227 -0
  79. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_footPose.py +123 -0
  80. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_footPose6D.py +123 -0
  81. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_footPose6DTargetTrajectories.py +320 -0
  82. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_footPoseTargetTrajectories.py +301 -0
  83. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_footPoseWithVision.py +136 -0
  84. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_footPoseWithVisionArray.py +231 -0
  85. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_footPoses.py +149 -0
  86. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_footPoses6D.py +149 -0
  87. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_fullBodyTargetTrajectories.py +258 -0
  88. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_gaitTimeName.py +147 -0
  89. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_gestureInfo.py +218 -0
  90. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_gestureTask.py +149 -0
  91. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_handPose.py +136 -0
  92. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_headBodyPose.py +145 -0
  93. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_ikSolveError.py +171 -0
  94. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_ikSolveParam.py +140 -0
  95. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_imuData.py +165 -0
  96. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_jointBezierTrajectory.py +201 -0
  97. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_jointCmd.py +390 -0
  98. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_jointData.py +205 -0
  99. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_kuavoModeSchedule.py +224 -0
  100. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_lejuClawCommand.py +320 -0
  101. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_lejuClawState.py +341 -0
  102. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_motorParam.py +122 -0
  103. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_picoPoseInfo.py +143 -0
  104. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_picoPoseInfoList.py +220 -0
  105. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_planArmState.py +120 -0
  106. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_qv.py +121 -0
  107. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_robotArmQVVD.py +177 -0
  108. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_robotBodyMatrices.py +332 -0
  109. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_robotHandPosition.py +225 -0
  110. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_robotHeadMotionData.py +128 -0
  111. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_robotState.py +222 -0
  112. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_sensorsData.py +655 -0
  113. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_switchGaitByName.py +200 -0
  114. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_tagDataArray.py +216 -0
  115. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_touchSensorStatus.py +162 -0
  116. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_twoArmHandPose.py +273 -0
  117. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_twoArmHandPoseCmd.py +316 -0
  118. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_twoArmHandPoseCmdFree.py +338 -0
  119. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_twoArmHandPoseFree.py +299 -0
  120. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_yoloDetection.py +251 -0
  121. kuavo_humanoid_sdk/msg/kuavo_msgs/msg/_yoloOutputData.py +168 -0
  122. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_CreatePath.py +581 -0
  123. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_ExecuteArmAction.py +281 -0
  124. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_GetAllMaps.py +241 -0
  125. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_GetCurrentMap.py +225 -0
  126. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_GetTargetPartPoseInCamera.py +298 -0
  127. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_InitialPoseWithTaskPoint.py +281 -0
  128. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_LoadMap.py +281 -0
  129. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_NavigateToTaskPoint.py +281 -0
  130. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_RepublishTFs.py +373 -0
  131. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_SetInitialPose.py +394 -0
  132. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_SetJoyTopic.py +282 -0
  133. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_SetLEDMode.py +468 -0
  134. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_SetLEDMode_free.py +289 -0
  135. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_SpeechSynthesis.py +270 -0
  136. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_TaskPointOperation.py +536 -0
  137. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/__init__.py +43 -0
  138. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_adjustZeroPoint.py +277 -0
  139. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_changeArmCtrlMode.py +275 -0
  140. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_changeArmCtrlModeKuavo.py +236 -0
  141. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_changeMotorParam.py +299 -0
  142. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_changeTorsoCtrlMode.py +274 -0
  143. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_controlLejuClaw.py +408 -0
  144. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_enableHandTouchSensor.py +304 -0
  145. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_fkSrv.py +395 -0
  146. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_footPose6DTargetTrajectoriesSrv.py +426 -0
  147. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_footPoseTargetTrajectoriesSrv.py +409 -0
  148. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_gestureExecute.py +339 -0
  149. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_gestureExecuteState.py +257 -0
  150. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_gestureList.py +418 -0
  151. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_getCurrentGaitName.py +253 -0
  152. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_getMotorParam.py +299 -0
  153. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_getMotorZeroPoints.py +286 -0
  154. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_handForceLevel.py +330 -0
  155. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_jointMoveTo.py +302 -0
  156. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_planArmTrajectoryBezierCurve.py +422 -0
  157. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_planArmTrajectoryCubicSpline.py +490 -0
  158. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_playmusic.py +268 -0
  159. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_setHwIntialState.py +304 -0
  160. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_setMmCtrlFrame.py +273 -0
  161. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_setMotorEncoderRoundService.py +283 -0
  162. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_setTagId.py +275 -0
  163. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_singleStepControl.py +444 -0
  164. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_twoArmHandPoseCmdFreeSrv.py +716 -0
  165. kuavo_humanoid_sdk/msg/kuavo_msgs/srv/_twoArmHandPoseCmdSrv.py +664 -0
  166. kuavo_humanoid_sdk/msg/motion_capture_ik/__init__.py +7 -0
  167. kuavo_humanoid_sdk/msg/ocs2_msgs/__init__.py +7 -0
  168. kuavo_humanoid_sdk/msg/ocs2_msgs/msg/__init__.py +12 -0
  169. kuavo_humanoid_sdk/msg/ocs2_msgs/msg/_constraint.py +142 -0
  170. kuavo_humanoid_sdk/msg/ocs2_msgs/msg/_controller_data.py +121 -0
  171. kuavo_humanoid_sdk/msg/ocs2_msgs/msg/_lagrangian_metrics.py +148 -0
  172. kuavo_humanoid_sdk/msg/ocs2_msgs/msg/_mode_schedule.py +150 -0
  173. kuavo_humanoid_sdk/msg/ocs2_msgs/msg/_mpc_flattened_controller.py +666 -0
  174. kuavo_humanoid_sdk/msg/ocs2_msgs/msg/_mpc_input.py +122 -0
  175. kuavo_humanoid_sdk/msg/ocs2_msgs/msg/_mpc_observation.py +209 -0
  176. kuavo_humanoid_sdk/msg/ocs2_msgs/msg/_mpc_performance_indices.py +140 -0
  177. kuavo_humanoid_sdk/msg/ocs2_msgs/msg/_mpc_solver_data.py +886 -0
  178. kuavo_humanoid_sdk/msg/ocs2_msgs/msg/_mpc_state.py +122 -0
  179. kuavo_humanoid_sdk/msg/ocs2_msgs/msg/_mpc_target_trajectories.py +239 -0
  180. kuavo_humanoid_sdk/msg/ocs2_msgs/msg/_multiplier.py +148 -0
  181. kuavo_humanoid_sdk/msg/ocs2_msgs/srv/__init__.py +1 -0
  182. kuavo_humanoid_sdk/msg/ocs2_msgs/srv/_reset.py +376 -0
  183. kuavo_humanoid_sdk-1.2.2b3208.dist-info/METADATA +297 -0
  184. kuavo_humanoid_sdk-1.2.2b3208.dist-info/RECORD +186 -0
  185. kuavo_humanoid_sdk-1.2.2b3208.dist-info/WHEEL +6 -0
  186. kuavo_humanoid_sdk-1.2.2b3208.dist-info/top_level.txt +1 -0
@@ -0,0 +1,608 @@
1
+ import numpy as np
2
+ import rospy
3
+ from kuavo_humanoid_sdk.common.logger import SDKLogger
4
+ from kuavo_humanoid_sdk.kuavo.core.ros.microphone import Microphone
5
+ from kuavo_humanoid_sdk.kuavo.core.ros.audio import Audio
6
+ from kuavo_humanoid_sdk.kuavo.core.llm_doubao_lib import *
7
+ from kuavo_humanoid_sdk.kuavo.core.llm_doubao_lib.audio_manager import DialogSession
8
+ from kuavo_humanoid_sdk.kuavo.core.llm_doubao_lib.realtime_dialog_client import RealtimeDialogClient
9
+ from kuavo_humanoid_sdk.kuavo.core.llm_doubao_lib import config
10
+ import os
11
+ import asyncio
12
+ import threading
13
+ import queue
14
+ import time
15
+ import struct
16
+ import uuid
17
+ from typing import Optional, Dict, Any
18
+
19
+
20
class ROSDialogSession(DialogSession):
    """Dialog session that plays server audio through the ROS audio interface.

    The parent ``__init__`` is deliberately not called; the attributes this
    file relies on are initialised here and all PyAudio-related handles are
    set to ``None``. Audio received from the server is converted to int16
    samples, resampled from 24 kHz to 16 kHz and handed to
    ``audio_interface.publish_audio_chunk``.
    """

    # Largest number of samples published to ROS in a single message.
    _MAX_SAMPLES_PER_CHUNK = 8192
    # Maximum number of pending chunks; the oldest one is dropped beyond this.
    _MAX_BUFFERED_CHUNKS = 50
    # Sample rates used when converting server audio for ROS playback.
    _SERVER_SAMPLE_RATE = 24000
    _ROS_SAMPLE_RATE = 16000

    def __init__(self, ws_config: Dict[str, Any], audio_interface: Audio, enable_signal_handler: bool = False):
        """Create the session, its websocket client and the chunk publisher thread.

        Args:
            ws_config: Connection configuration passed to ``RealtimeDialogClient``.
            audio_interface: Object whose ``publish_audio_chunk`` is used for playback.
            enable_signal_handler: When True, install a SIGINT handler that
                stops the session (only possible from the main thread).
        """
        # Initialize session ID and client without calling parent __init__
        self.session_id = str(uuid.uuid4())
        self.client = RealtimeDialogClient(config=ws_config, session_id=self.session_id)

        # Reference to the audio interface used for ROS audio publishing
        self.audio_interface = audio_interface

        # Session state flags
        self.is_running = True
        self.is_session_finished = False
        self.is_user_querying = False
        self.is_sending_chat_tts_text = False
        # NOTE(review): every received audio payload is appended here and
        # nothing in this file reads or clears it, so it grows for the whole
        # session. Kept because the parent class may consume it - confirm.
        self.audio_buffer = b''

        # Text of the AI reply accumulated across streaming TTS events
        self._current_ai_response = ""

        # Pending audio chunks waiting to be published by the worker thread
        self.audio_chunk_buffer = []
        self.buffer_lock = threading.Lock()

        # PyAudio is not used at all; keep the attributes defined as None
        self.audio_device = None
        self.audio_queue = None
        self.output_stream = None
        self.input_stream = None
        self.player_thread = None
        self.is_recording = True
        self.is_playing = False  # We don't use PyAudio playing

        # Worker thread that drains audio_chunk_buffer
        self.chunk_processor_thread = threading.Thread(target=self._process_audio_chunks)
        self.chunk_processor_thread.daemon = True
        self.chunk_processor_thread.start()

        # signal.signal() raises ValueError outside the main thread
        if enable_signal_handler:
            try:
                import signal
                signal.signal(signal.SIGINT, self._keyboard_signal)
            except ValueError as e:
                SDKLogger.warning(f"Warning: Cannot set signal handler (not in main thread): {e}")

    def cleanup(self):
        """Stop the worker thread and drop any audio that is still queued."""
        self.is_running = False
        self.is_recording = False
        self.is_playing = False

        with self.buffer_lock:
            self.audio_chunk_buffer.clear()

        # Give the chunk processor a moment to notice is_running == False
        if hasattr(self, 'chunk_processor_thread') and self.chunk_processor_thread.is_alive():
            self.chunk_processor_thread.join(timeout=2)

    def _keyboard_signal(self, sig, frame):
        """SIGINT handler: clear the running/recording/playing flags."""
        SDKLogger.info("receive keyboard Ctrl+C")
        self.is_recording = False
        self.is_playing = False
        self.is_running = False

    def _process_audio_chunks(self):
        """Worker loop: publish queued chunks one at a time until stopped."""
        while self.is_running:
            try:
                with self.buffer_lock:
                    chunk = self.audio_chunk_buffer.pop(0) if self.audio_chunk_buffer else None

                if chunk:
                    self._publish_audio_chunk_to_ros(chunk)
                    # Small delay to avoid flooding the ROS side
                    time.sleep(0.01)
                else:
                    # Nothing queued, poll again shortly
                    time.sleep(0.05)

            except Exception as e:
                SDKLogger.error(f"[Speech] Error processing audio chunks: {e}")
                time.sleep(0.1)

    def _add_audio_chunk_to_buffer(self, audio_chunk):
        """Queue a chunk for the worker thread, dropping the oldest when full."""
        with self.buffer_lock:
            self.audio_chunk_buffer.append(audio_chunk)
            if len(self.audio_chunk_buffer) > self._MAX_BUFFERED_CHUNKS:
                SDKLogger.warn("[Speech] Audio buffer full, dropping oldest chunk")
                self.audio_chunk_buffer.pop(0)

    def _convert_audio_bytes_to_int_list(self, audio_bytes: bytes):
        """Convert raw server audio into a list of int16 samples at 16 kHz.

        Returns:
            The sample list when it fits in one chunk. When the result is
            larger than ``_MAX_SAMPLES_PER_CHUNK`` it is split, queued for the
            worker thread, and an empty list is returned. An empty list is
            also returned when the data is too short or cannot be decoded.
        """
        try:
            if len(audio_bytes) < 4:  # Float32 needs at least 4 bytes
                SDKLogger.warn(f"[Speech] Audio data too short: {len(audio_bytes)} bytes")
                return []

            audio_ints = self._convert_audio_with_format_detection(audio_bytes)
            if not audio_ints:
                SDKLogger.warn(f"[Speech] No audio samples extracted from {len(audio_bytes)} bytes")
                return []

            audio_ints = self._resample_audio(audio_ints, self._SERVER_SAMPLE_RATE, self._ROS_SAMPLE_RATE)

            chunk_size = self._MAX_SAMPLES_PER_CHUNK
            if len(audio_ints) <= chunk_size:
                return audio_ints

            # Ceiling division so exact multiples are not over-counted
            chunk_count = -(-len(audio_ints) // chunk_size)
            SDKLogger.debug(f"[Speech] Splitting large audio chunk ({len(audio_ints)} samples) into {chunk_count} smaller chunks")
            for start in range(0, len(audio_ints), chunk_size):
                self._add_audio_chunk_to_buffer(audio_ints[start:start + chunk_size])
            return []  # Everything was queued

        except Exception as e:
            SDKLogger.error(f"[Speech] Error converting audio bytes to int list: {e}")
            return []

    def _convert_audio_with_format_detection(self, audio_bytes: bytes):
        """Decode PCM bytes, trying little-endian float32 first and int16 second.

        Returns:
            A list of Python ints in the int16 range, or an empty list when
            neither interpretation yields samples.
        """
        try:
            samples = self._decode_float32_pcm(audio_bytes)
            if samples:
                return samples
        except Exception as e:
            SDKLogger.error(f"[Speech] Float32 conversion failed: {e}")

        try:
            samples = self._decode_int16_pcm(audio_bytes)
            if samples:
                variation = max(samples) - min(samples)
                SDKLogger.debug(f"[Speech] Int16 conversion: count={len(samples)}, variation={variation}")
                SDKLogger.debug("[Speech] Using Int16 format")
                return samples
        except Exception as e:
            SDKLogger.error(f"[Speech] Int16 conversion failed: {e}")

        SDKLogger.error("[Speech] Failed to convert audio data with any format")
        return []

    def _decode_float32_pcm(self, audio_bytes: bytes):
        """Interpret the bytes as float32 samples and scale them to int16.

        Returns an empty list when the data does not look like float32 audio:
        length not a multiple of 4, non-finite values, peak amplitude at or
        below 0.001, or a scaled result that varies by no more than 100.
        """
        if len(audio_bytes) == 0 or len(audio_bytes) % 4 != 0:
            return []

        floats = np.frombuffer(audio_bytes, dtype='<f4').astype(np.float64)
        # NaN/inf means the bytes are not float32 audio; let the int16 path try
        if not np.all(np.isfinite(floats)):
            return []

        abs_max = float(np.max(np.abs(floats)))
        if abs_max <= 0.001:  # Also avoids dividing by (almost) zero below
            SDKLogger.warn(f"[Speech] Float32 data range too small (abs_max={abs_max:.6f})")
            return []

        # Scale so the peak lands at 90% of the int16 range (some headroom).
        # NOTE(review): the gain is computed per chunk, so the loudness of
        # consecutive chunks is normalised independently - confirm intended.
        auto_gain = (32767 * 0.9) / abs_max
        scaled = np.clip(floats * auto_gain, -32768, 32767).astype(np.int64)

        # A wide spread of values suggests the float32 interpretation is valid
        if int(scaled.max()) - int(scaled.min()) > 100:
            return scaled.tolist()
        return []

    def _decode_int16_pcm(self, audio_bytes: bytes):
        """Interpret the bytes as little-endian signed 16-bit samples."""
        if len(audio_bytes) == 0 or len(audio_bytes) % 2 != 0:
            return []
        return np.frombuffer(audio_bytes, dtype='<i2').tolist()

    def _resample_audio(self, audio_samples, from_rate, to_rate):
        """Resample a list of samples from ``from_rate`` to ``to_rate``.

        Uses ``scipy.signal.resample`` when scipy is importable and linear
        interpolation otherwise. On any other error the input is returned
        unchanged.

        Returns:
            A list of ints clamped to the int16 range.
        """
        if from_rate == to_rate or len(audio_samples) == 0:
            return audio_samples

        try:
            audio_array = np.asarray(audio_samples, dtype=np.float32)
            # Integer arithmetic avoids float rounding in the target length
            resampled_length = max(1, len(audio_array) * to_rate // from_rate)

            try:
                from scipy import signal
            except ImportError:
                # The previous integer-step decimation was a no-op for
                # 24 kHz -> 16 kHz (24000 // 16000 == 1); interpolate instead.
                SDKLogger.warn("[Speech] scipy not available, using linear interpolation for resampling")
                source_positions = np.arange(resampled_length) * (from_rate / to_rate)
                resampled_audio = np.interp(source_positions, np.arange(len(audio_array)), audio_array)
            else:
                resampled_audio = signal.resample(audio_array, resampled_length)

            # Convert back to int16 and clamp
            return np.clip(resampled_audio, -32768, 32767).astype(np.int16).tolist()

        except Exception as e:
            SDKLogger.error(f"[Speech] Error resampling audio: {e}")
            return audio_samples

    def _publish_audio_chunk_to_ros(self, audio_int_list, gain: int = 1):
        """Publish one chunk through ``audio_interface.publish_audio_chunk``."""
        try:
            if not audio_int_list:
                return

            success = self.audio_interface.publish_audio_chunk(audio_int_list, gain=gain)
            if not success:
                SDKLogger.warn(f"[Speech] Failed to publish audio chunk with {len(audio_int_list)} samples")

        except Exception as e:
            SDKLogger.error(f"[Speech] Error publishing audio to ROS: {e}")

    def handle_server_response(self, response: Dict[str, Any]) -> None:
        """Dispatch one server message: play audio, track events, raise on error.

        Raises:
            Exception: When the server sends a ``SERVER_ERROR`` message.
        """
        if not response:
            return

        # .get() so a frame without these keys is ignored instead of raising
        message_type = response.get('message_type')
        payload_msg = response.get('payload_msg')

        if message_type == 'SERVER_ACK' and isinstance(payload_msg, bytes):
            self._handle_server_audio(payload_msg)
        elif message_type == 'SERVER_FULL_RESPONSE':
            self._handle_server_event(response)
        elif message_type == 'SERVER_ERROR':
            SDKLogger.error(f"服务器错误: {payload_msg}")
            raise Exception("服务器错误")

    def _handle_server_audio(self, audio_data: bytes) -> None:
        """Convert an audio payload and play it through ROS, preserving order."""
        if self.is_sending_chat_tts_text:
            return

        self.audio_buffer += audio_data

        try:
            audio_int_list = self._convert_audio_bytes_to_int_list(audio_data)
            if not audio_int_list:
                # Empty: nothing decoded, or a large chunk was already queued
                return

            # If earlier audio is still queued, queue this chunk too so it
            # cannot overtake it; otherwise publish right away.
            with self.buffer_lock:
                has_backlog = bool(self.audio_chunk_buffer)
            if has_backlog:
                self._add_audio_chunk_to_buffer(audio_int_list)
            else:
                self._publish_audio_chunk_to_ros(audio_int_list)
        except Exception as e:
            SDKLogger.error(f"[Speech] Error playing server audio through ROS: {e}")

    def _handle_server_event(self, response: Dict[str, Any]) -> None:
        """Handle a ``SERVER_FULL_RESPONSE`` message according to its event code."""
        event = response.get('event')
        payload_msg = response.get('payload_msg', {})
        if not isinstance(payload_msg, dict):
            payload_msg = {}

        if event == 451:
            # ASR result: log only the final (non-interim) recognised text
            results = payload_msg.get('results', [])
            if results:
                result = results[0]
                text = result.get('text', '')
                is_interim = result.get('is_interim', True)
                if not is_interim and text:
                    SDKLogger.info(f"[Speech] 用户说话: {text}")

        elif event == 550:
            # Streaming reply text: accumulate until the reply is complete
            content = payload_msg.get('content', '')
            if content:
                self._current_ai_response += content

        elif event == 450:
            SDKLogger.info(f"清空缓存音频: {response.get('session_id')}")
            # Drop audio that has not been played yet
            with self.buffer_lock:
                self.audio_chunk_buffer.clear()
            self.is_user_querying = True

        elif event == 350:
            if self.is_sending_chat_tts_text and payload_msg.get("tts_type") == "chat_tts_text":
                with self.buffer_lock:
                    self.audio_chunk_buffer.clear()
                self.is_sending_chat_tts_text = False

        elif event == 459:
            self.is_user_querying = False

        elif event == 351:
            # Reply finished: log the accumulated text and reset it
            if self._current_ai_response:
                SDKLogger.info(f"[Speech] AI完整回复: {self._current_ai_response}")
                self._current_ai_response = ""

    async def receive_loop(self):
        """Receive and handle server responses until the session-finished event.

        Events 152 and 153 set ``is_session_finished`` and end the loop.
        Cancellation and other exceptions are logged and end the loop.
        """
        try:
            while True:
                response = await self.client.receive_server_response()
                self.handle_server_response(response)
                if 'event' in response and response['event'] in (152, 153):
                    SDKLogger.info(f"receive session finished event: {response['event']}")
                    self.is_session_finished = True
                    break
        except asyncio.CancelledError:
            SDKLogger.info("接收任务已取消")
        except Exception as e:
            SDKLogger.error(f"接收消息错误: {e}")
379
+
380
+
381
class RobotLLMDoubaoCore:
    """Runs a Doubao realtime dialog session fed by the ROS microphone.

    Typical use: ``verify_connection()`` with credentials, then
    ``start_speech_system()``, and finally ``stop_speech_system()``. The
    session runs in a background thread with its own asyncio event loop.
    """

    # Upper bound (seconds) on waiting for the server's session-finished
    # event during shutdown; kept below the 5 s join in stop_speech_system.
    _SESSION_FINISH_TIMEOUT = 3.0

    def __init__(self, subscribe_topic: str = "/micphone_data"):
        """Create the microphone and audio interfaces.

        Args:
            subscribe_topic: Topic name passed to ``Microphone``.
        """
        # Microphone interface
        self.microphone = Microphone(subscribe_topic)

        # ROS Audio interface for direct topic publishing
        self.ros_audio = Audio()

        # Audio parameters
        self.SAMPLE_RATE = 16000
        self.CHANNELS = 1
        self.BIT_RESOLUTION = 16
        self.BYTES_PER_SAMPLE = self.BIT_RESOLUTION // 8

        # Dialog session management
        self.dialog_session: Optional[ROSDialogSession] = None
        self.is_running = False
        self.event_loop = None
        self.session_thread = None
        self.ws_config = None

        # Audio queue for ROS microphone data
        self.audio_queue = queue.Queue()

        SDKLogger.info("[Speech] RobotLLMDoubaoCore initialized")

    def _setup_websocket_config(self, app_id: str, access_key: str):
        """Build ``self.ws_config`` from the given credentials."""
        self.ws_config = {
            "base_url": "wss://openspeech.bytedance.com/api/v3/realtime/dialogue",
            "headers": {
                "X-Api-App-ID": app_id,
                "X-Api-Access-Key": access_key,
                "X-Api-Resource-Id": "volc.speech.dialog",
                # NOTE(review): hard-coded value; presumably a fixed
                # service-wide key rather than a private credential - confirm.
                "X-Api-App-Key": "PlgvMymc7f3tQnJ6",
                "X-Api-Connect-Id": config.ws_connect_config["headers"]["X-Api-Connect-Id"],
            }
        }

    def _run_async_session(self):
        """Thread target: run the dialog session on a dedicated event loop."""
        self.event_loop = asyncio.new_event_loop()
        asyncio.set_event_loop(self.event_loop)

        try:
            self.event_loop.run_until_complete(self._async_session_main())
        except Exception as e:
            SDKLogger.error(f"[Speech] Dialog session error: {e}")
        finally:
            self.event_loop.close()
            # The session is over however it ended; keep the flag truthful so
            # is_system_running() does not report a dead session as running.
            self.is_running = False

    async def _async_session_main(self):
        """Connect, run the dialog until stopped or finished, then shut down."""
        session = self.dialog_session
        receive_task = None
        audio_task = None
        try:
            # Establish WebSocket connection first (reconnect after test)
            connection_success = await session.client.start_connection()
            if not connection_success:
                SDKLogger.error("[Speech] Failed to establish WebSocket connection in session")
                return

            await session.client.start_session()
            SDKLogger.info("[Speech] Speech session started successfully")

            # Receive server responses and forward microphone audio concurrently
            receive_task = asyncio.create_task(session.receive_loop())
            audio_task = asyncio.create_task(self._process_ros_microphone_data())

            # Send hello message
            await session.client.say_hello()

            # Run until asked to stop, the session finishes, or the receive
            # loop dies (after which no response would be processed anyway).
            while self.is_running and not session.is_session_finished and not receive_task.done():
                await asyncio.sleep(0.1)

            # Stop sending microphone audio, but keep receiving: only the
            # receive loop can observe the session-finished event.
            audio_task.cancel()

            await session.client.finish_session()
            deadline = time.monotonic() + self._SESSION_FINISH_TIMEOUT
            while (not session.is_session_finished
                   and not receive_task.done()
                   and time.monotonic() < deadline):
                await asyncio.sleep(0.1)
            if not session.is_session_finished:
                SDKLogger.warn("[Speech] Session-finished event not received, closing connection anyway")

            await session.client.finish_connection()
            await session.client.close()

            SDKLogger.info(f"[Speech] Dialog session ended, logid: {session.client.logid}")

        except Exception as e:
            SDKLogger.error(f"[Speech] Session error: {e}")
        finally:
            # Make sure neither task outlives the session, even on errors
            tasks = [task for task in (receive_task, audio_task) if task is not None]
            for task in tasks:
                if not task.done():
                    task.cancel()
            if tasks:
                await asyncio.gather(*tasks, return_exceptions=True)
            if session:
                session.cleanup()

    async def _process_ros_microphone_data(self):
        """Forward microphone data to the dialog service while running."""
        SDKLogger.info("[Speech] Starting ROS microphone data processing")

        while self.is_running:
            try:
                # Get audio data from ROS microphone
                audio_data = self.microphone.get_data()

                if audio_data is not None and len(audio_data) > 0:
                    # Convert numpy array to bytes if needed
                    if isinstance(audio_data, np.ndarray):
                        audio_bytes = audio_data.tobytes()
                    else:
                        audio_bytes = audio_data

                    # Send audio data to dialog service
                    await self.dialog_session.client.task_request(audio_bytes)

                await asyncio.sleep(0.01)  # Small delay to prevent CPU overload

            except asyncio.CancelledError:
                # Never swallow cancellation, whatever its base class is
                raise
            except Exception as e:
                SDKLogger.warn(f"[Speech] Error processing ROS microphone data: {e}")
                await asyncio.sleep(0.1)

    def _discard_dialog_session(self):
        """Clean up and forget the current dialog session, if any."""
        if self.dialog_session is not None:
            self.dialog_session.cleanup()
            self.dialog_session = None

    def verify_connection(self, app_id: str, access_key: str) -> bool:
        """Create a dialog session and test that the WebSocket can connect.

        The test connection is closed again; the session thread reconnects
        when the speech system is started.

        Args:
            app_id: Value sent as the ``X-Api-App-ID`` header.
            access_key: Value sent as the ``X-Api-Access-Key`` header.

        Returns:
            True when the test connection succeeded and a session is ready,
            False otherwise (including when the speech system is running).
        """
        if not app_id or not access_key:
            SDKLogger.error("[Speech] App ID and Access Key are required")
            return False

        if self.is_running:
            # Replacing the session would pull it out from under the thread
            SDKLogger.error("[Speech] Cannot verify connection while the speech system is running")
            return False

        # Setup WebSocket configuration
        self._setup_websocket_config(app_id, access_key)

        # Release the previous session (and its worker thread) before replacing it
        self._discard_dialog_session()

        # Use custom ROS-integrated DialogSession with Audio interface
        session = ROSDialogSession(self.ws_config, self.ros_audio, enable_signal_handler=False)

        # Private loop used only for the test; it is not installed as the
        # thread's current loop, so no closed loop is left behind.
        loop = asyncio.new_event_loop()
        try:
            connection_successful = loop.run_until_complete(session.client.start_connection())
            if connection_successful:
                # Close the test connection since we'll reconnect in _async_session_main
                loop.run_until_complete(session.client.close())
        except Exception as e:
            SDKLogger.error(f"[Speech] Failed to test WebSocket connection: {e}")
            session.cleanup()
            return False
        finally:
            loop.close()

        if not connection_successful:
            SDKLogger.error("[Speech] WebSocket connection failed")
            session.cleanup()
            return False

        self.dialog_session = session
        SDKLogger.info("[Speech] WebSocket connected successfully")
        return True

    def start_speech_system(self):
        """Start the speech dialog system with Doubao service.

        Does nothing (besides logging) when the system is already running,
        when no session has been prepared by ``verify_connection()``, or when
        the prepared session has already been used.
        """
        if self.is_running:
            SDKLogger.warn("[Speech] Speech system is already running")
            return

        if self.dialog_session is None:
            SDKLogger.error("[Speech] Dialog session not initialized. Please call verify_connection() first with valid credentials.")
            return

        if self.dialog_session.is_session_finished or not self.dialog_session.is_running:
            # A finished/cleaned-up session has no worker thread any more
            SDKLogger.error("[Speech] Dialog session has already been used. Please call verify_connection() again.")
            return

        try:
            SDKLogger.info("[Speech] Starting speech system")

            # Start dialog session in separate thread
            self.is_running = True
            self.session_thread = threading.Thread(target=self._run_async_session)
            self.session_thread.daemon = True
            self.session_thread.start()

            # Wait a bit for connection to establish
            time.sleep(2)

            if self.session_thread.is_alive():
                SDKLogger.info("[Speech] Speech system started successfully")
            else:
                SDKLogger.error("[Speech] Speech session terminated during startup")

        except Exception as e:
            SDKLogger.error(f"[Speech] Failed to start speech system: {e}")
            self.is_running = False

    def stop_speech_system(self):
        """Stop the Doubao speech system and wait briefly for its thread."""
        if not self.is_running:
            SDKLogger.warn("[Speech] Speech system is not running")
            return

        try:
            SDKLogger.info("[Speech] Stopping speech system")

            # Signal to stop
            self.is_running = False

            # Stop dialog session
            if self.dialog_session:
                self.dialog_session.is_running = False
                self.dialog_session.is_recording = False
                self.dialog_session.is_playing = False

            # Wait for session thread to finish
            if self.session_thread and self.session_thread.is_alive():
                self.session_thread.join(timeout=5)

            SDKLogger.info("[Speech] Speech system stopped successfully")

        except Exception as e:
            SDKLogger.error(f"[Speech] Failed to stop speech system: {e}")

    def is_system_running(self) -> bool:
        """Check if the speech system is currently running."""
        return self.is_running

    def get_session_status(self) -> dict:
        """Return a dict with ``is_running``, ``has_session``, ``session_finished`` and ``logid``."""
        status = {
            "is_running": self.is_running,
            "has_session": self.dialog_session is not None,
            "session_finished": False,
            "logid": ""
        }

        if self.dialog_session:
            status["session_finished"] = self.dialog_session.is_session_finished
            if self.dialog_session.client:
                status["logid"] = self.dialog_session.client.logid

        return status