agora-python-server-sdk 2.1.0__tar.gz → 2.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of agora-python-server-sdk might be problematic. Click here for more details.

Files changed (40) hide show
  1. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/PKG-INFO +1 -5
  2. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/_ctypes_handle/_audio_frame_observer.py +15 -6
  3. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/_ctypes_handle/_ctypes_data.py +3 -3
  4. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/_ctypes_handle/_local_user_observer.py +8 -2
  5. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/agora_base.py +1 -1
  6. agora_python_server_sdk-2.1.2/agora/rtc/audio_sessionctrl.py +484 -0
  7. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/local_user_observer.py +1 -1
  8. agora_python_server_sdk-2.1.2/agora/rtc/voice_detection.py +240 -0
  9. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora_python_server_sdk.egg-info/PKG-INFO +2 -6
  10. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora_python_server_sdk.egg-info/SOURCES.txt +2 -0
  11. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/setup.py +1 -1
  12. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/MANIFEST.in +0 -0
  13. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/README.md +0 -0
  14. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/__init__.py +0 -0
  15. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/_ctypes_handle/_rtc_connection_observer.py +0 -0
  16. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/_ctypes_handle/_video_encoded_frame_observer.py +0 -0
  17. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/_ctypes_handle/_video_frame_observer.py +0 -0
  18. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/_utils/globals.py +0 -0
  19. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/agora_parameter.py +0 -0
  20. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/agora_service.py +0 -0
  21. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/audio_encoded_frame_sender.py +0 -0
  22. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/audio_frame_observer.py +0 -0
  23. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/audio_pcm_data_sender.py +0 -0
  24. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/audio_vad.py +0 -0
  25. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/local_audio_track.py +0 -0
  26. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/local_user.py +0 -0
  27. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/local_video_track.py +0 -0
  28. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/media_node_factory.py +0 -0
  29. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/remote_audio_track.py +0 -0
  30. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/remote_video_track.py +0 -0
  31. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/rtc_connection.py +0 -0
  32. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/rtc_connection_observer.py +0 -0
  33. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/video_encoded_frame_observer.py +0 -0
  34. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/video_encoded_image_sender.py +0 -0
  35. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/video_frame_observer.py +0 -0
  36. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/video_frame_sender.py +0 -0
  37. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora_python_server_sdk.egg-info/dependency_links.txt +0 -0
  38. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora_python_server_sdk.egg-info/top_level.txt +0 -0
  39. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/pyproject.toml +0 -0
  40. {agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/setup.cfg +0 -0
@@ -1,10 +1,8 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: agora_python_server_sdk
3
- Version: 2.1.0
3
+ Version: 2.1.2
4
4
  Summary: A Python SDK for Agora Server
5
5
  Home-page: https://github.com/AgoraIO-Extensions/Agora-Python-Server-SDK
6
- License: UNKNOWN
7
- Platform: UNKNOWN
8
6
  Classifier: Intended Audience :: Developers
9
7
  Classifier: License :: OSI Approved :: MIT License
10
8
  Classifier: Topic :: Multimedia :: Sound/Audio
@@ -51,5 +49,3 @@ pip install agora_python_server_sdk
51
49
  python agora_rtc/examples/example_audio_pcm_send.py --appId=xxx --channelId=xxx --audioFile=./test_data/demo.pcm --sampleRate=16000 --numOfChannels=1
52
50
  ```
53
51
 
54
-
55
-
@@ -5,6 +5,7 @@ import ctypes
5
5
  from ..audio_frame_observer import *
6
6
  import logging
7
7
  logger = logging.getLogger(__name__)
8
+ #from ..audio_sessionctrl import *
8
9
 
9
10
  ON_RECORD_AUDIO_FRAME_CALLBACK = ctypes.CFUNCTYPE(ctypes.c_int, AGORA_HANDLE, ctypes.c_char_p, ctypes.POINTER(AudioFrameInner))
10
11
  ON_PLAYBACK_AUDIO_FRAME_CALLBACK = ctypes.CFUNCTYPE(ctypes.c_int, AGORA_HANDLE, ctypes.c_char_p, ctypes.POINTER(AudioFrameInner))
@@ -43,6 +44,7 @@ class AudioFrameObserverInner(ctypes.Structure):
43
44
  self.on_ear_monitoring_audio_frame = ON_EAR_MONITORING_AUDIO_FRAME_CALLBACK(self._on_ear_monitoring_audio_frame)
44
45
  self.on_playback_audio_frame_before_mixing = ON_PLAYBACK_AUDIO_FRAME_BEFORE_MIXING_CALLBACK(self._on_playback_audio_frame_before_mixing)
45
46
  self.on_get_audio_frame_position = ON_GET_AUDIO_FRAME_POSITION_CALLBACK(self._on_get_audio_frame_position)
47
+ self._session_ctrl_manager = None #SessionCtrlManager()
46
48
 
47
49
  # self.on_get_playback_audio_frame_param = ON_GET_PLAYBACK_AUDIO_FRAME_PARAM_CALLBACK(self._on_get_playback_audio_frame_param)
48
50
  # self.on_get_record_audio_frame_param = ON_GET_RECORD_AUDIO_FRAME_PARAM_CALLBACK(self._on_get_record_audio_frame_param)
@@ -51,30 +53,37 @@ class AudioFrameObserverInner(ctypes.Structure):
51
53
 
52
54
  def _on_record_audio_frame(self, local_user_handle, channel_id, audio_frame_inner):
53
55
  logger.debug(f"AudioFrameObserverInner _on_record_audio_frame: {local_user_handle}, {channel_id}, {audio_frame_inner}")
54
- channel_id_str = channel_id.decode('utf-8')
56
+ channel_id_str = channel_id.decode('utf-8') if channel_id else ""
55
57
  frame = audio_frame_inner.contents.get()
56
58
  ret = self.observer.on_record_audio_frame(self.local_user, channel_id_str, frame)
57
59
  return ret
58
60
 
59
61
  def _on_playback_audio_frame(self, local_user_handle, channel_id, audio_frame_inner):
60
62
  logger.debug(f"AudioFrameObserverInner _on_playback_audio_frame: {local_user_handle}, {channel_id}, {audio_frame_inner}")
61
- channel_id_str = channel_id.decode('utf-8')
63
+ channel_id_str = channel_id.decode('utf-8') if channel_id else ""
62
64
  frame = audio_frame_inner.contents.get()
63
65
  ret = self.observer.on_playback_audio_frame(self.local_user, channel_id_str, frame)
64
66
  return ret
65
67
 
66
68
  def _on_mixed_audio_frame(self, local_user_handle, channel_id, audio_frame_inner):
67
69
  logger.debug(f"AudioFrameObserverInner _on_mixed_audio_frame: {local_user_handle}, {channel_id}, {audio_frame_inner}")
68
- ret = self.observer.on_mixed_audio_frame(self.local_user, audio_frame_inner)
70
+ channel_id_str = channel_id.decode('utf-8') if channel_id else ""
71
+ frame = audio_frame_inner.contents.get()
72
+ ret = self.observer.on_mixed_audio_frame(self.local_user, channel_id_str, frame)
69
73
  return ret
70
74
 
71
75
  def _on_ear_monitoring_audio_frame(self, local_user_handle, audio_frame_inner):
72
76
  logger.debug(f"AudioFrameObserverInner _on_ear_monitoring_audio_frame: {local_user_handle}, {audio_frame_inner}")
73
- ret = self.observer.on_ear_monitoring_audio_frame(self.local_user, audio_frame_inner)
77
+ frame = audio_frame_inner.contents.get()
78
+ ret = self.observer.on_ear_monitoring_audio_frame(self.local_user, frame)
74
79
  return ret
75
80
 
76
- def _on_playback_audio_frame_before_mixing(self, local_user_handle, channel_id, user_id, audio_frame_inner: AudioFrameInner):
77
- # logger.debug(f"AudioFrameObserverInner _on_playback_audio_frame_before_mixing: {local_user_handle}, {channel_id}, {user_id}, {audio_frame_inner}")
81
+ def _on_playback_audio_frame_before_mixing(self, local_user_handle, channel_id, user_id, audio_frame_inner):
82
+ #session control here !
83
+ #ret, c_data = self._session_ctrl_manager.process_audio_frame(user_id, audio_frame_inner.contents.buffer, audio_frame_inner.contents.samples_per_channel)
84
+
85
+ #print("ret = ", ret)
86
+ #logger.debug(f"AudioFrameObserverInner _on_playback_audio_frame_before_mixing: {local_user_handle}, {channel_id}, {user_id}, {audio_frame_inner}")
78
87
  if channel_id is None:
79
88
  channel_id_str = ""
80
89
  else:
@@ -979,7 +979,7 @@ class EncodedAudioFrameInfoInner(ctypes.Structure):
979
979
 
980
980
  class AudioVolumeInfoInner(ctypes.Structure):
981
981
  _fields_ = [
982
- ("user_id", ctypes.c_uint),
982
+ ("user_id", user_id_t),
983
983
  ("volume", ctypes.c_uint),
984
984
  ("vad", ctypes.c_uint),
985
985
  ("voicePitch", ctypes.c_double)
@@ -987,10 +987,10 @@ class AudioVolumeInfoInner(ctypes.Structure):
987
987
 
988
988
  def get(self):
989
989
  return AudioVolumeInfo(
990
- user_id=self.user_id,
990
+ user_id=self.user_id.decode() if self.user_id else "",
991
991
  volume=self.volume,
992
992
  vad=self.vad,
993
- voicePitch=self.voicePitch
993
+ voice_pitch=self.voicePitch
994
994
  )
995
995
 
996
996
 
@@ -272,8 +272,14 @@ class RTCLocalUserObserverInner(ctypes.Structure):
272
272
 
273
273
  def _on_audio_volume_indication(self, local_user_handle, audio_volume_info_ptr, speaker_number, total_volume):
274
274
  logger.debug(f"LocalUserCB _on_audio_volume_indication: {local_user_handle}, {audio_volume_info_ptr}, {speaker_number}, {total_volume}")
275
- audio_volume_info = audio_volume_info_ptr.contents
276
- self.local_user_observer.on_audio_volume_indication(self.local_user, audio_volume_info, speaker_number, total_volume)
275
+
276
+ # enum
277
+ audio_volume_info_list = []
278
+ for i in range(speaker_number):
279
+ speaker_info = audio_volume_info_ptr[i]
280
+ audio_volume_info = audio_volume_info_ptr[i].get()
281
+ audio_volume_info_list.append(audio_volume_info)
282
+ self.local_user_observer.on_audio_volume_indication(self.local_user, audio_volume_info_list, speaker_number, total_volume)
277
283
 
278
284
  def _on_active_speaker(self, local_user_handle, user_id):
279
285
  logger.debug(f"LocalUserCB _on_active_speaker: {local_user_handle}, {user_id}")
@@ -222,7 +222,7 @@ class AudioFrame:
222
222
 
223
223
  @dataclass(frozen=True, kw_only=True)
224
224
  class AudioVolumeInfo:
225
- user_id: int
225
+ user_id: str
226
226
  volume: int
227
227
  vad: int
228
228
  voice_pitch: float
@@ -0,0 +1,484 @@
1
+ import time
2
+ import ctypes
3
+
4
+ import os
5
+ import sys
6
+ from enum import Enum,IntEnum
7
+ import logging
8
+ logger = logging.getLogger(__name__)
9
+
10
+ from . import lib_path
11
+
12
+ # native shared-library (DLL) loading
13
+
14
# Pick the platform-specific file name of the native session-control library
# shipped under ``lib_path``.
if sys.platform == 'darwin':
    agora_sessionctrl_lib_path = os.path.join(lib_path, 'libagora_session_control.dylib')
elif sys.platform == 'linux':
    agora_sessionctrl_lib_path = os.path.join(lib_path, 'libagora_session_control.so')
else:
    # Previously the path variable was simply left unbound here, so the import
    # died with an opaque NameError; fail with an explicit message instead.
    raise ImportError(
        f"agora session control library is not available for platform {sys.platform!r}"
    )

try:
    sessctrl_lib = ctypes.CDLL(agora_sessionctrl_lib_path)
except OSError as e:
    logger.error(f"Error loading the library: {e}")
    logger.error(f"Attempted to load from: {agora_sessionctrl_lib_path}")
    # NOTE(review): exiting the interpreter at import time is kept as-is because
    # it is existing module-level behavior; consider raising ImportError instead.
    sys.exit(1)
24
+
25
+
26
+ # constant & enum definitions
27
+ from enum import Enum
28
+
29
# --- Session-control constants -------------------------------------------
# NOTE(review): these presumably mirror the macros of the native header;
# verify the values against the C header shipped with the library.

# General limits / version stamp.
AGORA_UAP_SESSCTRL_VERSION = 20240626
AGORA_UAP_SESSCTRL_MAX_USERID_LEN = 128
AGORA_UAP_SESSCTRL_MAX_FRMSZ = 1000
AGORA_UAP_SESSCTRL_DEBUG_DUMP = 0

# Histogram sizes used by the event counters.
AGORA_UAP_SESSCTRL_COUNTER_ZERO_HISTOGRAM_NUM = 8
AGORA_UAP_SESSCTRL_COUNTER_LAST_WORD_DELAY_HISTOGRAM_NUM = 11
AGORA_UAP_SESSCTRL_COUNTER_FIRST_WORD_DELAY_HISTOGRAM_NUM = 11
AGORA_UAP_SESSCTRL_COUNTER_VAD_LENGTH_HISTOGRAM_NUM = 12
AGORA_UAP_SESSCTRL_COUNTER_SILENCE_LENGTH_HISTOGRAM_NUM = 18
AGORA_UAP_SESSCTRL_COUNTER_INPUT_VOLUME_HISTOGRAM_NUM = 10
40
+
41
+ # Enum for sample rate
42
class SessCtrlFs(ctypes.c_int):
    """Sample rates in Hz, exposed as plain int class attributes.

    The class derives from ``ctypes.c_int`` so it can be used as a struct
    field type; the attributes themselves are ordinary Python ints.
    """

    kFs_16000 = 16000
    kFs_24000 = 24000
    kFs_32000 = 32000
    kFs_44100 = 44100
    kFs_48000 = 48000
48
+
49
+ # Enum for session control status
50
class SessCtrlStatus(ctypes.c_int):
    """Session-control status codes, exposed as plain int class attributes.

    Derives from ``ctypes.c_int`` so it can serve as the type of a struct field.
    """

    # Not in a session.
    kSCStatus_None = 0
    # Start of sentence.
    kSCStatus_SOS = 1
    # Data keeps flowing in the middle of a sentence.
    kSCStatus_Continue = 2
    # Waiting; turns into EOS after waiting for M ms.
    kSCStatus_Wait4Dec = 3
    # End of sentence.
    kSCStatus_EOS = 4
    # EOS is retried to enforce a "final" from ASR.
    kSCStatus_EOSRETRY = 5
    # EOS retry stopped: max iteration (eosRetryMaxIteration) exceeded.
    kSCStatus_EOSRETRYSTOP = 6
    # Disconnect the ASR server at EOS.
    kSCStatus_EOSDISCONNECT = 7
    # Number of status values.
    kSCStatus_Cnt = 8
60
+
61
+ # Enum for ASR events
62
class SessCtrlAsrEvent(ctypes.c_int):
    """ASR event codes, exposed as plain int class attributes.

    Derives from ``ctypes.c_int`` so it can serve as the type of a struct field.
    """

    # Waiting for "final" has timed out.
    kSCAsrEvent_NONFINAL = 0
    # A "final" event was received from the ASR service.
    kSCAsrEvent_FINAL = 1
    # Time-out event.
    kSCAsrEvent_TIME_OUT = 2
    # Number of events.
    kSCAsrEvent_Cnt = 3
67
+
68
+ # Enum for sentence finalization status
69
class SessCtrlSentenceFinal(ctypes.c_int):
    """Sentence finalization states, exposed as plain int class attributes.

    Derives from ``ctypes.c_int`` so it can serve as the type of a struct field.
    """

    kSCSentence_NONFINAL = 0  # sentence is not final
    kSCSentence_FINAL = 1     # sentence is final
    kSCSentence_UNKNOWN = 2   # state is unknown
73
+
74
+
75
+ # Define the structures
76
class SessCtrl_StaticCfg(ctypes.Structure):
    """ctypes layout of the static session-control configuration.

    The constructor takes no arguments and pre-populates a few fields;
    every field that is not assigned below keeps the zero value that
    ``ctypes.Structure`` gives freshly created instances.
    """

    _fields_ = [
        ("userID", ctypes.c_char_p),
        ("frmSz", ctypes.c_int),
        ("smplFrq", ctypes.c_int),
        ("persistentVoiceLenOfSOS", ctypes.c_int),
        ("prePaddingLenOfSessCtrlSOS", ctypes.c_int),
        ("postPaddingLenOfSessCtrlEOS", ctypes.c_int),
        ("unVoiceLenOfTriggerSessCtrlEOS", ctypes.c_int),
        ("unVoiceLenOfTriggerServerEOS", ctypes.c_int),
        ("eosWaitTime", ctypes.c_int),
        ("eosRetryWaitTime", ctypes.c_int),
        ("eosRetryPadding", ctypes.c_int),
        ("eosRetryMaxIteration", ctypes.c_int),
        ("enableMainSpeakerDet", ctypes.c_int),
    ]

    def __init__(self):
        # c_char_p only accepts bytes (or an integer address / None) on
        # Python 3; the former ``ctypes.c_char_p("")`` raised TypeError and
        # made the structure impossible to instantiate.
        self.userID = b""
        self.frmSz = 160
        self.smplFrq = 16000
        self.persistentVoiceLenOfSOS = 0
        self.prePaddingLenOfSessCtrlSOS = 0
        self.postPaddingLenOfSessCtrlEOS = 0
        self.unVoiceLenOfTriggerSessCtrlEOS = 0
101
+
102
class MSJudge_Param(ctypes.Structure):
    """ctypes layout of the ``MSJudge_Param`` parameter block.

    Instances are created without arguments and start zero-filled.
    """

    _fields_ = [
        ("powScale", ctypes.c_float),
        ("powRatio", ctypes.c_float),
        ("biasDelay", ctypes.c_int),
        ("aggressive", ctypes.c_float),
        ("voiceProbThr", ctypes.c_float),
        ("suppressGain", ctypes.c_float),
        ("mainSpeakerMaintance", ctypes.c_int),
    ]

    def __init__(self):
        """Accept no arguments; leave all fields at their zero defaults."""
114
+
115
+
116
class SessCtrl_DynamCfg(ctypes.Structure):
    """ctypes layout of the dynamic (runtime-adjustable) session-control config.

    Instances are created without arguments and start zero-filled; the
    trailing ``msJude_param`` member embeds an ``MSJudge_Param`` by value.
    """

    _fields_ = [
        ("logLv", ctypes.c_int),
        ("meterRMSThr", ctypes.c_float),
        ("vadThr", ctypes.c_float),
        ("musicGateFlag", ctypes.c_int),
        ("musicThr", ctypes.c_float),
        ("sessCtrlBSVoiceGateFlag", ctypes.c_int),
        ("sessCtrlBSVoiceAggressive", ctypes.c_float),
        ("voiceThr", ctypes.c_float),
        ("sessCtrlFinalRMSThr", ctypes.c_int),
        ("sessCtrlFinalThr", ctypes.c_int),
        ("sessCtrlFinalThrInc", ctypes.c_int),
        ("sessCtrlFinalThrMax", ctypes.c_int),
        ("vadVolumeThr", ctypes.c_float),
        ("sessCtrlTimeOutInMs", ctypes.c_int),
        ("sessCtrlStartSniffWordGapInMs", ctypes.c_int),
        ("sessCtrlWordGapLenInMs", ctypes.c_int),
        ("sessCtrlWordGapLenVolumeThr", ctypes.c_int),
        ("sessCtrlEOSDisconnectFlag", ctypes.c_int),
        ("sessCtrlAiVadBasedDenoiseFlag", ctypes.c_int),
        ("sessCtrlAiVadBasedDenoiseDelayInMs", ctypes.c_int),
        ("sessCtrlAiVadBasedVoiceDenoiseProbThr", ctypes.c_float),
        ("sessCtrlAiVadBasedMusicDenoiseProbThr", ctypes.c_float),
        ("sessCtrlEnableDumpFlag", ctypes.c_int),
        ("msJude_param", MSJudge_Param),
    ]

    def __init__(self):
        """Accept no arguments; leave all fields at their zero defaults."""
145
+
146
class SessCtrl_FrmCtrl(ctypes.Structure):
    """ctypes layout of the per-frame control block (a single int member)."""

    _fields_ = [("trash", ctypes.c_int)]

    def __init__(self):
        """Accept no arguments; ``trash`` stays at its zero default."""
152
+
153
+
154
class SessCtrl_InputData(ctypes.Structure):
    """ctypes layout of one input frame handed to the session-control library.

    Fields:
        pcm: pointer to 16-bit PCM samples.
        frmIdx: frame index.
        ts: timestamp of the frame.
    """

    _fields_ = [
        ("pcm", ctypes.POINTER(ctypes.c_short)),
        ("frmIdx", ctypes.c_int),
        ("ts", ctypes.c_long),
    ]

    def __init__(self):
        # A POINTER(c_short) field rejects a c_void_p instance with
        # "TypeError: incompatible types", which made the old constructor
        # unusable. Store a typed NULL pointer instead.
        self.pcm = ctypes.POINTER(ctypes.c_short)()
        self.frmIdx = 0
        self.ts = 0
165
+
166
class SessCtrl_OutputData(ctypes.Structure):
    """ctypes layout of the result block filled in by the session-control library.

    Instances are created without arguments; ``nSamplesInPcmBuf`` is
    explicitly reset to 0 and all other members start zero-filled.
    """

    _fields_ = [
        ("userID", ctypes.c_char_p),
        ("sessID", ctypes.c_int),
        ("status", SessCtrlStatus),
        ("pcmBuf", ctypes.POINTER(ctypes.c_short)),
        ("nSamplesInPcmBuf", ctypes.c_int),
        ("eosWaitTimeInMs", ctypes.c_int),
        ("startFrmIdx", ctypes.c_int),
        ("startTs", ctypes.c_long),
        ("lastVoiceTs", ctypes.c_long),
        ("avgVadScore", ctypes.c_float),
        ("avgRMS", ctypes.c_float),
    ]

    def __init__(self):
        """Start with an empty output buffer (zero samples)."""
        self.nSamplesInPcmBuf = 0
184
+
185
+
186
class SessCtrl_AsrResponse(ctypes.Structure):
    """ctypes layout describing an ASR response passed to the library.

    Instances are created without arguments and start zero-filled.
    """

    _fields_ = [
        ("sessionID", ctypes.c_int),
        ("event", SessCtrlAsrEvent),
        ("startDataTime", ctypes.c_long),
        ("durationTime", ctypes.c_int),
    ]

    def __init__(self):
        """Accept no arguments; leave all fields at their zero defaults."""
195
+
196
class SessCtrl_AsrHandleResponse(ctypes.Structure):
    """ctypes layout holding the sentence-final verdict (single member)."""

    _fields_ = [("sentenceFinal", SessCtrlSentenceFinal)]

    def __init__(self):
        """Accept no arguments; ``sentenceFinal`` stays at its zero default."""
202
+
203
+
204
class SessCtrl_GetData(ctypes.Structure):
    """ctypes layout of the ``SessCtrl_GetData`` block (a single int member)."""

    _fields_ = [("trash", ctypes.c_int)]

    def __init__(self):
        """Accept no arguments; ``trash`` stays at its zero default."""
210
+
211
class SessCtrl_Counter(ctypes.Structure):
    """ctypes layout of the session-control counters (four int members).

    Instances are created without arguments and start zero-filled.
    """

    _fields_ = [
        (member, ctypes.c_int)
        for member in (
            "rmsVadDataLenInMs",
            "rmsVadReportPeriodInMs",
            "asrDataLenInMs",
            "asrDataReportPeriodInMs",
        )
    ]

    def __init__(self):
        """Accept no arguments; leave all counters at zero."""
220
+
221
class SessCtrl_EventCounter(ctypes.Structure):
    """ctypes layout of the event-counter report.

    Layout: one ``c_long`` uid, seven ``c_int`` scalars, then eight
    histograms that are each declared as ``c_int * 8``.

    NOTE(review): the module-level ``*_HISTOGRAM_NUM`` constants list sizes
    8/11/11/12/18/10, while every histogram here has 8 entries. Confirm the
    array lengths against the native header before relying on
    ``Agora_UAP_SessCtrl_counterEventReport``.
    """

    _fields_ = (
        [("remoteUid", ctypes.c_long)]
        + [
            (scalar, ctypes.c_int)
            for scalar in (
                "sessCtrlReportNumOfFinalInSession",
                "sessCtrlReportNumOfFinalBetweenSession",
                "sessCtrlReportNumOfFinalCrossSession",
                "sessCtrlReportEOSNumbers",
                "sessCtrlReportInputLength",
                "sessCtrlReportOutputLength",
                "sessCtrlReportZeroDataLength",
            )
        ]
        + [
            (histogram, ctypes.c_int * 8)
            for histogram in (
                "sessCtrlReportSendZeroDataLenHistogram",
                "sessCtrlReportLastWordDelayHistogram",
                "sessCtrlReportFirstWordDelayHistogram",
                "sessCtrlReportVadLengthHistogram",
                "sessCtrlReportSilenceLengthHistogram",
                "sessCtrlReportVadProbHistogram",
                "sessCtrlReportSilenceProbHistogram",
                "sessCtrlReportInputVolumeHistogram",
            )
        ]
    )

    def __init__(self):
        """Accept no arguments; leave all counters and histograms at zero."""
242
+
243
+
244
+
245
# Function prototypes
def _bind_sessctrl(symbol, *argtypes):
    """Look up ``symbol`` in ``sessctrl_lib`` and declare its C signature.

    Every bound entry point returns a C ``int``; ``argtypes`` lists the
    ctypes types of its parameters in order.
    """
    func = getattr(sessctrl_lib, symbol)
    func.argtypes = list(argtypes)
    func.restype = ctypes.c_int
    return func


# AGORA_API int Agora_UAP_SessCtrl_create(void** stPtr);
Agora_UAP_SessCtrl_create = _bind_sessctrl(
    "Agora_UAP_SessCtrl_create", ctypes.POINTER(ctypes.c_void_p))

# AGORA_API int Agora_UAP_SessCtrl_destroy(void** stPtr);
Agora_UAP_SessCtrl_destroy = _bind_sessctrl(
    "Agora_UAP_SessCtrl_destroy", ctypes.POINTER(ctypes.c_void_p))

# AGORA_API int Agora_UAP_SessCtrl_counterEventReport(void* stPtr, SessCtrl_EventCounter* pEventCounter);
Agora_UAP_SessCtrl_counterEventReport = _bind_sessctrl(
    "Agora_UAP_SessCtrl_counterEventReport",
    ctypes.c_void_p, ctypes.POINTER(SessCtrl_EventCounter))

# AGORA_API int Agora_UAP_SessCtrl_memAllocate(void* stPtr, const SessCtrl_StaticCfg* pCfg);
Agora_UAP_SessCtrl_memAllocate = _bind_sessctrl(
    "Agora_UAP_SessCtrl_memAllocate",
    ctypes.c_void_p, ctypes.POINTER(SessCtrl_StaticCfg))

# AGORA_API int Agora_UAP_SessCtrl_init(void* stPtr);
Agora_UAP_SessCtrl_init = _bind_sessctrl(
    "Agora_UAP_SessCtrl_init", ctypes.c_void_p)

# AGORA_API int Agora_UAP_SessCtrl_setDynamCfg(void* stPtr, const SessCtrl_DynamCfg* pCfg);
Agora_UAP_SessCtrl_setDynamCfg = _bind_sessctrl(
    "Agora_UAP_SessCtrl_setDynamCfg",
    ctypes.c_void_p, ctypes.POINTER(SessCtrl_DynamCfg))

# AGORA_API int Agora_UAP_SessCtrl_getStaticCfg(const void* stPtr, SessCtrl_StaticCfg* pCfg);
Agora_UAP_SessCtrl_getStaticCfg = _bind_sessctrl(
    "Agora_UAP_SessCtrl_getStaticCfg",
    ctypes.c_void_p, ctypes.POINTER(SessCtrl_StaticCfg))

# AGORA_API int Agora_UAP_SessCtrl_getDefaultStaticCfg(SessCtrl_StaticCfg* pCfg);
Agora_UAP_SessCtrl_getDefaultStaticCfg = _bind_sessctrl(
    "Agora_UAP_SessCtrl_getDefaultStaticCfg",
    ctypes.POINTER(SessCtrl_StaticCfg))

# AGORA_API int Agora_UAP_SessCtrl_getDynamCfg(const void* stPtr, SessCtrl_DynamCfg* pCfg);
Agora_UAP_SessCtrl_getDynamCfg = _bind_sessctrl(
    "Agora_UAP_SessCtrl_getDynamCfg",
    ctypes.c_void_p, ctypes.POINTER(SessCtrl_DynamCfg))

# AGORA_API int Agora_UAP_SessCtrl_getDefaultDynamCfg(SessCtrl_FrmCtrl* frmCtrlPtr, SessCtrl_DynamCfg* pDynamCfg);
Agora_UAP_SessCtrl_getDefaultDynamCfg = _bind_sessctrl(
    "Agora_UAP_SessCtrl_getDefaultDynamCfg",
    ctypes.POINTER(SessCtrl_FrmCtrl), ctypes.POINTER(SessCtrl_DynamCfg))

# AGORA_API int Agora_UAP_SessCtrl_getCounter(void* stPtr, SessCtrl_Counter* pCounter);
Agora_UAP_SessCtrl_getCounter = _bind_sessctrl(
    "Agora_UAP_SessCtrl_getCounter",
    ctypes.c_void_p, ctypes.POINTER(SessCtrl_Counter))

# AGORA_API int Agora_UAP_SessCtrl_proc(void* stPtr, const SessCtrl_FrmCtrl* pCtrl, const SessCtrl_InputData* pIn, SessCtrl_OutputData* pOut);
Agora_UAP_SessCtrl_proc = _bind_sessctrl(
    "Agora_UAP_SessCtrl_proc",
    ctypes.c_void_p,
    ctypes.POINTER(SessCtrl_FrmCtrl),
    ctypes.POINTER(SessCtrl_InputData),
    ctypes.POINTER(SessCtrl_OutputData))

# Not bound yet:
# AGORA_API int Agora_UAP_SessCtrl_handleAsrResponse(void* stPtr, const SessCtrl_AsrResponse* pAsrResponse, SessCtrl_OutputData* pOut, SessCtrl_AsrHandleResponseFinal* pFinal);
313
+
314
+
315
+
316
class SessionControl:
    """Wrapper around one native session-control instance for a single user.

    Lifecycle: construct, call ``_init()`` once to create and configure the
    native handle, feed frames through ``process()``, and finally call
    ``release()``. ``is_expired()`` reports how long the instance has been idle.
    """

    def __init__(self, userid:ctypes.c_char_p):
        """Prepare configuration structures for ``userid``.

        Args:
            userid: user identifier. A ``str`` is UTF-8 encoded before it is
                stored in the static config; ``bytes`` (or a ``c_char_p``)
                are stored as-is.
        """
        self._handler = ctypes.c_void_p(0)
        self._static_config = SessCtrl_StaticCfg()
        self._dynamic_config = SessCtrl_DynamCfg()
        self._initialized = False
        # Structures are allocated once and reused by every process() call.
        self._sessctrl_in_data = SessCtrl_InputData()
        self._sessctrl_out_data = SessCtrl_OutputData()
        self._frm_ctrl = SessCtrl_FrmCtrl()
        self._frm_count = 0
        self._user_id = userid
        self._prepare_sessctrl_cfg()
        # For lifetime control: last access time in milliseconds.
        self._last_access_time = time.time()*1000

    def _prepare_sessctrl_cfg(self) -> int:
        """Load the library defaults, then override them with this SDK's values.

        Returns:
            The first non-zero code returned by the two "get default" calls,
            or 0 when both returned 0. (The previous ``a and b`` expression
            hid a failure of the second call whenever the first returned 0.)
        """
        ret_static = Agora_UAP_SessCtrl_getDefaultStaticCfg(ctypes.byref(self._static_config))
        ret_dynamic = Agora_UAP_SessCtrl_getDefaultDynamCfg(ctypes.byref(self._frm_ctrl), ctypes.byref(self._dynamic_config))

        # Static config. The struct keeps a reference to the assigned bytes
        # object, so the user id stays alive as long as the config does.
        user_id = self._user_id
        if isinstance(user_id, str):
            user_id = user_id.encode('utf-8')
        static_cfg = self._static_config
        static_cfg.userID = user_id
        static_cfg.frmSz = 160
        static_cfg.smplFrq = SessCtrlFs.kFs_16000
        static_cfg.persistentVoiceLenOfSOS = 10
        static_cfg.prePaddingLenOfSessCtrlSOS = 0
        static_cfg.postPaddingLenOfSessCtrlEOS = 0
        static_cfg.unVoiceLenOfTriggerSessCtrlEOS = 1000000
        static_cfg.unVoiceLenOfTriggerServerEOS = 0
        static_cfg.eosWaitTime = 0
        static_cfg.eosRetryWaitTime = 0
        static_cfg.eosRetryPadding = 0
        static_cfg.eosRetryMaxIteration = 0

        # Dynamic config.
        dynamic_cfg = self._dynamic_config
        dynamic_cfg.logLv = 10
        dynamic_cfg.sessCtrlTimeOutInMs = 1000000
        dynamic_cfg.sessCtrlStartSniffWordGapInMs = 1000000
        dynamic_cfg.sessCtrlWordGapLenInMs = 10
        dynamic_cfg.sessCtrlWordGapLenVolumeThr = 0
        dynamic_cfg.sessCtrlEnableDumpFlag = 0
        dynamic_cfg.vadThr = -2
        dynamic_cfg.voiceThr = -2
        dynamic_cfg.sessCtrlFinalRMSThr = 80
        dynamic_cfg.sessCtrlFinalThr = 200
        dynamic_cfg.sessCtrlFinalThrInc = 100
        dynamic_cfg.sessCtrlFinalThrMax = 3
        dynamic_cfg.meterRMSThr = 65
        dynamic_cfg.sessCtrlBSVoiceGateFlag = 1
        dynamic_cfg.sessCtrlBSVoiceAggressive = 4
        dynamic_cfg.sessCtrlAiVadBasedDenoiseFlag = 1
        dynamic_cfg.sessCtrlAiVadBasedDenoiseDelayInMs = 50
        dynamic_cfg.sessCtrlAiVadBasedVoiceDenoiseProbThr = 0.5
        dynamic_cfg.sessCtrlAiVadBasedMusicDenoiseProbThr = 0.5

        return ret_static if ret_static != 0 else ret_dynamic

    def _destroy_handle(self) -> None:
        """Destroy the native handle (if any) and reset it to NULL."""
        if self._handler:
            Agora_UAP_SessCtrl_destroy(ctypes.byref(self._handler))
        self._handler = ctypes.c_void_p(0)

    def _init(self) -> int:
        """Create, allocate, initialize and configure the native instance.

        Returns:
            0 when already initialized, otherwise the code of the last native
            call performed; a negative value means that step failed. On a
            failed step the freshly created handle is destroyed so it does
            not leak.
        """
        if self._initialized:
            return 0

        # Create the native handle.
        self._handler = ctypes.c_void_p()
        ret = Agora_UAP_SessCtrl_create(ctypes.byref(self._handler))
        if ret < 0:
            return ret

        # Prepare static & dynamic configuration.
        self._prepare_sessctrl_cfg()

        # Allocate memory, initialize, then apply the dynamic configuration.
        ret = Agora_UAP_SessCtrl_memAllocate(self._handler, ctypes.byref(self._static_config))
        if ret >= 0:
            ret = Agora_UAP_SessCtrl_init(self._handler)
        if ret >= 0:
            ret = Agora_UAP_SessCtrl_setDynamCfg(self._handler, ctypes.byref(self._dynamic_config))
        if ret < 0:
            self._destroy_handle()
            return ret

        self._initialized = (ret == 0)
        return ret

    def process (self, c_buffer:ctypes.c_void_p, size_in_short: int) -> tuple[int, ctypes.c_void_p]:
        """Run one frame through the native session control.

        Args:
            c_buffer: address of the 16-bit PCM samples of the frame.
            size_in_short: number of samples; currently not used.

        Returns:
            ``(ret, data)``. ``ret`` is negative on error (also when the
            instance was never initialized), 0 when there is no output, or
            the output length in bytes; ``data`` points to the output PCM
            buffer and is a NULL ``c_void_p`` when there is none.
        """
        # Update last access time (ms) for the expiry check.
        self._last_access_time = time.time()*1000

        # Never hand a NULL / uninitialized handle to the native library.
        if not self._initialized:
            return -1, ctypes.c_void_p(0)

        in_data = self._sessctrl_in_data
        out_data = self._sessctrl_out_data

        # The field is POINTER(c_short); assigning a raw c_void_p raises
        # TypeError, so cast the incoming address explicitly.
        in_data.pcm = ctypes.cast(c_buffer, ctypes.POINTER(ctypes.c_short))
        in_data.frmIdx = self._frm_count
        self._frm_count += 1
        # TODO:
        #   - After mute, is it necessary to destroy the session ctrl?
        #     Not destroying it for now.
        #   - User id: keep aligned with the external one.

        # inputData.ts = (frmCnt * frmSz) / (MT_TEST_FS / 1000);
        # Integer arithmetic: the c_long field rejects float values.
        in_data.ts = self._frm_count * self._static_config.frmSz * 1000 // SessCtrlFs.kFs_16000

        # Reset the output block before each call.
        out_data.status = SessCtrlStatus.kSCStatus_None
        out_data.pcmBuf = ctypes.POINTER(ctypes.c_short)()
        out_data.nSamplesInPcmBuf = 0

        ret = Agora_UAP_SessCtrl_proc(self._handler, ctypes.byref(self._frm_ctrl), ctypes.byref(in_data), ctypes.byref(out_data))
        if ret < 0:
            # TODO: decide whether processing should stop after an error.
            return ret, ctypes.c_void_p(0)

        # nSamplesInPcmBuf counts 16-bit samples; report the size in bytes.
        if out_data.nSamplesInPcmBuf > 0:
            return out_data.nSamplesInPcmBuf * 2, out_data.pcmBuf
        return 0, ctypes.c_void_p(0)

    def release(self):
        """Destroy the native instance if it was initialized."""
        if self._initialized:
            Agora_UAP_SessCtrl_destroy(ctypes.byref(self._handler))
            self._initialized = False
            self._handler = ctypes.c_void_p(0)

    def is_expired(self, interval: int) -> bool:
        """Return True when the last access is more than ``interval`` ms ago."""
        return ( time.time()*1000 - self._last_access_time > interval )
440
+
441
+ # manager for per-user session control instances
442
class SessionCtrlManager:
    """Keeps one session-control object per user id and drops idle ones.

    Sessions are created lazily on the first frame of a user and released
    once they have not been accessed for ``expired_duration`` milliseconds.
    """

    def __init__(self, update_interval: int = 100, expired_duration: int = 1000*10) -> None:
        """
        Args:
            update_interval: minimum time in ms between two expiry sweeps.
            expired_duration: idle time in ms after which a session is released.
        """
        self._sessions = {}
        self._last_update_time = time.time()*1000  # in ms
        self._update_interval = update_interval
        self._expired_duration = expired_duration

    def process_audio_frame(self, userid: ctypes.c_char_p, c_buffer:ctypes.c_void_p, size_in_short: int) -> tuple[int, ctypes.c_void_p]:
        """Route one audio frame to the session of ``userid``.

        Expired sessions are swept first (rate-limited), then the frame is
        handed to the user's session, which is created on demand.

        Returns:
            Whatever the session's ``process()`` returns.
        """
        self._update_check()
        session = self._get_session(userid)
        return session.process(c_buffer, size_in_short)

    def _get_session(self, userid:ctypes.c_char_p) -> "SessionControl":
        """Return the session for ``userid``, creating and initializing it if needed."""
        session = self._sessions.get(userid)
        if session is None:
            session = SessionControl(userid)
            ret = session._init()
            if ret < 0:
                # The session is still cached (unchanged behavior); only the
                # failure is surfaced so it no longer goes unnoticed.
                logger.error(f"session control init failed for user {userid}: {ret}")
            self._sessions[userid] = session
        return session

    def _update_check(self) -> None:
        """Release and forget expired sessions, at most once per update interval."""
        now = time.time()*1000  # in ms
        if now - self._last_update_time < self._update_interval:
            return
        self._last_update_time = now
        # Collect first, delete afterwards: removing entries while iterating
        # the dict raises "RuntimeError: dictionary changed size during iteration".
        expired_ids = [
            userid
            for userid, session in self._sessions.items()
            if session.is_expired(self._expired_duration)
        ]
        for userid in expired_ids:
            self._sessions.pop(userid).release()

    def release(self, userid: int) -> None:
        """Release and remove the session of ``userid``; unknown ids are ignored."""
        session = self._sessions.pop(userid, None)
        if session is not None:
            session.release()

    def clear(self) -> None:
        """Release every session and empty the manager."""
        for session in self._sessions.values():
            session.release()
        self._sessions.clear()
@@ -76,7 +76,7 @@ class IRTCLocalUserObserver():
76
76
  def on_remote_video_track_statistics(self, agora_local_user, agora_remote_video_track, stats):
77
77
  pass
78
78
 
79
- def on_audio_volume_indication(self, agora_local_user, speakers, speaker_number, total_volume):
79
+ def on_audio_volume_indication(self, agora_local_user, speakers_list, speaker_number, total_volume):
80
80
  pass
81
81
 
82
82
  def on_active_speaker(self, agora_local_user, userId):
@@ -0,0 +1,240 @@
1
+ import time
2
+ import ctypes
3
+
4
+ import os
5
+ import sys
6
+ from enum import Enum,IntEnum
7
+ from collections import deque
8
+ from .agora_base import AudioFrame, AudioParams
9
+ import logging
10
+ logger = logging.getLogger(__name__)
11
+
12
class AudioVadConfigV2():
    """Tuning parameters consumed by ``AudioVadV2``.

    Window sizes are expressed in audio frames:

    * ``pre_start_recognize_count`` -- frames kept in front of the start
      window.
    * ``start_recognize_count`` -- frames inspected to decide that speech
      has started.
    * ``stop_recognize_count`` -- frames inspected to decide that speech
      has stopped.

    Ratios:

    * ``activePercent`` -- active_frames / total_frames needed to enter
      the speaking state.
    * ``inactivePercent`` -- inactive_frames / total_frames needed to
      leave the speaking state.

    Voice-probability gates (a frame only counts as active when its voice
    probability exceeds the gate):

    * ``start_voiceprob`` -- gate used while not speaking. A lower gate
      marks more frames as active, so the start phase begins earlier.
    * ``stop_voiceprob`` -- gate used while speaking. A higher gate marks
      more frames as inactive, so the end phase begins earlier.

    RMS gates:

    * ``start_rms`` / ``stop_rms`` -- both take ``rmsThreshold``. A frame
      only counts as active when its rms exceeds the gate, so a higher
      value marks fewer frames as active. The original notes suggest -50
      for a quiet environment and a value between -40 and -30 for a noisy
      one.
    """

    def __init__(self, preStartRecognizeCount:int, startRecognizeCount:int, stopRecognizeCount:int,
                 activePercent:float, inactivePercent:float, start_voiceprob: int, stop_voiceporb:int, rmsThreshold:float):
        # Window sizes, in frames.
        self.pre_start_recognize_count = preStartRecognizeCount
        self.start_recognize_count = startRecognizeCount
        self.stop_recognize_count = stopRecognizeCount

        # Ratios that trigger the start / stop transitions.
        self.activePercent = activePercent
        self.inactivePercent = inactivePercent

        # Voice-probability gates; the original notes give 70 (start) and
        # 50 (stop) as typical values.
        self.start_voiceprob = start_voiceprob
        # The keyword is spelled ``stop_voiceporb`` in the public signature
        # and is kept that way for caller compatibility.
        self.stop_voiceprob = stop_voiceporb

        # A single RMS threshold feeds both phases; -50 is the typical value
        # given in the original notes.
        self.start_rms = self.stop_rms = rmsThreshold
38
+
39
class VadDataV2:
    """One audio frame paired with the activity verdict computed for it."""

    def __init__(self, data: AudioFrame, is_activity: bool):
        # _audio_frame: the frame exactly as received.
        # _is_activity: True when the frame was classified as voice activity.
        self._audio_frame, self._is_activity = data, is_activity
45
+
46
class AudioVadV2():
    """Frame-by-frame voice activity detector with start/stop hysteresis.

    Every frame passed to ``process`` is classified as active or inactive
    (``_is_vad_active``) and pushed into a bounded window:

    * while not speaking, into the *start* window, which holds
      ``pre_start_recognize_count + start_recognize_count`` frames; once it
      is full and the share of active frames behind the "pre" part reaches
      ``activePercent``, the detector reports *start speaking* and emits the
      whole window (pre + start) as one buffer;
    * while speaking, into the *stop* window of ``stop_recognize_count``
      frames; once it is full and the share of inactive frames reaches
      ``inactivePercent``, the detector reports *stop speaking*.

    States returned by ``process``: 0 non-speaking, 1 start speaking,
    2 speaking, 3 stop speaking.
    """

    _kIntervalPerAudioFrameInMS = 10
    # NOTE(review): presumably the byte size of one 10 ms chunk at the given
    # sample rate; none of the three is referenced inside this class.
    _kMaxChunkSizePer10MSFor16K = 320
    _kMaxChunkSizePer10MSFor32K = 640
    _kMaxChunkSizePer10MSFor48K = 960
    _vad_state_nonspeaking = 0
    _vad_state_startspeaking = 1
    _vad_state_speaking = 2
    _vad_state_stopspeaking = 3

    # Diagnostics go through logging instead of print() so the per-frame hot
    # path stays silent unless debug logging is enabled.
    _log = logging.getLogger(__name__)

    def __init__(self, config: "AudioVadConfigV2"):
        """Create a detector in the non-speaking state using ``config``."""
        self._vad_configure = config
        # 0: non-speaking, 1: start speaking, 2: speaking, 3: stop speaking
        self._cur_state = self._vad_state_nonspeaking
        self._data = bytearray()
        self._start_size = config.pre_start_recognize_count + config.start_recognize_count
        self._start_queue = deque(maxlen=self._start_size)
        self._stop_queue = deque(maxlen=config.stop_recognize_count)
        # Trend queue: not implemented in this version (2024-10-29), so it
        # stays None and _push_to_trend must not be called.
        self._trend_queue = None
        self._trend_window = config.stop_recognize_count // 2

    def _push_to_start(self, data: "VadDataV2") -> tuple[int, bool]:
        """Append to the start window; return (size, window_is_full)."""
        self._start_queue.append(data)
        size = len(self._start_queue)
        return size, size >= self._start_size

    def _push_to_stop(self, data: "VadDataV2") -> tuple[int, bool]:
        """Append to the stop window; return (size, window_is_full)."""
        self._stop_queue.append(data)
        size = len(self._stop_queue)
        return size, size >= self._vad_configure.stop_recognize_count

    def _push_to_trend(self, data: "VadDataV2") -> tuple[int, bool]:
        """Append to the trend queue; return (size, window_is_full).

        The trend queue is None in this version, so calling this raises
        AttributeError.
        """
        self._trend_queue.append(data)
        size = len(self._trend_queue)
        return size, size >= self._trend_window

    def _sum(self, queue: deque) -> int:
        """Count the active frames in ``queue``."""
        return sum(1 for item in queue if item._is_activity)

    def _calculate_sliding_window_ratio(self, arr: deque, window_size: int) -> list[int]:
        """Return ``[active_in_first_half, active_in_second_half]`` of ``arr``.

        ``arr`` is split at ``len(arr) // 2``; the frame at the split index
        belongs to the second half (it used to be dropped from both halves).
        Despite the name the values are counts, not ratios. ``window_size``
        is accepted for signature compatibility and is not used.
        """
        split = len(arr) // 2
        first_half = second_half = 0
        for index, item in enumerate(arr):
            if not item._is_activity:
                continue
            if index < split:
                first_half += 1
            else:
                second_half += 1
        return [first_half, second_half]

    def _get_trend(self, queue: deque) -> int:
        """Return 1 when the second half of ``queue`` is more active than the first.

        Returns 0 otherwise, and also when ``queue`` holds fewer than
        ``self._trend_window`` frames.
        """
        if len(queue) < self._trend_window:
            return 0
        first_half, second_half = self._calculate_sliding_window_ratio(queue, self._trend_window)
        self._log.debug("vad trend: first_half=%s second_half=%s", first_half, second_half)
        return 1 if second_half > first_half else 0

    def _get_silence_count(self, queue: deque, start_inx: int) -> tuple[int, int]:
        """Return ``(len(queue), inactive_frames_from_start_inx_onwards)``.

        The frame at ``start_inx`` itself is included. The previous ``>``
        comparison skipped it, so the first frame of every window was never
        counted as silent.
        """
        silence_count = sum(
            1
            for index, item in enumerate(queue)
            if index >= start_inx and not item._is_activity
        )
        return len(queue), silence_count

    def _move_deque(self, data: bytearray, queue: deque) -> bytearray:
        """Append the buffer of every frame in ``queue`` to ``data`` and return it."""
        # Open question from the original author: should inactive frames be
        # replaced by silence here? Depends on real-world testing.
        for item in queue:
            data.extend(item._audio_frame.buffer)
        return data

    def _clear_queue(self, queue: deque):
        """Remove every frame from ``queue``."""
        queue.clear()

    def _process_start(self, data: "VadDataV2") -> tuple[int, bytearray]:
        """Handle one frame while not speaking.

        Returns ``(state, audio)``. ``audio`` is empty unless speech start is
        detected, in which case ``state`` is the start-speaking state and
        ``audio`` holds the whole start window (pre + start frames).
        """
        _, full = self._push_to_start(data)
        state = self._cur_state
        out = bytearray()
        if not full:
            return state, out

        # Known limitation noted by the original author: if speech already
        # began inside the "pre" part of the window, detection may be late or
        # miss it.
        # Only the frames after the "pre" part are measured against
        # activePercent.
        pre_count = self._vad_configure.pre_start_recognize_count
        total, silence_count = self._get_silence_count(self._start_queue, pre_count)
        total -= pre_count
        # total == 0 happens when start_recognize_count is 0; never trigger
        # in that case instead of dividing by zero.
        if total > 0 and (total - silence_count) / total >= self._vad_configure.activePercent:
            state = self._vad_state_startspeaking
            # Emit pre + start as one buffer, then reset both windows.
            self._move_deque(out, self._start_queue)
            self._clear_queue(self._start_queue)
            self._clear_queue(self._stop_queue)
            self._log.debug("vad: start speaking")
        return state, out

    def _process_speaking(self, data: "VadDataV2") -> tuple[int, bytearray]:
        """Handle one frame while speaking.

        Returns ``(state, buffer)`` where ``buffer`` is the buffer of the
        frame just processed and ``state`` is the stop-speaking state when
        the full stop window reaches ``inactivePercent`` inactive frames.
        """
        state = self._cur_state
        size, full = self._push_to_stop(data)
        self._log.debug("vad stop window: size=%s full=%s", size, full)

        if full:
            # The trend value used to be computed here but was never used, so
            # that per-frame work has been dropped.
            total, silence_count = self._get_silence_count(self._stop_queue, 0)
            # total == 0 happens when stop_recognize_count is 0; never
            # trigger in that case instead of dividing by zero.
            if total > 0 and silence_count / total >= self._vad_configure.inactivePercent:
                state = self._vad_state_stopspeaking
                self._clear_queue(self._stop_queue)
        return state, data._audio_frame.buffer

    def process(self, data: "AudioFrame") -> tuple[int, bytearray]:
        """Feed one frame to the detector.

        Returns ``(state, audio)``:

        * not speaking, no start detected: ``(0, empty bytearray)``;
        * start detected: ``(1, pre + start window audio)``; the detector
          then switches to the speaking state;
        * speaking: ``(2, buffer of this frame)``;
        * stop detected: ``(3, buffer of this frame)``; the detector then
          switches back to the non-speaking state.

        ``(-100, empty bytearray)`` is returned if the internal state is
        neither of the two resting states, which should never happen.
        """
        vad_data = VadDataV2(data, self._is_vad_active(data))

        if self._cur_state == self._vad_state_nonspeaking:
            state, out = self._process_start(vad_data)
            if state == self._vad_state_startspeaking:
                self._cur_state = self._vad_state_speaking
            return state, out

        if self._cur_state == self._vad_state_speaking:
            state, out = self._process_speaking(vad_data)
            if state == self._vad_state_stopspeaking:
                self._cur_state = self._vad_state_nonspeaking
            return state, out

        # default: should never happen
        return -100, bytearray()

    def _is_vad_active(self, data: "AudioFrame") -> bool:
        """Classify one frame as voice activity.

        A frame is active when ``far_field_flag == 1`` and both its
        ``voice_prob`` and ``rms`` exceed the gates of the current phase:
        the stop gates while speaking, the start gates otherwise.
        """
        cfg = self._vad_configure
        if self._cur_state == self._vad_state_speaking:
            voice_gate, rms_gate = cfg.stop_voiceprob, cfg.stop_rms
        else:
            voice_gate, rms_gate = cfg.start_voiceprob, cfg.start_rms
        return bool(
            data.far_field_flag == 1
            and data.voice_prob > voice_gate
            and data.rms > rms_gate
        )
@@ -1,10 +1,8 @@
1
1
  Metadata-Version: 2.1
2
- Name: agora-python-server-sdk
3
- Version: 2.1.0
2
+ Name: agora_python_server_sdk
3
+ Version: 2.1.2
4
4
  Summary: A Python SDK for Agora Server
5
5
  Home-page: https://github.com/AgoraIO-Extensions/Agora-Python-Server-SDK
6
- License: UNKNOWN
7
- Platform: UNKNOWN
8
6
  Classifier: Intended Audience :: Developers
9
7
  Classifier: License :: OSI Approved :: MIT License
10
8
  Classifier: Topic :: Multimedia :: Sound/Audio
@@ -51,5 +49,3 @@ pip install agora_python_server_sdk
51
49
  python agora_rtc/examples/example_audio_pcm_send.py --appId=xxx --channelId=xxx --audioFile=./test_data/demo.pcm --sampleRate=16000 --numOfChannels=1
52
50
  ```
53
51
 
54
-
55
-
@@ -9,6 +9,7 @@ agora/rtc/agora_service.py
9
9
  agora/rtc/audio_encoded_frame_sender.py
10
10
  agora/rtc/audio_frame_observer.py
11
11
  agora/rtc/audio_pcm_data_sender.py
12
+ agora/rtc/audio_sessionctrl.py
12
13
  agora/rtc/audio_vad.py
13
14
  agora/rtc/local_audio_track.py
14
15
  agora/rtc/local_user.py
@@ -23,6 +24,7 @@ agora/rtc/video_encoded_frame_observer.py
23
24
  agora/rtc/video_encoded_image_sender.py
24
25
  agora/rtc/video_frame_observer.py
25
26
  agora/rtc/video_frame_sender.py
27
+ agora/rtc/voice_detection.py
26
28
  agora/rtc/_ctypes_handle/_audio_frame_observer.py
27
29
  agora/rtc/_ctypes_handle/_ctypes_data.py
28
30
  agora/rtc/_ctypes_handle/_local_user_observer.py
@@ -45,7 +45,7 @@ class CustomInstallCommand(install):
45
45
 
46
46
  setup(
47
47
  name='agora_python_server_sdk',
48
- version='2.1.0',
48
+ version='2.1.2',
49
49
  description='A Python SDK for Agora Server',
50
50
  long_description=open('README.md').read(),
51
51
  long_description_content_type='text/markdown',