PyPI - agora-python-server-sdk - Versions diffs - 2.1.0__tar.gz → 2.1.2__tar.gz - Mend

agora-python-server-sdk 2.1.0 (tar.gz) → 2.1.2 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of agora-python-server-sdk might be problematic. Click here for more details.

Files changed (40) [hide / show]

{agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/PKG-INFO RENAMED Viewed

@@ -1,10 +1,8 @@
 Metadata-Version: 2.1
 Name: agora_python_server_sdk
-Version: 2.1.0
+Version: 2.1.2
 Summary: A Python SDK for Agora Server
 Home-page: https://github.com/AgoraIO-Extensions/Agora-Python-Server-SDK
-License: UNKNOWN
-Platform: UNKNOWN
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Topic :: Multimedia :: Sound/Audio
@@ -51,5 +49,3 @@ pip install agora_python_server_sdk
 python agora_rtc/examples/example_audio_pcm_send.py --appId=xxx --channelId=xxx --audioFile=./test_data/demo.pcm --sampleRate=16000 --numOfChannels=1
 ```

{agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/_ctypes_handle/_audio_frame_observer.py RENAMED Viewed

@@ -5,6 +5,7 @@ import ctypes
 from ..audio_frame_observer import *
 import logging
 logger = logging.getLogger(__name__)
+#from ..audio_sessionctrl import *
 ON_RECORD_AUDIO_FRAME_CALLBACK = ctypes.CFUNCTYPE(ctypes.c_int, AGORA_HANDLE, ctypes.c_char_p, ctypes.POINTER(AudioFrameInner))
 ON_PLAYBACK_AUDIO_FRAME_CALLBACK = ctypes.CFUNCTYPE(ctypes.c_int, AGORA_HANDLE, ctypes.c_char_p, ctypes.POINTER(AudioFrameInner))
@@ -43,6 +44,7 @@ class AudioFrameObserverInner(ctypes.Structure):
         self.on_ear_monitoring_audio_frame = ON_EAR_MONITORING_AUDIO_FRAME_CALLBACK(self._on_ear_monitoring_audio_frame)
         self.on_playback_audio_frame_before_mixing = ON_PLAYBACK_AUDIO_FRAME_BEFORE_MIXING_CALLBACK(self._on_playback_audio_frame_before_mixing)
         self.on_get_audio_frame_position = ON_GET_AUDIO_FRAME_POSITION_CALLBACK(self._on_get_audio_frame_position)
+        self._session_ctrl_manager = None #SessionCtrlManager()
         # self.on_get_playback_audio_frame_param = ON_GET_PLAYBACK_AUDIO_FRAME_PARAM_CALLBACK(self._on_get_playback_audio_frame_param)
         # self.on_get_record_audio_frame_param = ON_GET_RECORD_AUDIO_FRAME_PARAM_CALLBACK(self._on_get_record_audio_frame_param)
@@ -51,30 +53,37 @@ class AudioFrameObserverInner(ctypes.Structure):
     def _on_record_audio_frame(self, local_user_handle, channel_id, audio_frame_inner):
         logger.debug(f"AudioFrameObserverInner _on_record_audio_frame: {local_user_handle}, {channel_id}, {audio_frame_inner}")
-        channel_id_str = channel_id.decode('utf-8')
+        channel_id_str = channel_id.decode('utf-8') if channel_id else ""
         frame = audio_frame_inner.contents.get()
         ret = self.observer.on_record_audio_frame(self.local_user, channel_id_str, frame)
         return ret
     def _on_playback_audio_frame(self, local_user_handle, channel_id, audio_frame_inner):
         logger.debug(f"AudioFrameObserverInner _on_playback_audio_frame: {local_user_handle}, {channel_id}, {audio_frame_inner}")
-        channel_id_str = channel_id.decode('utf-8')
+        channel_id_str = channel_id.decode('utf-8') if channel_id else ""
         frame = audio_frame_inner.contents.get()
         ret = self.observer.on_playback_audio_frame(self.local_user, channel_id_str, frame)
         return ret
     def _on_mixed_audio_frame(self, local_user_handle, channel_id, audio_frame_inner):
         logger.debug(f"AudioFrameObserverInner _on_mixed_audio_frame: {local_user_handle}, {channel_id}, {audio_frame_inner}")
-        ret = self.observer.on_mixed_audio_frame(self.local_user, audio_frame_inner)
+        channel_id_str = channel_id.decode('utf-8') if channel_id else ""
+        frame = audio_frame_inner.contents.get()
+        ret = self.observer.on_mixed_audio_frame(self.local_user, channel_id_str, frame)
         return ret
     def _on_ear_monitoring_audio_frame(self, local_user_handle, audio_frame_inner):
         logger.debug(f"AudioFrameObserverInner _on_ear_monitoring_audio_frame: {local_user_handle}, {audio_frame_inner}")
-        ret = self.observer.on_ear_monitoring_audio_frame(self.local_user, audio_frame_inner)
+        frame = audio_frame_inner.contents.get()
+        ret = self.observer.on_ear_monitoring_audio_frame(self.local_user, frame)
         return ret
-    def _on_playback_audio_frame_before_mixing(self, local_user_handle, channel_id, user_id, audio_frame_inner: AudioFrameInner):
-        # logger.debug(f"AudioFrameObserverInner _on_playback_audio_frame_before_mixing: {local_user_handle}, {channel_id}, {user_id}, {audio_frame_inner}")
+    def _on_playback_audio_frame_before_mixing(self, local_user_handle, channel_id, user_id, audio_frame_inner):
+        #session control here !
+        #ret, c_data = self._session_ctrl_manager.process_audio_frame(user_id, audio_frame_inner.contents.buffer, audio_frame_inner.contents.samples_per_channel)
+        #print("ret = ", ret)
+        #logger.debug(f"AudioFrameObserverInner _on_playback_audio_frame_before_mixing: {local_user_handle}, {channel_id}, {user_id}, {audio_frame_inner}")
         if channel_id is None:
             channel_id_str = ""
         else:

{agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/_ctypes_handle/_ctypes_data.py RENAMED Viewed

@@ -979,7 +979,7 @@ class EncodedAudioFrameInfoInner(ctypes.Structure):
 class AudioVolumeInfoInner(ctypes.Structure):
     _fields_ = [
-        ("user_id", ctypes.c_uint),
+        ("user_id", user_id_t),
         ("volume", ctypes.c_uint),
         ("vad", ctypes.c_uint),
         ("voicePitch", ctypes.c_double)
@@ -987,10 +987,10 @@ class AudioVolumeInfoInner(ctypes.Structure):
     def get(self):
         return AudioVolumeInfo(
-            user_id=self.user_id,
+            user_id=self.user_id.decode() if self.user_id else "",
             volume=self.volume,
             vad=self.vad,
-            voicePitch=self.voicePitch
+            voice_pitch=self.voicePitch
         )

{agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/_ctypes_handle/_local_user_observer.py RENAMED Viewed

@@ -272,8 +272,14 @@ class RTCLocalUserObserverInner(ctypes.Structure):
     def _on_audio_volume_indication(self, local_user_handle, audio_volume_info_ptr, speaker_number, total_volume):
         logger.debug(f"LocalUserCB _on_audio_volume_indication: {local_user_handle}, {audio_volume_info_ptr}, {speaker_number}, {total_volume}")
-        audio_volume_info = audio_volume_info_ptr.contents
-        self.local_user_observer.on_audio_volume_indication(self.local_user, audio_volume_info, speaker_number, total_volume)
+        # enum
+        audio_volume_info_list = []
+        for i in range(speaker_number):
+             speaker_info = audio_volume_info_ptr[i]
+             audio_volume_info = audio_volume_info_ptr[i].get()
+             audio_volume_info_list.append(audio_volume_info)
+        self.local_user_observer.on_audio_volume_indication(self.local_user, audio_volume_info_list, speaker_number, total_volume)
     def _on_active_speaker(self, local_user_handle, user_id):
         logger.debug(f"LocalUserCB _on_active_speaker: {local_user_handle}, {user_id}")

{agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/agora_base.py RENAMED Viewed

@@ -222,7 +222,7 @@ class AudioFrame:
 @dataclass(frozen=True, kw_only=True)
 class AudioVolumeInfo:
-    user_id: int
+    user_id: str
     volume: int
     vad: int
     voice_pitch: float

agora_python_server_sdk-2.1.2/agora/rtc/audio_sessionctrl.py ADDED Viewed

@@ -0,0 +1,484 @@
+import time
+import ctypes
+import os
+import sys
+from enum import Enum,IntEnum
+import logging
+logger = logging.getLogger(__name__)
+from . import lib_path
+#dll def
+if sys.platform == 'darwin':
+    agora_sessionctrl_lib_path =os.path.join(lib_path, 'libagora_session_control.dylib')
+elif sys.platform == 'linux':
+    agora_sessionctrl_lib_path =os.path.join(lib_path, 'libagora_session_control.so')
+try:
+    sessctrl_lib = ctypes.CDLL(agora_sessionctrl_lib_path)
+except OSError as e:
+    logger.error(f"Error loading the library: {e}")
+    logger.error(f"Attempted to load from: {agora_sessionctrl_lib_path}")
+    sys.exit(1)
+#const & enu define
+from enum import Enum
+# Define constants
+AGORA_UAP_SESSCTRL_VERSION = 20240626
+AGORA_UAP_SESSCTRL_MAX_USERID_LEN = 128
+AGORA_UAP_SESSCTRL_MAX_FRMSZ = 1000
+AGORA_UAP_SESSCTRL_DEBUG_DUMP = 0
+AGORA_UAP_SESSCTRL_COUNTER_ZERO_HISTOGRAM_NUM = 8
+AGORA_UAP_SESSCTRL_COUNTER_LAST_WORD_DELAY_HISTOGRAM_NUM = 11
+AGORA_UAP_SESSCTRL_COUNTER_FIRST_WORD_DELAY_HISTOGRAM_NUM = 11
+AGORA_UAP_SESSCTRL_COUNTER_VAD_LENGTH_HISTOGRAM_NUM = 12
+AGORA_UAP_SESSCTRL_COUNTER_SILENCE_LENGTH_HISTOGRAM_NUM = 18
+AGORA_UAP_SESSCTRL_COUNTER_INPUT_VOLUME_HISTOGRAM_NUM = 10
+# Enum for sample rate
+class SessCtrlFs(ctypes.c_int):
+    kFs_16000 = 16000
+    kFs_32000 = 32000
+    kFs_44100 = 44100
+    kFs_48000 = 48000
+    kFs_24000 = 24000
+# Enum for session control status
+class SessCtrlStatus(ctypes.c_int):
+    kSCStatus_None = 0          # Not in Session
+    kSCStatus_SOS = 1           # Start Of Sentence
+    kSCStatus_Continue = 2      # Continue sending data during a sentence
+    kSCStatus_Wait4Dec = 3      # Waiting status, It will change status to EOS if waiting for M ms
+    kSCStatus_EOS = 4           # End Of Sentence
+    kSCStatus_EOSRETRY = 5      # Retry of EOS event to enforce "final" from ASR
+    kSCStatus_EOSRETRYSTOP = 6  # Stop EOS retry because over max iteration[eosRetryMaxIteration]
+    kSCStatus_EOSDISCONNECT = 7 # Disconnect ASR server at EOS
+    kSCStatus_Cnt = 8           # Number of status
+# Enum for ASR events
+class SessCtrlAsrEvent(ctypes.c_int):
+    kSCAsrEvent_NONFINAL = 0   # "final"-waiting has been timeout
+    kSCAsrEvent_FINAL = 1      # A "final" event has been received from ASR service
+    kSCAsrEvent_TIME_OUT = 2   # Time out event
+    kSCAsrEvent_Cnt = 3        # Number of events
+# Enum for sentence finalization status
+class SessCtrlSentenceFinal(ctypes.c_int):
+    kSCSentence_NONFINAL = 0   # Non-final
+    kSCSentence_FINAL = 1      # Final
+    kSCSentence_UNKNOWN = 2    # Unknown
+# Define the structures
+class SessCtrl_StaticCfg(ctypes.Structure):
+    _fields_ = [
+        ("userID", ctypes.c_char_p),
+        ("frmSz", ctypes.c_int),
+        ("smplFrq", ctypes.c_int),
+        ("persistentVoiceLenOfSOS", ctypes.c_int),
+        ("prePaddingLenOfSessCtrlSOS", ctypes.c_int),
+        ("postPaddingLenOfSessCtrlEOS", ctypes.c_int),
+        ("unVoiceLenOfTriggerSessCtrlEOS", ctypes.c_int),
+        ("unVoiceLenOfTriggerServerEOS", ctypes.c_int),
+        ("eosWaitTime", ctypes.c_int),
+        ("eosRetryWaitTime", ctypes.c_int),
+        ("eosRetryPadding", ctypes.c_int),
+        ("eosRetryMaxIteration", ctypes.c_int),
+        ("enableMainSpeakerDet", ctypes.c_int)
+    ]
+    def __init__(self):
+        self.userID = ctypes.c_char_p("")
+        self.frmSz = 160
+        self.smplFrq = 16000
+        self.persistentVoiceLenOfSOS = 0
+        self.prePaddingLenOfSessCtrlSOS = 0
+        self.postPaddingLenOfSessCtrlEOS = 0
+        self.unVoiceLenOfTriggerSessCtrlEOS = 0
+        pass
+class MSJudge_Param(ctypes.Structure):
+    _fields_ =	[
+        ("powScale", ctypes.c_float),
+        ("powRatio", ctypes.c_float),
+        ("biasDelay", ctypes.c_int),
+        ("aggressive", ctypes.c_float),
+        ("voiceProbThr", ctypes.c_float),
+        ("suppressGain", ctypes.c_float),
+        ("mainSpeakerMaintance", ctypes.c_int)
+	]
+    def __init__(self):
+        pass
+class SessCtrl_DynamCfg(ctypes.Structure):
+    _fields_ = [
+        ("logLv", ctypes.c_int),
+        ("meterRMSThr", ctypes.c_float),
+        ("vadThr", ctypes.c_float),
+        ("musicGateFlag", ctypes.c_int),
+        ("musicThr", ctypes.c_float),
+        ("sessCtrlBSVoiceGateFlag", ctypes.c_int),
+        ("sessCtrlBSVoiceAggressive", ctypes.c_float),
+        ("voiceThr", ctypes.c_float),
+        ("sessCtrlFinalRMSThr", ctypes.c_int),
+        ("sessCtrlFinalThr", ctypes.c_int),
+        ("sessCtrlFinalThrInc", ctypes.c_int),
+        ("sessCtrlFinalThrMax", ctypes.c_int),
+        ("vadVolumeThr", ctypes.c_float),
+        ("sessCtrlTimeOutInMs", ctypes.c_int),
+        ("sessCtrlStartSniffWordGapInMs", ctypes.c_int),
+        ("sessCtrlWordGapLenInMs", ctypes.c_int),
+        ("sessCtrlWordGapLenVolumeThr", ctypes.c_int),
+        ("sessCtrlEOSDisconnectFlag", ctypes.c_int),
+        ("sessCtrlAiVadBasedDenoiseFlag", ctypes.c_int),
+        ("sessCtrlAiVadBasedDenoiseDelayInMs", ctypes.c_int),
+        ("sessCtrlAiVadBasedVoiceDenoiseProbThr", ctypes.c_float),
+        ("sessCtrlAiVadBasedMusicDenoiseProbThr", ctypes.c_float),
+        ("sessCtrlEnableDumpFlag", ctypes.c_int),
+        ("msJude_param", MSJudge_Param)
+    ]
+    def __init__(self):
+        pass
+class SessCtrl_FrmCtrl(ctypes.Structure):
+    _fields_ = [
+        ("trash", ctypes.c_int)
+    ]
+    def __init__(self):
+        pass
+class SessCtrl_InputData(ctypes.Structure):
+    _fields_ = [
+        ("pcm", ctypes.POINTER(ctypes.c_short)),
+        ("frmIdx", ctypes.c_int),
+        ("ts", ctypes.c_long)
+    ]
+    def __init__(self):
+        self.pcm = ctypes.c_void_p(0)
+        self.frmIdx = 0
+        self.ts = 0
+        pass
+class SessCtrl_OutputData(ctypes.Structure):
+    _fields_ = [
+        ("userID", ctypes.c_char_p),
+        ("sessID", ctypes.c_int),
+        ("status", SessCtrlStatus),
+        ("pcmBuf", ctypes.POINTER(ctypes.c_short)),
+        ("nSamplesInPcmBuf", ctypes.c_int),
+        ("eosWaitTimeInMs", ctypes.c_int),
+        ("startFrmIdx", ctypes.c_int),
+        ("startTs", ctypes.c_long),
+        ("lastVoiceTs", ctypes.c_long),
+        ("avgVadScore", ctypes.c_float),
+        ("avgRMS", ctypes.c_float)
+    ]
+    def __init__(self):
+        self.nSamplesInPcmBuf = 0
+        pass
+class SessCtrl_AsrResponse(ctypes.Structure):
+    _fields_ = [
+        ("sessionID", ctypes.c_int),
+        ("event", SessCtrlAsrEvent),
+        ("startDataTime", ctypes.c_long),
+        ("durationTime", ctypes.c_int)
+    ]
+    def __init__(self):
+        pass
+class SessCtrl_AsrHandleResponse(ctypes.Structure):
+    _fields_ = [
+        ("sentenceFinal", SessCtrlSentenceFinal)
+    ]
+    def __init__(self):
+        pass
+class SessCtrl_GetData(ctypes.Structure):
+    _fields_ = [
+        ("trash", ctypes.c_int)
+    ]
+    def __init__(self):
+        pass
+class SessCtrl_Counter(ctypes.Structure):
+    _fields_ = [
+        ("rmsVadDataLenInMs", ctypes.c_int),
+        ("rmsVadReportPeriodInMs", ctypes.c_int),
+        ("asrDataLenInMs", ctypes.c_int),
+        ("asrDataReportPeriodInMs", ctypes.c_int)
+    ]
+    def __init__(self):
+        pass
+class SessCtrl_EventCounter(ctypes.Structure):
+    _fields_ = [
+        ("remoteUid", ctypes.c_long),
+        ("sessCtrlReportNumOfFinalInSession", ctypes.c_int),
+        ("sessCtrlReportNumOfFinalBetweenSession", ctypes.c_int),
+        ("sessCtrlReportNumOfFinalCrossSession", ctypes.c_int),
+        ("sessCtrlReportEOSNumbers", ctypes.c_int),
+        ("sessCtrlReportInputLength", ctypes.c_int),
+        ("sessCtrlReportOutputLength", ctypes.c_int),
+        ("sessCtrlReportZeroDataLength", ctypes.c_int),
+        ("sessCtrlReportSendZeroDataLenHistogram", ctypes.c_int * 8),
+        ("sessCtrlReportLastWordDelayHistogram", ctypes.c_int * 8),
+        ("sessCtrlReportFirstWordDelayHistogram", ctypes.c_int * 8),
+        ("sessCtrlReportVadLengthHistogram", ctypes.c_int * 8),
+        ("sessCtrlReportSilenceLengthHistogram", ctypes.c_int * 8),
+        ("sessCtrlReportVadProbHistogram", ctypes.c_int * 8),
+        ("sessCtrlReportSilenceProbHistogram", ctypes.c_int * 8),
+        ("sessCtrlReportInputVolumeHistogram", ctypes.c_int * 8)
+    ]
+    def __init__(self):
+        pass
+# Function prototypes
+#AGORA_API int Agora_UAP_SessCtrl_create(void** stPtr);
+Agora_UAP_SessCtrl_create = sessctrl_lib.Agora_UAP_SessCtrl_create
+Agora_UAP_SessCtrl_create.argtypes = [ctypes.POINTER(ctypes.c_void_p)]
+Agora_UAP_SessCtrl_create.restype = ctypes.c_int
+#AGORA_API int Agora_UAP_SessCtrl_destroy(void** stPtr);
+Agora_UAP_SessCtrl_destroy = sessctrl_lib.Agora_UAP_SessCtrl_destroy
+Agora_UAP_SessCtrl_destroy.argtypes = [ctypes.POINTER(ctypes.c_void_p)]
+Agora_UAP_SessCtrl_destroy.restype = ctypes.c_int
+#AGORA_API int Agora_UAP_SessCtrl_counterEventReport(void* stPtr,SessCtrl_EventCounter* pEventCounter);
+Agora_UAP_SessCtrl_counterEventReport = sessctrl_lib.Agora_UAP_SessCtrl_counterEventReport
+Agora_UAP_SessCtrl_counterEventReport.argtypes = [ctypes.c_void_p, ctypes.POINTER(SessCtrl_EventCounter)]
+Agora_UAP_SessCtrl_counterEventReport.restype = ctypes.c_int
+#AGORA_API int Agora_UAP_SessCtrl_memAllocate(void* stPtr, const SessCtrl_StaticCfg* pCfg);
+Agora_UAP_SessCtrl_memAllocate = sessctrl_lib.Agora_UAP_SessCtrl_memAllocate
+Agora_UAP_SessCtrl_memAllocate.restype = ctypes.c_int
+Agora_UAP_SessCtrl_memAllocate.argtypes = [ctypes.c_void_p, ctypes.POINTER(SessCtrl_StaticCfg)]
+#AGORA_API int Agora_UAP_SessCtrl_init(void* stPtr);
+Agora_UAP_SessCtrl_init = sessctrl_lib.Agora_UAP_SessCtrl_init
+Agora_UAP_SessCtrl_init.argtypes = [ctypes.c_void_p]
+Agora_UAP_SessCtrl_init.restype = ctypes.c_int
+#AGORA_API int Agora_UAP_SessCtrl_setDynamCfg(void* stPtr, const SessCtrl_DynamCfg* pCfg);
+Agora_UAP_SessCtrl_setDynamCfg = sessctrl_lib.Agora_UAP_SessCtrl_setDynamCfg
+Agora_UAP_SessCtrl_setDynamCfg.argtypes = [ctypes.c_void_p, ctypes.POINTER(SessCtrl_DynamCfg)]
+Agora_UAP_SessCtrl_setDynamCfg.restype = ctypes.c_int
+#AGORA_API int Agora_UAP_SessCtrl_getStaticCfg(const void* stPtr, SessCtrl_StaticCfg* pCfg);
+Agora_UAP_SessCtrl_getStaticCfg = sessctrl_lib.Agora_UAP_SessCtrl_getStaticCfg
+Agora_UAP_SessCtrl_getStaticCfg.argtypes = [ctypes.c_void_p, ctypes.POINTER(SessCtrl_StaticCfg)]
+Agora_UAP_SessCtrl_getStaticCfg.restype = ctypes.c_int
+#AGORA_API int Agora_UAP_SessCtrl_getDefaultStaticCfg(SessCtrl_StaticCfg* pCfg);
+Agora_UAP_SessCtrl_getDefaultStaticCfg = sessctrl_lib.Agora_UAP_SessCtrl_getDefaultStaticCfg
+Agora_UAP_SessCtrl_getDefaultStaticCfg.argtypes = [ctypes.POINTER(SessCtrl_StaticCfg)]
+Agora_UAP_SessCtrl_getDefaultStaticCfg.restype = ctypes.c_int
+#AGORA_API int Agora_UAP_SessCtrl_getDynamCfg(const void* stPtr, SessCtrl_DynamCfg* pCfg);
+Agora_UAP_SessCtrl_getDynamCfg = sessctrl_lib.Agora_UAP_SessCtrl_getDynamCfg
+Agora_UAP_SessCtrl_getDynamCfg.argtypes = [ctypes.c_void_p, ctypes.POINTER(SessCtrl_DynamCfg)]
+Agora_UAP_SessCtrl_getDynamCfg.restype = ctypes.c_int
+#AGORA_API int Agora_UAP_SessCtrl_getDefaultDynamCfg(SessCtrl_FrmCtrl* frmCtrlPtr, SessCtrl_DynamCfg* pDynamCfg);
+Agora_UAP_SessCtrl_getDefaultDynamCfg = sessctrl_lib.Agora_UAP_SessCtrl_getDefaultDynamCfg
+Agora_UAP_SessCtrl_getDefaultDynamCfg.argtypes = [ctypes.POINTER(SessCtrl_FrmCtrl), ctypes.POINTER(SessCtrl_DynamCfg)]
+Agora_UAP_SessCtrl_getDefaultDynamCfg.restype = ctypes.c_int
+#AGORA_API int Agora_UAP_SessCtrl_getCounter(void* stPtr, SessCtrl_Counter* pCounter);
+Agora_UAP_SessCtrl_getCounter = sessctrl_lib.Agora_UAP_SessCtrl_getCounter
+Agora_UAP_SessCtrl_getCounter.argtypes = [ctypes.c_void_p, ctypes.POINTER(SessCtrl_Counter)]
+Agora_UAP_SessCtrl_getCounter.restype = ctypes.c_int
+#AGORA_API int Agora_UAP_SessCtrl_proc(void* stPtr, const SessCtrl_FrmCtrl* pCtrl, const SessCtrl_InputData* pIn,SessCtrl_OutputData* pOut);
+Agora_UAP_SessCtrl_proc = sessctrl_lib.Agora_UAP_SessCtrl_proc
+Agora_UAP_SessCtrl_proc.argtypes = [ctypes.c_void_p, ctypes.POINTER(SessCtrl_FrmCtrl), ctypes.POINTER(SessCtrl_InputData), ctypes.POINTER(SessCtrl_OutputData)]
+Agora_UAP_SessCtrl_proc.restype = ctypes.c_int
+#AGORA_API int Agora_UAP_SessCtrl_handleAsrResponse(void* stPtr, const SessCtrl_AsrResponse* pAsrResponse, SessCtrl_OutputData* pOut,SessCtrl_AsrHandleResponseFinal* pFinal);
+class SessionControl:
+    def __init__(self, userid:ctypes.c_char_p):
+        self._handler = ctypes.c_void_p(0)
+        self._static_config = SessCtrl_StaticCfg()
+        self._dynamic_config = SessCtrl_DynamCfg()
+        self._initialized = False
+        #pre allocated null buffer struct for proc
+        self._sessctrl_in_data = SessCtrl_InputData()
+        self._sessctrl_out_data = SessCtrl_OutputData()
+        self._frm_ctrl = SessCtrl_FrmCtrl()
+        self._frm_count = 0
+        self._user_id = userid #str type
+        ret = self._prepare_sessctrl_cfg()
+        # for lifetime control
+        self._last_access_time = time.time()*1000 #unit in ms
+        pass
+    def _prepare_sessctrl_cfg(self) -> int:
+		#get default config and default frame config
+        ret_static  = Agora_UAP_SessCtrl_getDefaultStaticCfg(ctypes.byref(self._static_config))
+        ret_dynamic = Agora_UAP_SessCtrl_getDefaultDynamCfg(ctypes.byref(self._frm_ctrl), ctypes.byref(self._dynamic_config))
+		#assign value to static config
+        self._static_config.userID = ctypes.c_char_p(self._user_id.encode('utf-8'))
+        self._static_config.frmSz = 160
+        self._static_config.smplFrq = SessCtrlFs.kFs_16000
+        self._static_config.persistentVoiceLenOfSOS = 10
+        self._static_config.prePaddingLenOfSessCtrlSOS = 0
+        self._static_config.postPaddingLenOfSessCtrlEOS = 0
+        self._static_config.unVoiceLenOfTriggerSessCtrlEOS = 1000000
+        self._static_config.unVoiceLenOfTriggerServerEOS = 0
+        self._static_config.eosWaitTime = 0
+        self._static_config.eosRetryWaitTime =0
+        self._static_config.eosRetryPadding = 0
+        self._static_config.eosRetryMaxIteration = 0
+		#assign value to dynamic config
+        self._dynamic_config.logLv = 10
+        self._dynamic_config.sessCtrlTimeOutInMs = 1000000
+        self._dynamic_config.sessCtrlStartSniffWordGapInMs = 1000000
+        self._dynamic_config.sessCtrlWordGapLenInMs = 10
+        self._dynamic_config.sessCtrlWordGapLenVolumeThr = 0
+        self._dynamic_config.sessCtrlEnableDumpFlag = 0
+        self._dynamic_config.vadThr = -2
+        self._dynamic_config.voiceThr = -2
+        self._dynamic_config.sessCtrlFinalRMSThr = 80
+        self._dynamic_config.sessCtrlFinalThr = 200
+        self._dynamic_config.sessCtrlFinalThrInc = 100
+        self._dynamic_config.sessCtrlFinalThrMax = 3
+        self._dynamic_config.meterRMSThr = 65
+        self._dynamic_config.sessCtrlBSVoiceGateFlag = 1
+        self._dynamic_config.sessCtrlBSVoiceAggressive = 4
+        self._dynamic_config.sessCtrlAiVadBasedDenoiseFlag = 1
+        self._dynamic_config.sessCtrlAiVadBasedDenoiseDelayInMs = 50
+        self._dynamic_config.sessCtrlAiVadBasedVoiceDenoiseProbThr = 0.5
+        self._dynamic_config.sessCtrlAiVadBasedMusicDenoiseProbThr = 0.5
+        return (ret_static and ret_dynamic)
+    def _init(self) -> int:
+        if self._initialized:
+            return 0
+        #create handler
+        self._handler = ctypes.c_void_p()
+        ret = Agora_UAP_SessCtrl_create(ctypes.byref(self._handler))
+        if ret < 0:
+            return ret
+		#prepari static config & dynamic configure
+        ret = self._prepare_sessctrl_cfg()
+		#memory allocate
+        ret = Agora_UAP_SessCtrl_memAllocate(self._handler, ctypes.byref(self._static_config))
+        if ret < 0:
+            return ret
+		#init
+        ret = Agora_UAP_SessCtrl_init(self._handler)
+        if ret < 0:
+            return ret
+		#set dynamic configure
+        ret = Agora_UAP_SessCtrl_setDynamCfg(self._handler, ctypes.byref(self._dynamic_config))
+        if ret < 0:
+            return ret
+        self._initialized = True if ret == 0 else False
+        return ret
+    def process (self, c_buffer:ctypes.c_void_p, size_in_short: int) -> tuple[int, ctypes.c_void_p]: # return ret, pcm data in bytes. ret is len of uint8
+       #update last access time
+        self._last_access_time = time.time()*1000
+        self._sessctrl_in_data.pcm = c_buffer
+        self._sessctrl_in_data.frmIdx = self._frm_count
+        self._frm_count += 1
+        #todo：
+        #如果mute后，是否有必要销毁session ctrl？---暂时不考虑销毁
+        #用户id：和外部对齐
+        #功能点：
+        #
+        #inputData.ts = (frmCnt * frmSz) / (MT_TEST_FS / 1000);
+        self._sessctrl_in_data.ts = self._frm_count * self._static_config.frmSz / (SessCtrlFs.kFs_16000 / 1000)
+        self._sessctrl_out_data.status = SessCtrlStatus.kSCStatus_None
+        self._sessctrl_out_data.pcmBuf = ctypes.c_void_p(0)
+        self._sessctrl_out_data.nSamplesInPcmBuf = 0 #added by me ,ToDo check if needed, parameters need to be rest or not?
+        ret = Agora_UAP_SessCtrl_proc(self._handler, ctypes.byref(self._frm_ctrl), ctypes.byref(self._sessctrl_in_data), ctypes.byref(self._sessctrl_out_data))
+        if ret < 0:
+            return ret, ctypes.c_void_p(0) #exit(1) ?? indicate error and do not continue to process next frame? ToDo
+        #get output data
+        if self._sessctrl_out_data.nSamplesInPcmBuf > 0: #unit: unit16
+            ret = self._sessctrl_out_data.nSamplesInPcmBuf * 2 #unit: unit8
+            return ret, self._sessctrl_out_data.pcmBuf
+        return 0, ctypes.c_void_p(0) #no data
+        pass
+    def release(self):
+        if self._initialized:
+            ret = Agora_UAP_SessCtrl_destroy(self._handler)
+            self._initialized = False
+            self._handler = ctypes.c_void_p(0)
+        pass
+    def is_expired(self, interval: int) -> bool:
+        return ( time.time()*1000 - self._last_access_time > interval )
+    #manager for sessionctrol
+class SessionCtrlManager:
+    def __init__(self, update_interval: int = 100, expired_duration: int = 1000*10) -> None:
+        self._sessions = {}
+        self._last_update_time = time.time()*1000  #in ms
+        self._update_interval = update_interval  # in ms, every 100ms to do check
+        self._expired_duration = expired_duration  #10s expired
+    pass
+    def process_audio_frame(self, userid: ctypes.c_char_p, c_buffer:ctypes.c_void_p, size_in_short: int) -> tuple[int, ctypes.c_void_p]:
+        #check & release the expired sessions
+        ret = self._update_check()
+        #get session
+        #do process within session
+        session = self._get_session(userid)
+        ret, c_datas =  session.process(c_buffer, size_in_short)
+        return ret, c_datas
+        pass
+    def _get_session(self, userid:ctypes.c_char_p) -> SessionControl:
+        if userid not in self._sessions:
+            #add new session & do init process
+            session = SessionControl(userid)
+            session._init()
+            self._sessions[userid] = session
+        return self._sessions[userid]
+    def _update_check(self) -> None:
+        now = time.time()*1000 #in ms
+        if now - self._last_update_time < self._update_interval:
+            return
+        self._last_update_time = now
+        for userid in self._sessions:
+            session = self._sessions[userid]
+            if session.is_expired(self._expired_duration):
+                session.release()
+                del self._sessions[userid]
+    def release(self, userid: int) -> None:
+        if userid in self._sessions:
+            self._sessions[userid].release()
+            del self._sessions[userid]
+        pass
+    def clear(self) -> None:
+        for userid in self._sessions:
+            self._sessions[userid].release()
+        self._sessions.clear()
+        pass

{agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora/rtc/local_user_observer.py RENAMED Viewed

@@ -76,7 +76,7 @@ class IRTCLocalUserObserver():
     def on_remote_video_track_statistics(self, agora_local_user, agora_remote_video_track, stats):
         pass
-    def on_audio_volume_indication(self, agora_local_user, speakers, speaker_number, total_volume):
+    def on_audio_volume_indication(self, agora_local_user, speakers_list, speaker_number, total_volume):
         pass
     def on_active_speaker(self, agora_local_user, userId):

agora_python_server_sdk-2.1.2/agora/rtc/voice_detection.py ADDED Viewed

@@ -0,0 +1,240 @@
+import time
+import ctypes
+import os
+import sys
+from enum import Enum,IntEnum
+from collections import deque
+from .agora_base import AudioFrame, AudioParams
+import logging
+logger = logging.getLogger(__name__)
+class  AudioVadConfigV2():
+    """Tuning parameters consumed by AudioVadV2.
+    Args:
+        preStartRecognizeCount: number of frames kept ahead of the start window.
+        startRecognizeCount: number of frames in the start window.
+        stopRecognizeCount: number of frames in the stop window.
+        activePercent: active_frames / total_frames needed to decide "start speaking".
+        inactivePercent: inactive_frames / total_frames needed to decide "stop speaking".
+        start_voiceprob: voice-probability gate used while not speaking (default 70).
+        stop_voiceporb: voice-probability gate used while speaking (default 50).
+            The misspelled name is kept so existing keyword callers keep working.
+        rmsThreshold: rms gate, stored as both start_rms and stop_rms (default -50).
+    """
+    def __init__(self, preStartRecognizeCount:int, startRecognizeCount:int, stopRecognizeCount:int,
+                 activePercent:float, inactivePercent:float, start_voiceprob: int = 70, stop_voiceporb:int = 50, rmsThreshold:float = -50):
+        self.start_recognize_count = startRecognizeCount
+        self.pre_start_recognize_count = preStartRecognizeCount
+        self.stop_recognize_count = stopRecognizeCount
+        self.activePercent = activePercent #percent value = activity frames / total_frames, to determine start speaking
+        self.inactivePercent = inactivePercent  #percent value = inactive_frames / total_frames, to determine stop speaking
+        #voice prob:
+        # The lower the gate threshold, the higher the probability that a frame is judged as activity,
+        # which allows the start phase to begin earlier.
+        #
+        # Conversely, the higher the gate threshold, the lower the probability that a frame is judged as activity,
+        # and the higher the probability of being judged as inactivity,
+        # which allows the end phase to begin earlier.
+        self.start_voiceprob = start_voiceprob #defaults to 70
+        self.stop_voiceprob = stop_voiceporb #defaults to 50
+        #rms: AudioVadV2._is_vad_active only treats a frame as active when its rms is
+        # above this threshold, so a lower threshold is more sensitive to voice activity.
+        # In a quiet environment, it can be set to -50;
+        # in a noisy environment, it can be set to a value between -40 and -30.
+        self.start_rms = rmsThreshold #defaults to -50
+        self.stop_rms = rmsThreshold #defaults to -50
+class VadDataV2:
+    """One audio frame paired with the activity verdict computed for it."""
+    def __init__(self, data: AudioFrame,   is_activity: bool):
+        #the two fields are independent; the frame object is stored as given, not copied
+        self._is_activity = is_activity
+        self._audio_frame = data
+class AudioVadV2():
+    """Frame-by-frame voice activity detector configured by an AudioVadConfigV2.
+    Every frame is classified as active or inactive (see ``_is_vad_active``) and
+    fed to a two-state machine:
+      * non-speaking: frames accumulate in the start window (pre + start frames).
+        Once the window is full and the share of active frames among the start
+        frames reaches ``activePercent``, the buffered audio is emitted and the
+        detector switches to speaking.
+      * speaking: frames accumulate in the stop window. Once the window is full
+        and the share of inactive frames reaches ``inactivePercent``, the
+        detector switches back to non-speaking.
+    """
+    _kIntervalPerAudioFrameInMS = 10
+    #chunk sizes in bytes per 10 ms; NOTE(review): not referenced inside this class,
+    #and it is unclear whether they hold for every sample layout - confirm before use
+    _kMaxChunkSizePer10MSFor16K = 320
+    _kMaxChunkSizePer10MSFor32K = 640
+    _kMaxChunkSizePer10MSFor48K = 960
+    _vad_state_nonspeaking = 0
+    _vad_state_startspeaking = 1
+    _vad_state_speaking = 2
+    _vad_state_stopspeaking = 3
+    def __init__(self, config: AudioVadConfigV2):
+        """Create a detector in the non-speaking state with empty windows."""
+        self._vad_configure = config
+        self._cur_state = self._vad_state_nonspeaking #0: non-speaking, 1-start speaking, 2-speaking, 3-stop speaking
+        self._data = bytearray()
+        self._start_size = self._vad_configure.pre_start_recognize_count + self._vad_configure.start_recognize_count
+        self._start_queue = deque(maxlen=self._start_size)
+        self._stop_queue = deque(maxlen=self._vad_configure.stop_recognize_count)
+        #trend queue: not impl in this version date: 2024-10-29
+        self._trend_queue = None #deque(maxlen=self._vad_configure.stop_recognize_count)
+        self._trend_window = self._vad_configure.stop_recognize_count//2
+    def _push_to_start(self, data: VadDataV2) -> tuple[int,bool]:
+        """Append ``data`` to the start window; return ``(size, is_full)``."""
+        self._start_queue.append(data)
+        size = len(self._start_queue)
+        return size, size >= self._start_size
+    def _push_to_stop(self, data: VadDataV2) -> tuple[int,bool]:
+        """Append ``data`` to the stop window; return ``(size, is_full)``."""
+        self._stop_queue.append(data)
+        size = len(self._stop_queue)
+        return size, size >= self._vad_configure.stop_recognize_count
+    def _push_to_trend(self, data: VadDataV2) -> tuple[int,bool]:
+        """Append ``data`` to the trend window; return ``(size, is_full)``.
+        NOTE(review): ``_trend_queue`` is set to None in ``__init__`` (trend is not
+        implemented in this version), so calling this raises AttributeError.
+        """
+        self._trend_queue.append(data)
+        size = len(self._trend_queue)
+        return size, size >= self._trend_window
+    def _sum(self,quue: deque) -> int:
+        """Return the number of active frames in ``quue``."""
+        return sum(1 for item in quue if item._is_activity)
+    def _calculate_sliding_window_ratio(self, arr:deque, window_size:int)-> list[float]:
+        """Return the active-frame counts before and after the midpoint of ``arr``.
+        Despite the name the values are counts, not ratios, and ``window_size`` is
+        not used. NOTE(review): the frame sitting exactly at the midpoint index is
+        counted in neither half - kept as-is, confirm whether that is intended.
+        """
+        ratios = []
+        seperator_index = len(arr)//2
+        count_ones = 0
+        for i, item in enumerate(arr):
+            if i == seperator_index:
+                #close the first half and start counting the second one
+                ratios.append(count_ones)
+                count_ones = 0
+            elif item._is_activity:
+                count_ones += 1
+        ratios.append(count_ones)
+        return ratios
+    def _get_trend(self, queue: deque) -> int:
+        """Return 1 when the later half of ``queue`` holds more active frames than the earlier half, else 0."""
+        if len(queue) < self._trend_window:
+            return 0
+        ratios = self._calculate_sliding_window_ratio(queue, self._trend_window)
+        logger.debug("trend counts: %s", ratios)
+        #an empty queue yields a single count, so there is nothing to compare
+        if len(ratios) < 2:
+            return 0
+        return 1 if ratios[1] > ratios[0] else 0
+    #get silence count from deque: totalcount, silence_count
+    def _get_silence_count(self, queue: deque, start_inx:int) -> tuple[int, int]:
+        """Return ``(len(queue), inactive_count)`` with inactive frames counted from ``start_inx`` on.
+        The frame at ``start_inx`` itself is included: callers divide by the number
+        of frames from ``start_inx`` to the end, so skipping it would bias the ratio.
+        """
+        total = len(queue)
+        silence_count = sum(
+            1 for i, item in enumerate(queue) if i >= start_inx and not item._is_activity
+        )
+        return total, silence_count
+    def _move_deque(self, data:bytearray, queue: deque) ->bytearray:
+        """Append the buffer of every frame in ``queue`` to ``data`` and return ``data``."""
+        #open question: should inactive frames be replaced with silence here? depends on real-world testing
+        for item in queue:
+            data.extend(item._audio_frame.buffer)
+        return data
+    def _clear_queue(self, queue: deque):
+        """Remove every frame from ``queue``."""
+        queue.clear()
+    def _process_start(self, data: VadDataV2) -> tuple[int, bytearray]:
+        """Handle one frame while not speaking.
+        Returns ``(state, audio)``: ``state`` is ``_vad_state_startspeaking`` with
+        the concatenated pre + start audio when speech is detected, otherwise the
+        current state with an empty bytearray.
+        """
+        _, full = self._push_to_start(data)
+        state = self._cur_state
+        buffered = bytearray()
+        if not full:
+            return state, buffered
+        #known limitation: if speech already began inside the pre frames it may be detected late or missed
+        pre_count = self._vad_configure.pre_start_recognize_count
+        total, silence_count = self._get_silence_count(self._start_queue, pre_count)
+        total -= pre_count #only the start frames take part in the ratio
+        #total is 0 when start_recognize_count is 0: never trigger instead of dividing by zero
+        if total > 0 and (total - silence_count) / total >= self._vad_configure.activePercent:
+            state = self._vad_state_startspeaking
+            #hand pre + start audio to the caller, then reset both windows
+            self._move_deque(buffered, self._start_queue)
+            self._clear_queue(self._start_queue)
+            self._clear_queue(self._stop_queue)
+            logger.debug("start speaking: %s", len(self._stop_queue))
+        return state, buffered
+    def _process_speaking(self, data: VadDataV2) -> tuple[int, bytearray]:
+        """Handle one frame while speaking.
+        Returns ``(state, audio)`` where ``audio`` is the buffer of the current
+        frame and ``state`` is ``_vad_state_stopspeaking`` once the stop window is
+        full and its inactive share reaches ``inactivePercent``.
+        """
+        state = self._cur_state
+        size, full = self._push_to_stop(data)
+        logger.debug("stop: %s, %s", size, full)
+        if full:
+            #trend check is not implemented in this version, only the inactive ratio decides
+            total, silence_count = self._get_silence_count(self._stop_queue,0)
+            #total is 0 when stop_recognize_count is 0: never trigger instead of dividing by zero
+            if total > 0 and silence_count / total >= self._vad_configure.inactivePercent:
+                state = self._vad_state_stopspeaking
+                self._clear_queue(self._stop_queue)
+        return state, data._audio_frame.buffer
+    def process(self, data:AudioFrame) -> tuple[int, bytearray]:
+        """Feed one audio frame to the detector and return ``(state, audio)``.
+        While not speaking the result is ``(0, empty bytearray)`` until speech is
+        detected, at which point it is ``(1, buffered pre + start audio)`` and the
+        detector switches to speaking. While speaking the result is
+        ``(2, current frame buffer)``, or ``(3, current frame buffer)`` on the frame
+        that ends the utterance, after which the detector is non-speaking again.
+        """
+        is_activity = self._is_vad_active(data)
+        vad_data = VadDataV2(data, is_activity)
+        if self._cur_state == self._vad_state_nonspeaking: #currently silent
+            state, audio = self._process_start(vad_data)
+            if state == self._vad_state_startspeaking:
+                self._cur_state = self._vad_state_speaking
+            return state, audio
+        if self._cur_state == self._vad_state_speaking:
+            state, audio = self._process_speaking(vad_data)
+            if state == self._vad_state_stopspeaking:
+                self._cur_state = self._vad_state_nonspeaking
+            return state, audio
+        #default: should never happen
+        return int(-100), bytearray()
+    def _is_vad_active(self, data: AudioFrame) -> bool:
+        """Classify ``data`` as active using the gates for the current state.
+        The stop gates apply while speaking and the start gates otherwise. A frame
+        is active only when ``far_field_flag`` is 1 and both ``voice_prob`` and
+        ``rms`` are strictly above their gates.
+        """
+        if self._cur_state == self._vad_state_speaking:
+            voice_prob = self._vad_configure.stop_voiceprob
+            rms_prob = self._vad_configure.stop_rms
+        else:
+            voice_prob = self._vad_configure.start_voiceprob
+            rms_prob = self._vad_configure.start_rms
+        return data.far_field_flag == 1 and data.voice_prob > voice_prob and data.rms > rms_prob

{agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora_python_server_sdk.egg-info/PKG-INFO RENAMED Viewed

@@ -1,10 +1,8 @@
 Metadata-Version: 2.1
-Name: agora-python-server-sdk
-Version: 2.1.0
+Name: agora_python_server_sdk
+Version: 2.1.2
 Summary: A Python SDK for Agora Server
 Home-page: https://github.com/AgoraIO-Extensions/Agora-Python-Server-SDK
-License: UNKNOWN
-Platform: UNKNOWN
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Topic :: Multimedia :: Sound/Audio
@@ -51,5 +49,3 @@ pip install agora_python_server_sdk
 python agora_rtc/examples/example_audio_pcm_send.py --appId=xxx --channelId=xxx --audioFile=./test_data/demo.pcm --sampleRate=16000 --numOfChannels=1
 ```

{agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/agora_python_server_sdk.egg-info/SOURCES.txt RENAMED Viewed

@@ -9,6 +9,7 @@ agora/rtc/agora_service.py
 agora/rtc/audio_encoded_frame_sender.py
 agora/rtc/audio_frame_observer.py
 agora/rtc/audio_pcm_data_sender.py
+agora/rtc/audio_sessionctrl.py
 agora/rtc/audio_vad.py
 agora/rtc/local_audio_track.py
 agora/rtc/local_user.py
@@ -23,6 +24,7 @@ agora/rtc/video_encoded_frame_observer.py
 agora/rtc/video_encoded_image_sender.py
 agora/rtc/video_frame_observer.py
 agora/rtc/video_frame_sender.py
+agora/rtc/voice_detection.py
 agora/rtc/_ctypes_handle/_audio_frame_observer.py
 agora/rtc/_ctypes_handle/_ctypes_data.py
 agora/rtc/_ctypes_handle/_local_user_observer.py

{agora_python_server_sdk-2.1.0 → agora_python_server_sdk-2.1.2}/setup.py RENAMED Viewed

@@ -45,7 +45,7 @@ class CustomInstallCommand(install):
 setup(
     name='agora_python_server_sdk',
-    version='2.1.0',
+    version='2.1.2',
     description='A Python SDK for Agora Server',
     long_description=open('README.md').read(),
     long_description_content_type='text/markdown',