agora-python-server-sdk 2.1.6__tar.gz → 2.1.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of agora-python-server-sdk might be problematic. Click here for more details.
- {agora_python_server_sdk-2.1.6/agora_python_server_sdk.egg-info → agora_python_server_sdk-2.1.7}/PKG-INFO +45 -1
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/README.md +45 -1
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/local_user.py +34 -9
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/utils/audio_consumer.py +1 -1
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7/agora_python_server_sdk.egg-info}/PKG-INFO +45 -1
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/setup.py +1 -1
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/MANIFEST.in +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/__init__.py +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/_ctypes_handle/_audio_frame_observer.py +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/_ctypes_handle/_ctypes_data.py +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/_ctypes_handle/_local_user_observer.py +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/_ctypes_handle/_rtc_connection_observer.py +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/_ctypes_handle/_video_encoded_frame_observer.py +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/_ctypes_handle/_video_frame_observer.py +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/_utils/globals.py +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/agora_base.py +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/agora_parameter.py +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/agora_service.py +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/audio_encoded_frame_sender.py +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/audio_frame_observer.py +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/audio_pcm_data_sender.py +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/audio_sessionctrl.py +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/audio_vad_manager.py +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/local_audio_track.py +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/local_user_observer.py +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/local_video_track.py +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/media_node_factory.py +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/remote_audio_track.py +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/remote_video_track.py +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/rtc_connection.py +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/rtc_connection_observer.py +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/utils/vad_dump.py +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/video_encoded_frame_observer.py +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/video_encoded_image_sender.py +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/video_frame_observer.py +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/video_frame_sender.py +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/voice_detection.py +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora_python_server_sdk.egg-info/SOURCES.txt +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora_python_server_sdk.egg-info/dependency_links.txt +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora_python_server_sdk.egg-info/top_level.txt +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/pyproject.toml +0 -0
- {agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: agora_python_server_sdk
|
|
3
|
-
Version: 2.1.6
|
|
3
|
+
Version: 2.1.7
|
|
4
4
|
Summary: A Python SDK for Agora Server
|
|
5
5
|
Home-page: https://github.com/AgoraIO-Extensions/Agora-Python-Server-SDK
|
|
6
6
|
Classifier: Intended Audience :: Developers
|
|
@@ -51,6 +51,12 @@ python agora_rtc/examples/example_audio_pcm_send.py --appId=xxx --channelId=xxx
|
|
|
51
51
|
|
|
52
52
|
# Change log
|
|
53
53
|
|
|
54
|
+
2024.12.17 Release 2.1.7
|
|
55
|
+
--Changes:
|
|
56
|
+
|
|
57
|
+
Fixed the TypeError issue in LocalUser::sub/unsub audio/video.
|
|
58
|
+
Adjusted the default stopRecogCount for VAD from 30 to 50.
|
|
59
|
+
Modified sample_vad.
|
|
54
60
|
## 2024.12.09 Release 2.1.6
|
|
55
61
|
- New Features:
|
|
56
62
|
-- Added AudioVadManager to manage VAD (Voice Activity Detection) instances.
|
|
@@ -279,3 +285,41 @@ Store the LLM results in a cache as they are received.
|
|
|
279
285
|
Perform a reverse scan of the cached data to find the most recent punctuation mark.
|
|
280
286
|
Truncate the data from the start to the most recent punctuation mark and pass it to TTS for synthesis.
|
|
281
287
|
Remove the truncated data from the cache. The remaining data should be moved to the beginning of the cache and continue waiting for additional data from the LLM.
|
|
288
|
+
|
|
289
|
+
## VAD Configuration Parameters
|
|
290
|
+
AgoraAudioVadConfigV2 Properties
|
|
291
|
+
|
|
292
|
+
Property Name Type Description Default Value Value Range
|
|
293
|
+
preStartRecognizeCount int Number of audio frames saved before detecting speech 16 [0, max]
|
|
294
|
+
startRecognizeCount int Total number of audio frames to detect speech start 30 [1, max]
|
|
295
|
+
stopRecognizeCount int Number of audio frames to detect speech stop 50 [1, max]
|
|
296
|
+
activePercent float Percentage of active frames in startRecognizeCount frames 0.7 [0.0, 1.0]
|
|
297
|
+
inactivePercent float Percentage of inactive frames in stopRecognizeCount frames 0.5 [0.0, 1.0]
|
|
298
|
+
startVoiceProb int Probability that an audio frame contains human voice 70 [0, 100]
|
|
299
|
+
stopVoiceProb int Probability that an audio frame contains human voice 70 [0, 100]
|
|
300
|
+
startRmsThreshold int Energy dB threshold for detecting speech start -50 [-100, 0]
|
|
301
|
+
stopRmsThreshold int Energy dB threshold for detecting speech stop -50 [-100, 0]
|
|
302
|
+
Notes:
|
|
303
|
+
startRmsThreshold and stopRmsThreshold:
|
|
304
|
+
|
|
305
|
+
The higher the value, the louder the speaker's voice needs to be compared to the surrounding background noise.
|
|
306
|
+
In quiet environments, it is recommended to use the default value of -50.
|
|
307
|
+
In noisy environments, you can increase the threshold to between -40 and -30 to reduce false positives.
|
|
308
|
+
Adjusting these thresholds based on the actual use case and audio characteristics can achieve optimal performance.
|
|
309
|
+
stopRecognizeCount:
|
|
310
|
+
|
|
311
|
+
This value reflects how long to wait after detecting non-human voice before concluding that the user has stopped speaking. It controls the gap between consecutive speech utterances. Within this gap, VAD will treat adjacent sentences as part of the same speech.
|
|
312
|
+
A shorter gap will increase the likelihood of adjacent sentences being recognized as separate speech segments. Typically, it is recommended to set this value between 50 and 80.
|
|
313
|
+
For example: "Good afternoon, [interval_between_sentences] what are some fun places to visit in Beijing?"
|
|
314
|
+
|
|
315
|
+
If the interval_between_sentences between the speaker's phrases is greater than the stopRecognizeCount, the VAD will recognize the above as two separate VADs:
|
|
316
|
+
|
|
317
|
+
VAD1: Good afternoon
|
|
318
|
+
VAD2: What are some fun places to visit in Beijing?
|
|
319
|
+
If the interval_between_sentences is less than stopRecognizeCount, the VAD will recognize the above as a single VAD:
|
|
320
|
+
|
|
321
|
+
VAD: Good afternoon, what are some fun places to visit in Beijing?
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
If latency is a concern, you can lower this value, or consult with the development team to determine how to manage latency while ensuring semantic continuity in speech recognition. This will help avoid the AI being interrupted too sensitively.
|
|
@@ -36,6 +36,12 @@ python agora_rtc/examples/example_audio_pcm_send.py --appId=xxx --channelId=xxx
|
|
|
36
36
|
|
|
37
37
|
# Change log
|
|
38
38
|
|
|
39
|
+
2024.12.17 Release 2.1.7
|
|
40
|
+
--Changes:
|
|
41
|
+
|
|
42
|
+
Fixed the TypeError issue in LocalUser::sub/unsub audio/video.
|
|
43
|
+
Adjusted the default stopRecogCount for VAD from 30 to 50.
|
|
44
|
+
Modified sample_vad.
|
|
39
45
|
## 2024.12.09 Release 2.1.6
|
|
40
46
|
- New Features:
|
|
41
47
|
-- Added AudioVadManager to manage VAD (Voice Activity Detection) instances.
|
|
@@ -263,4 +269,42 @@ To achieve a balance between clarity and minimal delay, the following steps shou
|
|
|
263
269
|
Store the LLM results in a cache as they are received.
|
|
264
270
|
Perform a reverse scan of the cached data to find the most recent punctuation mark.
|
|
265
271
|
Truncate the data from the start to the most recent punctuation mark and pass it to TTS for synthesis.
|
|
266
|
-
Remove the truncated data from the cache. The remaining data should be moved to the beginning of the cache and continue waiting for additional data from the LLM.
|
|
272
|
+
Remove the truncated data from the cache. The remaining data should be moved to the beginning of the cache and continue waiting for additional data from the LLM.
|
|
273
|
+
|
|
274
|
+
## VAD Configuration Parameters
|
|
275
|
+
AgoraAudioVadConfigV2 Properties
|
|
276
|
+
|
|
277
|
+
Property Name Type Description Default Value Value Range
|
|
278
|
+
preStartRecognizeCount int Number of audio frames saved before detecting speech 16 [0, max]
|
|
279
|
+
startRecognizeCount int Total number of audio frames to detect speech start 30 [1, max]
|
|
280
|
+
stopRecognizeCount int Number of audio frames to detect speech stop 50 [1, max]
|
|
281
|
+
activePercent float Percentage of active frames in startRecognizeCount frames 0.7 [0.0, 1.0]
|
|
282
|
+
inactivePercent float Percentage of inactive frames in stopRecognizeCount frames 0.5 [0.0, 1.0]
|
|
283
|
+
startVoiceProb int Probability that an audio frame contains human voice 70 [0, 100]
|
|
284
|
+
stopVoiceProb int Probability that an audio frame contains human voice 70 [0, 100]
|
|
285
|
+
startRmsThreshold int Energy dB threshold for detecting speech start -50 [-100, 0]
|
|
286
|
+
stopRmsThreshold int Energy dB threshold for detecting speech stop -50 [-100, 0]
|
|
287
|
+
Notes:
|
|
288
|
+
startRmsThreshold and stopRmsThreshold:
|
|
289
|
+
|
|
290
|
+
The higher the value, the louder the speaker's voice needs to be compared to the surrounding background noise.
|
|
291
|
+
In quiet environments, it is recommended to use the default value of -50.
|
|
292
|
+
In noisy environments, you can increase the threshold to between -40 and -30 to reduce false positives.
|
|
293
|
+
Adjusting these thresholds based on the actual use case and audio characteristics can achieve optimal performance.
|
|
294
|
+
stopRecognizeCount:
|
|
295
|
+
|
|
296
|
+
This value reflects how long to wait after detecting non-human voice before concluding that the user has stopped speaking. It controls the gap between consecutive speech utterances. Within this gap, VAD will treat adjacent sentences as part of the same speech.
|
|
297
|
+
A shorter gap will increase the likelihood of adjacent sentences being recognized as separate speech segments. Typically, it is recommended to set this value between 50 and 80.
|
|
298
|
+
For example: "Good afternoon, [interval_between_sentences] what are some fun places to visit in Beijing?"
|
|
299
|
+
|
|
300
|
+
If the interval_between_sentences between the speaker's phrases is greater than the stopRecognizeCount, the VAD will recognize the above as two separate VADs:
|
|
301
|
+
|
|
302
|
+
VAD1: Good afternoon
|
|
303
|
+
VAD2: What are some fun places to visit in Beijing?
|
|
304
|
+
If the interval_between_sentences is less than stopRecognizeCount, the VAD will recognize the above as a single VAD:
|
|
305
|
+
|
|
306
|
+
VAD: Good afternoon, what are some fun places to visit in Beijing?
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
If latency is a concern, you can lower this value, or consult with the development team to determine how to manage latency while ensuring semantic continuity in speech recognition. This will help avoid the AI being interrupted too sensitively.
|
|
@@ -62,7 +62,7 @@ agora_local_user_subscribe_all_audio.argtypes = [AGORA_HANDLE]
|
|
|
62
62
|
|
|
63
63
|
agora_local_user_unsubscribe_audio = agora_lib.agora_local_user_unsubscribe_audio
|
|
64
64
|
agora_local_user_unsubscribe_audio.restype = AGORA_API_C_INT
|
|
65
|
-
agora_local_user_unsubscribe_audio.argtypes = [AGORA_HANDLE,
|
|
65
|
+
agora_local_user_unsubscribe_audio.argtypes = [AGORA_HANDLE, user_id_t]
|
|
66
66
|
|
|
67
67
|
agora_local_user_unsubscribe_all_audio = agora_lib.agora_local_user_unsubscribe_all_audio
|
|
68
68
|
agora_local_user_unsubscribe_all_audio.restype = AGORA_API_C_INT
|
|
@@ -184,7 +184,7 @@ agora_local_user_subscribe_all_video.argtypes = [AGORA_HANDLE, ctypes.POINTER(Vi
|
|
|
184
184
|
|
|
185
185
|
agora_local_user_unsubscribe_video = agora_lib.agora_local_user_unsubscribe_video
|
|
186
186
|
agora_local_user_unsubscribe_video.restype = AGORA_API_C_INT
|
|
187
|
-
agora_local_user_unsubscribe_video.argtypes = [AGORA_HANDLE,
|
|
187
|
+
agora_local_user_unsubscribe_video.argtypes = [AGORA_HANDLE, user_id_t]
|
|
188
188
|
|
|
189
189
|
agora_local_user_unsubscribe_all_video = agora_lib.agora_local_user_unsubscribe_all_video
|
|
190
190
|
agora_local_user_unsubscribe_all_video.restype = AGORA_API_C_INT
|
|
@@ -361,7 +361,13 @@ class LocalUser:
|
|
|
361
361
|
return ret
|
|
362
362
|
|
|
363
363
|
def subscribe_audio(self, user_id):
|
|
364
|
-
|
|
364
|
+
if user_id is None:
|
|
365
|
+
return -1
|
|
366
|
+
uid_str = user_id.encode('utf-8')
|
|
367
|
+
#ret = agora_local_user_subscribe_audio(self.user_handle, ctypes.create_string_buffer(uid_str))
|
|
368
|
+
# note:both ctypes.create_string_buffer and ctypes.c_char_p are all can change python's str to c_char_p
|
|
369
|
+
# but ctypes.c_char_p is more suitable for this case for the c api never change the content of c_char_p
|
|
370
|
+
ret = agora_local_user_subscribe_audio(self.user_handle, ctypes.c_char_p(uid_str))
|
|
365
371
|
return ret
|
|
366
372
|
|
|
367
373
|
def subscribe_all_audio(self):
|
|
@@ -369,7 +375,11 @@ class LocalUser:
|
|
|
369
375
|
return ret
|
|
370
376
|
|
|
371
377
|
def unsubscribe_audio(self, user_id):
|
|
372
|
-
|
|
378
|
+
#validity check
|
|
379
|
+
if user_id is None:
|
|
380
|
+
return -1
|
|
381
|
+
uid_str = user_id.encode('utf-8')
|
|
382
|
+
ret = agora_local_user_unsubscribe_audio(self.user_handle, ctypes.c_char_p(uid_str))
|
|
373
383
|
if ret < 0:
|
|
374
384
|
logger.error("Failed to unsubscribe audio")
|
|
375
385
|
else:
|
|
@@ -485,18 +495,33 @@ class LocalUser:
|
|
|
485
495
|
# return ret
|
|
486
496
|
|
|
487
497
|
def subscribe_video(self, user_id, options: VideoSubscriptionOptions):
|
|
488
|
-
|
|
498
|
+
if user_id is None:
|
|
499
|
+
return -1
|
|
500
|
+
uid_str = user_id.encode('utf-8')
|
|
489
501
|
|
|
490
|
-
|
|
502
|
+
|
|
503
|
+
if options is None:
|
|
504
|
+
inner = VideoSubscriptionOptionsInner()
|
|
505
|
+
else:
|
|
506
|
+
inner = VideoSubscriptionOptionsInner.create(options)
|
|
507
|
+
|
|
508
|
+
c_ptr = ctypes.byref(inner)
|
|
509
|
+
ret = agora_local_user_subscribe_video(self.user_handle, ctypes.c_char_p(uid_str), c_ptr)
|
|
491
510
|
return ret
|
|
492
511
|
|
|
493
512
|
def subscribe_all_video(self, options: VideoSubscriptionOptions):
|
|
494
|
-
|
|
513
|
+
if options is None:
|
|
514
|
+
inner = VideoSubscriptionOptionsInner()
|
|
515
|
+
else:
|
|
516
|
+
inner = VideoSubscriptionOptionsInner.create(options)
|
|
517
|
+
ret = agora_local_user_subscribe_all_video(self.user_handle, ctypes.byref(inner))
|
|
495
518
|
return ret
|
|
496
519
|
|
|
497
520
|
def unsubscribe_video(self, user_id):
|
|
498
|
-
|
|
499
|
-
|
|
521
|
+
if user_id is None:
|
|
522
|
+
return -1
|
|
523
|
+
uid_str = user_id.encode('utf-8')
|
|
524
|
+
ret = agora_local_user_unsubscribe_video(self.user_handle, ctypes.c_char_p(uid_str))
|
|
500
525
|
if ret < 0:
|
|
501
526
|
logger.error("Failed to unsubscribe video")
|
|
502
527
|
else:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: agora_python_server_sdk
|
|
3
|
-
Version: 2.1.6
|
|
3
|
+
Version: 2.1.7
|
|
4
4
|
Summary: A Python SDK for Agora Server
|
|
5
5
|
Home-page: https://github.com/AgoraIO-Extensions/Agora-Python-Server-SDK
|
|
6
6
|
Classifier: Intended Audience :: Developers
|
|
@@ -51,6 +51,12 @@ python agora_rtc/examples/example_audio_pcm_send.py --appId=xxx --channelId=xxx
|
|
|
51
51
|
|
|
52
52
|
# Change log
|
|
53
53
|
|
|
54
|
+
2024.12.17 Release 2.1.7
|
|
55
|
+
--Changes:
|
|
56
|
+
|
|
57
|
+
Fixed the TypeError issue in LocalUser::sub/unsub audio/video.
|
|
58
|
+
Adjusted the default stopRecogCount for VAD from 30 to 50.
|
|
59
|
+
Modified sample_vad.
|
|
54
60
|
## 2024.12.09 Release 2.1.6
|
|
55
61
|
- New Features:
|
|
56
62
|
-- Added AudioVadManager to manage VAD (Voice Activity Detection) instances.
|
|
@@ -279,3 +285,41 @@ Store the LLM results in a cache as they are received.
|
|
|
279
285
|
Perform a reverse scan of the cached data to find the most recent punctuation mark.
|
|
280
286
|
Truncate the data from the start to the most recent punctuation mark and pass it to TTS for synthesis.
|
|
281
287
|
Remove the truncated data from the cache. The remaining data should be moved to the beginning of the cache and continue waiting for additional data from the LLM.
|
|
288
|
+
|
|
289
|
+
## VAD Configuration Parameters
|
|
290
|
+
AgoraAudioVadConfigV2 Properties
|
|
291
|
+
|
|
292
|
+
Property Name Type Description Default Value Value Range
|
|
293
|
+
preStartRecognizeCount int Number of audio frames saved before detecting speech 16 [0, max]
|
|
294
|
+
startRecognizeCount int Total number of audio frames to detect speech start 30 [1, max]
|
|
295
|
+
stopRecognizeCount int Number of audio frames to detect speech stop 50 [1, max]
|
|
296
|
+
activePercent float Percentage of active frames in startRecognizeCount frames 0.7 [0.0, 1.0]
|
|
297
|
+
inactivePercent float Percentage of inactive frames in stopRecognizeCount frames 0.5 [0.0, 1.0]
|
|
298
|
+
startVoiceProb int Probability that an audio frame contains human voice 70 [0, 100]
|
|
299
|
+
stopVoiceProb int Probability that an audio frame contains human voice 70 [0, 100]
|
|
300
|
+
startRmsThreshold int Energy dB threshold for detecting speech start -50 [-100, 0]
|
|
301
|
+
stopRmsThreshold int Energy dB threshold for detecting speech stop -50 [-100, 0]
|
|
302
|
+
Notes:
|
|
303
|
+
startRmsThreshold and stopRmsThreshold:
|
|
304
|
+
|
|
305
|
+
The higher the value, the louder the speaker's voice needs to be compared to the surrounding background noise.
|
|
306
|
+
In quiet environments, it is recommended to use the default value of -50.
|
|
307
|
+
In noisy environments, you can increase the threshold to between -40 and -30 to reduce false positives.
|
|
308
|
+
Adjusting these thresholds based on the actual use case and audio characteristics can achieve optimal performance.
|
|
309
|
+
stopRecognizeCount:
|
|
310
|
+
|
|
311
|
+
This value reflects how long to wait after detecting non-human voice before concluding that the user has stopped speaking. It controls the gap between consecutive speech utterances. Within this gap, VAD will treat adjacent sentences as part of the same speech.
|
|
312
|
+
A shorter gap will increase the likelihood of adjacent sentences being recognized as separate speech segments. Typically, it is recommended to set this value between 50 and 80.
|
|
313
|
+
For example: "Good afternoon, [interval_between_sentences] what are some fun places to visit in Beijing?"
|
|
314
|
+
|
|
315
|
+
If the interval_between_sentences between the speaker's phrases is greater than the stopRecognizeCount, the VAD will recognize the above as two separate VADs:
|
|
316
|
+
|
|
317
|
+
VAD1: Good afternoon
|
|
318
|
+
VAD2: What are some fun places to visit in Beijing?
|
|
319
|
+
If the interval_between_sentences is less than stopRecognizeCount, the VAD will recognize the above as a single VAD:
|
|
320
|
+
|
|
321
|
+
VAD: Good afternoon, what are some fun places to visit in Beijing?
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
If latency is a concern, you can lower this value, or consult with the development team to determine how to manage latency while ensuring semantic continuity in speech recognition. This will help avoid the AI being interrupted too sensitively.
|
|
@@ -45,7 +45,7 @@ class CustomInstallCommand(install):
|
|
|
45
45
|
|
|
46
46
|
setup(
|
|
47
47
|
name='agora_python_server_sdk',
|
|
48
|
-
version='2.1.6',
|
|
48
|
+
version='2.1.7',
|
|
49
49
|
description='A Python SDK for Agora Server',
|
|
50
50
|
long_description=open('README.md').read(),
|
|
51
51
|
long_description_content_type='text/markdown',
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/agora_parameter.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/audio_frame_observer.py
RENAMED
|
File without changes
|
{agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/audio_pcm_data_sender.py
RENAMED
|
File without changes
|
{agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/audio_sessionctrl.py
RENAMED
|
File without changes
|
{agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/audio_vad_manager.py
RENAMED
|
File without changes
|
{agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/local_audio_track.py
RENAMED
|
File without changes
|
{agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/local_user_observer.py
RENAMED
|
File without changes
|
{agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/local_video_track.py
RENAMED
|
File without changes
|
{agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/media_node_factory.py
RENAMED
|
File without changes
|
{agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/remote_audio_track.py
RENAMED
|
File without changes
|
{agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/remote_video_track.py
RENAMED
|
File without changes
|
|
File without changes
|
{agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/rtc_connection_observer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/video_frame_observer.py
RENAMED
|
File without changes
|
{agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/video_frame_sender.py
RENAMED
|
File without changes
|
{agora_python_server_sdk-2.1.6 → agora_python_server_sdk-2.1.7}/agora/rtc/voice_detection.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|