PyPI - livekit-plugins-azure - Versions diffs - 0.3.0.dev7__tar.gz → 0.3.2__tar.gz - Mend

livekit-plugins-azure 0.3.0.dev7.tar.gz → 0.3.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

{livekit_plugins_azure-0.3.0.dev7 → livekit_plugins_azure-0.3.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: livekit-plugins-azure
-Version: 0.3.0.dev7
+Version: 0.3.2
 Summary: Agent Framework plugin for services from Azure
 Home-page: https://github.com/livekit/agents
 License: Apache-2.0

{livekit_plugins_azure-0.3.0.dev7 → livekit_plugins_azure-0.3.2}/livekit/plugins/azure/__init__.py RENAMED Viewed

@@ -18,13 +18,12 @@ __all__ = ["STT", "SpeechStream", "TTS", "__version__"]
 from livekit.agents import Plugin
+from .log import logger
 class AzurePlugin(Plugin):
     def __init__(self):
-        super().__init__(__name__, __version__, __package__)
-    def download_files(self):
-        pass
+        super().__init__(__name__, __version__, __package__, logger)
 Plugin.register_plugin(AzurePlugin())

{livekit_plugins_azure-0.3.0.dev7 → livekit_plugins_azure-0.3.2}/livekit/plugins/azure/stt.py RENAMED Viewed

@@ -16,6 +16,7 @@ import asyncio
 import os
 from dataclasses import dataclass
+from livekit import rtc
 from livekit.agents import stt, utils
 import azure.cognitiveservices.speech as speechsdk  # type: ignore
@@ -44,6 +45,13 @@ class STT(stt.STT):
         num_channels: int = 1,
         languages: list[str] = [],  # when empty, auto-detect the language
     ):
+        """
+        Create a new instance of Azure STT.
+        ``speech_key`` and ``speech_region`` must be set, either using arguments or by setting the
+        ``AZURE_SPEECH_KEY`` and ``AZURE_SPEECH_REGION`` environmental variables, respectively.
+        """
         super().__init__(
             capabilities=stt.STTCapabilities(streaming=True, interim_results=True)
         )
@@ -102,7 +110,8 @@ class SpeechStream(stt.SpeechStream):
     async def _main_task(self) -> None:
         try:
             async for input in self._input_ch:
-                self._stream.write(input.data.tobytes())
+                if isinstance(input, rtc.AudioFrame):
+                    self._stream.write(input.data.tobytes())
             self._stream.close()
             await self._done_event.wait()

{livekit_plugins_azure-0.3.0.dev7 → livekit_plugins_azure-0.3.2}/livekit/plugins/azure/tts.py RENAMED Viewed

@@ -16,7 +16,6 @@ import asyncio
 import os
 from dataclasses import dataclass
-from livekit import rtc
 from livekit.agents import tts, utils
 import azure.cognitiveservices.speech as speechsdk  # type: ignore
@@ -42,6 +41,13 @@ class TTS(tts.TTS):
         speech_region: str | None = None,
         voice: str | None = None,
     ) -> None:
+        """
+        Create a new instance of Azure TTS.
+        ``speech_key`` and ``speech_region`` must be set, either using arguments or by setting the
+        ``AZURE_SPEECH_KEY`` and ``AZURE_SPEECH_REGION`` environmental variables, respectively.
+        """
         super().__init__(
             capabilities=tts.TTSCapabilities(
                 streaming=False,
@@ -73,17 +79,18 @@ class ChunkedStream(tts.ChunkedStream):
     @utils.log_exceptions()
     async def _main_task(self):
-        stream_callback = _PushAudioOutputStreamCallback(
-            asyncio.get_running_loop(), self._event_ch
+        stream_callback = speechsdk.audio.PushAudioOutputStream(
+            _PushAudioOutputStreamCallback(asyncio.get_running_loop(), self._event_ch)
         )
         synthesizer = _create_speech_synthesizer(
             config=self._opts,
-            stream=speechsdk.audio.PushAudioOutputStream(stream_callback),
+            stream=stream_callback,
         )
         def _synthesize() -> speechsdk.SpeechSynthesisResult:
             return synthesizer.speak_text_async(self._text).get()  # type: ignore
+        result = None
         try:
             result = await asyncio.to_thread(_synthesize)
             if result.reason != speechsdk.ResultReason.SynthesizingAudioCompleted:
@@ -93,8 +100,11 @@ class ChunkedStream(tts.ChunkedStream):
         finally:
             def _cleanup() -> None:
-                nonlocal synthesizer, result
+                # cleanup resources inside an Executor
+                # to avoid blocking the event loop
+                nonlocal synthesizer, stream_callback, result
                 del synthesizer
+                del stream_callback
                 del result
             await asyncio.to_thread(_cleanup)
@@ -112,20 +122,30 @@ class _PushAudioOutputStreamCallback(speechsdk.audio.PushAudioOutputStreamCallba
         self._request_id = utils.shortuuid()
         self._segment_id = utils.shortuuid()
-    def write(self, audio_buffer: memoryview) -> int:
-        audio = tts.SynthesizedAudio(
-            request_id=self._request_id,
-            segment_id=self._segment_id,
-            frame=rtc.AudioFrame(
-                data=audio_buffer,
-                sample_rate=AZURE_SAMPLE_RATE,
-                num_channels=AZURE_NUM_CHANNELS,
-                samples_per_channel=audio_buffer.nbytes // 2,
-            ),
+        self._bstream = utils.audio.AudioByteStream(
+            sample_rate=AZURE_SAMPLE_RATE, num_channels=AZURE_NUM_CHANNELS
         )
-        self._loop.call_soon_threadsafe(self._event_ch.send_nowait, audio)
+    def write(self, audio_buffer: memoryview) -> int:
+        for frame in self._bstream.write(audio_buffer.tobytes()):
+            audio = tts.SynthesizedAudio(
+                request_id=self._request_id,
+                segment_id=self._segment_id,
+                frame=frame,
+            )
+            self._loop.call_soon_threadsafe(self._event_ch.send_nowait, audio)
         return audio_buffer.nbytes
+    def close(self) -> None:
+        for frame in self._bstream.flush():
+            audio = tts.SynthesizedAudio(
+                request_id=self._request_id,
+                segment_id=self._segment_id,
+                frame=frame,
+            )
+            self._loop.call_soon_threadsafe(self._event_ch.send_nowait, audio)
 def _create_speech_synthesizer(
     *, config: _TTSOptions, stream: speechsdk.audio.AudioOutputStream

{livekit_plugins_azure-0.3.0.dev7 → livekit_plugins_azure-0.3.2}/livekit/plugins/azure/version.py RENAMED Viewed

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "0.3.0-dev.7"
+__version__ = "0.3.2"

{livekit_plugins_azure-0.3.0.dev7 → livekit_plugins_azure-0.3.2}/livekit_plugins_azure.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: livekit-plugins-azure
-Version: 0.3.0.dev7
+Version: 0.3.2
 Summary: Agent Framework plugin for services from Azure
 Home-page: https://github.com/livekit/agents
 License: Apache-2.0