livekit-plugins-azure 0.3.0.dev7__tar.gz → 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (16)
  1. {livekit_plugins_azure-0.3.0.dev7 → livekit_plugins_azure-0.3.2}/PKG-INFO +1 -1
  2. {livekit_plugins_azure-0.3.0.dev7 → livekit_plugins_azure-0.3.2}/livekit/plugins/azure/__init__.py +3 -4
  3. {livekit_plugins_azure-0.3.0.dev7 → livekit_plugins_azure-0.3.2}/livekit/plugins/azure/stt.py +10 -1
  4. {livekit_plugins_azure-0.3.0.dev7 → livekit_plugins_azure-0.3.2}/livekit/plugins/azure/tts.py +36 -16
  5. {livekit_plugins_azure-0.3.0.dev7 → livekit_plugins_azure-0.3.2}/livekit/plugins/azure/version.py +1 -1
  6. {livekit_plugins_azure-0.3.0.dev7 → livekit_plugins_azure-0.3.2}/livekit_plugins_azure.egg-info/PKG-INFO +1 -1
  7. {livekit_plugins_azure-0.3.0.dev7 → livekit_plugins_azure-0.3.2}/README.md +0 -0
  8. {livekit_plugins_azure-0.3.0.dev7 → livekit_plugins_azure-0.3.2}/livekit/plugins/azure/log.py +0 -0
  9. {livekit_plugins_azure-0.3.0.dev7 → livekit_plugins_azure-0.3.2}/livekit/plugins/azure/py.typed +0 -0
  10. {livekit_plugins_azure-0.3.0.dev7 → livekit_plugins_azure-0.3.2}/livekit_plugins_azure.egg-info/SOURCES.txt +0 -0
  11. {livekit_plugins_azure-0.3.0.dev7 → livekit_plugins_azure-0.3.2}/livekit_plugins_azure.egg-info/dependency_links.txt +0 -0
  12. {livekit_plugins_azure-0.3.0.dev7 → livekit_plugins_azure-0.3.2}/livekit_plugins_azure.egg-info/requires.txt +0 -0
  13. {livekit_plugins_azure-0.3.0.dev7 → livekit_plugins_azure-0.3.2}/livekit_plugins_azure.egg-info/top_level.txt +0 -0
  14. {livekit_plugins_azure-0.3.0.dev7 → livekit_plugins_azure-0.3.2}/pyproject.toml +0 -0
  15. {livekit_plugins_azure-0.3.0.dev7 → livekit_plugins_azure-0.3.2}/setup.cfg +0 -0
  16. {livekit_plugins_azure-0.3.0.dev7 → livekit_plugins_azure-0.3.2}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: livekit-plugins-azure
3
- Version: 0.3.0.dev7
3
+ Version: 0.3.2
4
4
  Summary: Agent Framework plugin for services from Azure
5
5
  Home-page: https://github.com/livekit/agents
6
6
  License: Apache-2.0
@@ -18,13 +18,12 @@ __all__ = ["STT", "SpeechStream", "TTS", "__version__"]
18
18
 
19
19
  from livekit.agents import Plugin
20
20
 
21
+ from .log import logger
22
+
21
23
 
22
24
  class AzurePlugin(Plugin):
23
25
  def __init__(self):
24
- super().__init__(__name__, __version__, __package__)
25
-
26
- def download_files(self):
27
- pass
26
+ super().__init__(__name__, __version__, __package__, logger)
28
27
 
29
28
 
30
29
  Plugin.register_plugin(AzurePlugin())
@@ -16,6 +16,7 @@ import asyncio
16
16
  import os
17
17
  from dataclasses import dataclass
18
18
 
19
+ from livekit import rtc
19
20
  from livekit.agents import stt, utils
20
21
 
21
22
  import azure.cognitiveservices.speech as speechsdk # type: ignore
@@ -44,6 +45,13 @@ class STT(stt.STT):
44
45
  num_channels: int = 1,
45
46
  languages: list[str] = [], # when empty, auto-detect the language
46
47
  ):
48
+ """
49
+ Create a new instance of Azure STT.
50
+
51
+ ``speech_key`` and ``speech_region`` must be set, either using arguments or by setting the
52
+ ``AZURE_SPEECH_KEY`` and ``AZURE_SPEECH_REGION`` environmental variables, respectively.
53
+ """
54
+
47
55
  super().__init__(
48
56
  capabilities=stt.STTCapabilities(streaming=True, interim_results=True)
49
57
  )
@@ -102,7 +110,8 @@ class SpeechStream(stt.SpeechStream):
102
110
  async def _main_task(self) -> None:
103
111
  try:
104
112
  async for input in self._input_ch:
105
- self._stream.write(input.data.tobytes())
113
+ if isinstance(input, rtc.AudioFrame):
114
+ self._stream.write(input.data.tobytes())
106
115
 
107
116
  self._stream.close()
108
117
  await self._done_event.wait()
@@ -16,7 +16,6 @@ import asyncio
16
16
  import os
17
17
  from dataclasses import dataclass
18
18
 
19
- from livekit import rtc
20
19
  from livekit.agents import tts, utils
21
20
 
22
21
  import azure.cognitiveservices.speech as speechsdk # type: ignore
@@ -42,6 +41,13 @@ class TTS(tts.TTS):
42
41
  speech_region: str | None = None,
43
42
  voice: str | None = None,
44
43
  ) -> None:
44
+ """
45
+ Create a new instance of Azure TTS.
46
+
47
+ ``speech_key`` and ``speech_region`` must be set, either using arguments or by setting the
48
+ ``AZURE_SPEECH_KEY`` and ``AZURE_SPEECH_REGION`` environmental variables, respectively.
49
+ """
50
+
45
51
  super().__init__(
46
52
  capabilities=tts.TTSCapabilities(
47
53
  streaming=False,
@@ -73,17 +79,18 @@ class ChunkedStream(tts.ChunkedStream):
73
79
 
74
80
  @utils.log_exceptions()
75
81
  async def _main_task(self):
76
- stream_callback = _PushAudioOutputStreamCallback(
77
- asyncio.get_running_loop(), self._event_ch
82
+ stream_callback = speechsdk.audio.PushAudioOutputStream(
83
+ _PushAudioOutputStreamCallback(asyncio.get_running_loop(), self._event_ch)
78
84
  )
79
85
  synthesizer = _create_speech_synthesizer(
80
86
  config=self._opts,
81
- stream=speechsdk.audio.PushAudioOutputStream(stream_callback),
87
+ stream=stream_callback,
82
88
  )
83
89
 
84
90
  def _synthesize() -> speechsdk.SpeechSynthesisResult:
85
91
  return synthesizer.speak_text_async(self._text).get() # type: ignore
86
92
 
93
+ result = None
87
94
  try:
88
95
  result = await asyncio.to_thread(_synthesize)
89
96
  if result.reason != speechsdk.ResultReason.SynthesizingAudioCompleted:
@@ -93,8 +100,11 @@ class ChunkedStream(tts.ChunkedStream):
93
100
  finally:
94
101
 
95
102
  def _cleanup() -> None:
96
- nonlocal synthesizer, result
103
+ # cleanup resources inside an Executor
104
+ # to avoid blocking the event loop
105
+ nonlocal synthesizer, stream_callback, result
97
106
  del synthesizer
107
+ del stream_callback
98
108
  del result
99
109
 
100
110
  await asyncio.to_thread(_cleanup)
@@ -112,20 +122,30 @@ class _PushAudioOutputStreamCallback(speechsdk.audio.PushAudioOutputStreamCallba
112
122
  self._request_id = utils.shortuuid()
113
123
  self._segment_id = utils.shortuuid()
114
124
 
115
- def write(self, audio_buffer: memoryview) -> int:
116
- audio = tts.SynthesizedAudio(
117
- request_id=self._request_id,
118
- segment_id=self._segment_id,
119
- frame=rtc.AudioFrame(
120
- data=audio_buffer,
121
- sample_rate=AZURE_SAMPLE_RATE,
122
- num_channels=AZURE_NUM_CHANNELS,
123
- samples_per_channel=audio_buffer.nbytes // 2,
124
- ),
125
+ self._bstream = utils.audio.AudioByteStream(
126
+ sample_rate=AZURE_SAMPLE_RATE, num_channels=AZURE_NUM_CHANNELS
125
127
  )
126
- self._loop.call_soon_threadsafe(self._event_ch.send_nowait, audio)
128
+
129
+ def write(self, audio_buffer: memoryview) -> int:
130
+ for frame in self._bstream.write(audio_buffer.tobytes()):
131
+ audio = tts.SynthesizedAudio(
132
+ request_id=self._request_id,
133
+ segment_id=self._segment_id,
134
+ frame=frame,
135
+ )
136
+ self._loop.call_soon_threadsafe(self._event_ch.send_nowait, audio)
137
+
127
138
  return audio_buffer.nbytes
128
139
 
140
+ def close(self) -> None:
141
+ for frame in self._bstream.flush():
142
+ audio = tts.SynthesizedAudio(
143
+ request_id=self._request_id,
144
+ segment_id=self._segment_id,
145
+ frame=frame,
146
+ )
147
+ self._loop.call_soon_threadsafe(self._event_ch.send_nowait, audio)
148
+
129
149
 
130
150
  def _create_speech_synthesizer(
131
151
  *, config: _TTSOptions, stream: speechsdk.audio.AudioOutputStream
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "0.3.0-dev.7"
15
+ __version__ = "0.3.2"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: livekit-plugins-azure
3
- Version: 0.3.0.dev7
3
+ Version: 0.3.2
4
4
  Summary: Agent Framework plugin for services from Azure
5
5
  Home-page: https://github.com/livekit/agents
6
6
  License: Apache-2.0