livekit-plugins-openai 0.4.dev1__tar.gz → 0.5.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (19)
  1. {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/PKG-INFO +2 -2
  2. {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/livekit/plugins/openai/llm.py +15 -7
  3. {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/livekit/plugins/openai/models.py +2 -0
  4. {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/livekit/plugins/openai/stt.py +39 -18
  5. livekit_plugins_openai-0.5.dev0/livekit/plugins/openai/tts.py +127 -0
  6. {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/livekit/plugins/openai/version.py +1 -1
  7. {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/livekit_plugins_openai.egg-info/PKG-INFO +2 -2
  8. {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/livekit_plugins_openai.egg-info/requires.txt +1 -1
  9. {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/setup.py +1 -1
  10. livekit_plugins_openai-0.4.dev1/livekit/plugins/openai/tts.py +0 -75
  11. {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/README.md +0 -0
  12. {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/livekit/plugins/openai/__init__.py +0 -0
  13. {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/livekit/plugins/openai/log.py +0 -0
  14. {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/livekit/plugins/openai/py.typed +0 -0
  15. {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/livekit_plugins_openai.egg-info/SOURCES.txt +0 -0
  16. {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/livekit_plugins_openai.egg-info/dependency_links.txt +0 -0
  17. {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/livekit_plugins_openai.egg-info/top_level.txt +0 -0
  18. {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/pyproject.toml +0 -0
  19. {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: livekit-plugins-openai
3
- Version: 0.4.dev1
3
+ Version: 0.5.dev0
4
4
  Summary: Agent Framework plugin for services from OpenAI
5
5
  Home-page: https://github.com/livekit/agents
6
6
  License: Apache-2.0
@@ -20,7 +20,7 @@ Classifier: Programming Language :: Python :: 3 :: Only
20
20
  Requires-Python: >=3.9.0
21
21
  Description-Content-Type: text/markdown
22
22
  Requires-Dist: livekit~=0.11
23
- Requires-Dist: livekit-agents~=0.6.dev1
23
+ Requires-Dist: livekit-agents~=0.7.dev0
24
24
  Requires-Dist: openai>=1.0.0
25
25
  Requires-Dist: requests<3,>=2
26
26
 
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  import asyncio
2
4
  import enum
3
5
  import functools
@@ -23,7 +25,7 @@ class LLM(llm.LLM):
23
25
  def __init__(
24
26
  self,
25
27
  *,
26
- model: str | ChatModels = "gpt-4-turbo",
28
+ model: str | ChatModels = "gpt-4o",
27
29
  client: openai.AsyncClient | None = None,
28
30
  ) -> None:
29
31
  self._opts = LLMOptions(model=model)
@@ -150,9 +152,13 @@ class LLMStream(llm.LLMStream):
150
152
  fnc = fncs[name]
151
153
  # validate args before calling fnc
152
154
  for arg in fnc.args.values():
153
- if arg.default is inspect.Parameter.empty and arg.name not in args:
154
- logger.error(f"missing required arg {arg.name} for ai_callable {name}")
155
- return
155
+ if arg.name not in args:
156
+ if arg.default is inspect.Parameter.empty:
157
+ logger.error(
158
+ f"missing required arg {arg.name} for ai_callable {name}"
159
+ )
160
+ return
161
+ continue
156
162
 
157
163
  if arg.type is bool and args[arg.name] not in (True, False):
158
164
  logger.error(f"invalid arg {arg.name} for ai_callable {name}")
@@ -170,9 +176,11 @@ class LLMStream(llm.LLMStream):
170
176
  logger.error(f"invalid arg {arg.name} for ai_callable {name}")
171
177
  return
172
178
 
173
- if issubclass(arg.type, enum.Enum) and args[arg.name] not in arg.type:
174
- logger.error(f"invalid arg {arg.name} for ai_callable {name}")
175
- return
179
+ if issubclass(arg.type, enum.Enum):
180
+ values = set(item.value for item in arg.type)
181
+ if args[arg.name] not in values:
182
+ logger.error(f"invalid arg {arg.name} for ai_callable {name}")
183
+ return
176
184
 
177
185
  logger.debug(f"calling function {name} with arguments {args}")
178
186
  self._called_functions.append(
@@ -5,6 +5,8 @@ TTSModels = Literal["tts-1", "tts-1-hd"]
5
5
  TTSVoices = Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"]
6
6
  DalleModels = Literal["dall-e-2", "dall-e-3"]
7
7
  ChatModels = Literal[
8
+ "gpt-4o",
9
+ "gpt-4o-2024-05-13",
8
10
  "gpt-4-turbo",
9
11
  "gpt-4-turbo-2024-04-09",
10
12
  "gpt-4-turbo-preview",
@@ -20,20 +20,22 @@ import os
20
20
  import wave
21
21
  from dataclasses import dataclass
22
22
 
23
+ import aiohttp
23
24
  from livekit import agents
24
- from livekit.agents import stt
25
+ from livekit.agents import stt, utils
25
26
  from livekit.agents.utils import AudioBuffer
26
27
 
27
- import openai
28
-
29
28
  from .models import WhisperModels
30
29
 
30
+ OPENAI_ENPOINT = "https://api.openai.com/v1/audio/transcriptions"
31
+
31
32
 
32
33
  @dataclass
33
- class STTOptions:
34
+ class _STTOptions:
34
35
  language: str
35
36
  detect_language: bool
36
37
  model: WhisperModels
38
+ api_key: str
37
39
 
38
40
 
39
41
  class STT(stt.STT):
@@ -44,29 +46,36 @@ class STT(stt.STT):
44
46
  detect_language: bool = False,
45
47
  model: WhisperModels = "whisper-1",
46
48
  api_key: str | None = None,
49
+ http_session: aiohttp.ClientSession | None = None,
47
50
  ):
48
51
  super().__init__(streaming_supported=False)
49
52
  api_key = api_key or os.environ.get("OPENAI_API_KEY")
50
53
  if not api_key:
51
54
  raise ValueError("OPENAI_API_KEY must be set")
52
55
 
53
- self._client = openai.AsyncOpenAI(api_key=api_key)
54
-
55
56
  if detect_language:
56
57
  language = ""
57
58
 
58
- self._config = STTOptions(
59
+ self._opts = _STTOptions(
59
60
  language=language,
60
61
  detect_language=detect_language,
61
62
  model=model,
63
+ api_key=api_key,
62
64
  )
65
+ self._session = http_session
66
+
67
+ def _ensure_session(self) -> aiohttp.ClientSession:
68
+ if not self._session:
69
+ self._session = utils.http_session()
70
+
71
+ return self._session
63
72
 
64
73
  def _sanitize_options(
65
74
  self,
66
75
  *,
67
76
  language: str | None = None,
68
- ) -> STTOptions:
69
- config = dataclasses.replace(self._config)
77
+ ) -> _STTOptions:
78
+ config = dataclasses.replace(self._opts)
70
79
  config.language = language or config.language
71
80
  return config
72
81
 
@@ -86,17 +95,29 @@ class STT(stt.STT):
86
95
  wav.setframerate(buffer.sample_rate)
87
96
  wav.writeframes(buffer.data)
88
97
 
89
- resp = await self._client.audio.transcriptions.create(
90
- file=("a.wav", io_buffer),
91
- model=config.model,
92
- language=config.language,
93
- response_format="json",
94
- )
95
- return transcription_to_speech_event(resp, config.language)
98
+ form = aiohttp.FormData()
99
+ form.add_field("file", io_buffer.getvalue(), filename="my_file.wav")
100
+ form.add_field("model", config.model)
101
+
102
+ if config.language:
103
+ form.add_field("language", config.language)
104
+
105
+ form.add_field("response_format", "json")
106
+
107
+ async with self._ensure_session().post(
108
+ OPENAI_ENPOINT,
109
+ headers={"Authorization": f"Bearer {config.api_key}"},
110
+ data=form,
111
+ ) as resp:
112
+ data = await resp.json()
113
+ if "text" not in data or "error" in data:
114
+ raise ValueError(f"Unexpected response: {data}")
115
+
116
+ return _transcription_to_speech_event(data, config.language)
96
117
 
97
118
 
98
- def transcription_to_speech_event(transcription, language) -> stt.SpeechEvent:
119
+ def _transcription_to_speech_event(transcription: dict, language) -> stt.SpeechEvent:
99
120
  return stt.SpeechEvent(
100
121
  type=stt.SpeechEventType.FINAL_TRANSCRIPT,
101
- alternatives=[stt.SpeechData(text=transcription.text, language=language)],
122
+ alternatives=[stt.SpeechData(text=transcription["text"], language=language)],
102
123
  )
@@ -0,0 +1,127 @@
1
+ # Copyright 2023 LiveKit, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from __future__ import annotations
16
+
17
+ import asyncio
18
+ import contextlib
19
+ import os
20
+ from dataclasses import dataclass
21
+ from typing import Optional
22
+
23
+ import aiohttp
24
+ from livekit.agents import codecs, tts, utils
25
+
26
+ from .log import logger
27
+ from .models import TTSModels, TTSVoices
28
+
29
+ OPENAI_TTS_SAMPLE_RATE = 24000
30
+ OPENAI_TTS_CHANNELS = 1
31
+ OPENAI_ENPOINT = "https://api.openai.com/v1/audio/speech"
32
+
33
+
34
+ @dataclass
35
+ class _TTSOptions:
36
+ model: TTSModels
37
+ voice: TTSVoices
38
+ api_key: str
39
+
40
+
41
+ class TTS(tts.TTS):
42
+ def __init__(
43
+ self,
44
+ *,
45
+ model: TTSModels = "tts-1",
46
+ voice: TTSVoices = "alloy",
47
+ api_key: str | None = None,
48
+ http_session: aiohttp.ClientSession | None = None,
49
+ ) -> None:
50
+ super().__init__(
51
+ streaming_supported=False,
52
+ sample_rate=OPENAI_TTS_SAMPLE_RATE,
53
+ num_channels=OPENAI_TTS_CHANNELS,
54
+ )
55
+
56
+ api_key = api_key or os.environ.get("OPENAI_API_KEY")
57
+ if not api_key:
58
+ raise ValueError("OPENAI_API_KEY must be set")
59
+
60
+ self._opts = _TTSOptions(model=model, voice=voice, api_key=api_key)
61
+ self._session = http_session
62
+
63
+ def _ensure_session(self) -> aiohttp.ClientSession:
64
+ if not self._session:
65
+ self._session = utils.http_session()
66
+
67
+ return self._session
68
+
69
+ def synthesize(
70
+ self,
71
+ text: str,
72
+ ) -> "ChunkedStream":
73
+ return ChunkedStream(text, self._opts, self._ensure_session())
74
+
75
+
76
+ class ChunkedStream(tts.ChunkedStream):
77
+ def __init__(
78
+ self, text: str, opts: _TTSOptions, session: aiohttp.ClientSession
79
+ ) -> None:
80
+ self._opts = opts
81
+ self._text = text
82
+ self._session = session
83
+ self._decoder = codecs.Mp3StreamDecoder()
84
+ self._main_task: asyncio.Task | None = None
85
+ self._queue = asyncio.Queue[Optional[tts.SynthesizedAudio]]()
86
+
87
+ async def _run(self):
88
+ try:
89
+ async with self._session.post(
90
+ OPENAI_ENPOINT,
91
+ headers={"Authorization": f"Bearer {self._opts.api_key}"},
92
+ json={
93
+ "input": self._text,
94
+ "model": self._opts.model,
95
+ "voice": self._opts.voice,
96
+ "response_format": "mp3",
97
+ },
98
+ ) as resp:
99
+ async for data, _ in resp.content.iter_chunks():
100
+ frames = self._decoder.decode_chunk(data)
101
+ for frame in frames:
102
+ self._queue.put_nowait(
103
+ tts.SynthesizedAudio(text="", data=frame)
104
+ )
105
+
106
+ except Exception:
107
+ logger.exception("openai tts main task failed in chunked stream")
108
+ finally:
109
+ self._queue.put_nowait(None)
110
+
111
+ async def __anext__(self) -> tts.SynthesizedAudio:
112
+ if not self._main_task:
113
+ self._main_task = asyncio.create_task(self._run())
114
+
115
+ frame = await self._queue.get()
116
+ if frame is None:
117
+ raise StopAsyncIteration
118
+
119
+ return frame
120
+
121
+ async def aclose(self) -> None:
122
+ if not self._main_task:
123
+ return
124
+
125
+ self._main_task.cancel()
126
+ with contextlib.suppress(asyncio.CancelledError):
127
+ await self._main_task
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "0.4.dev1"
15
+ __version__ = "0.5.dev0"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: livekit-plugins-openai
3
- Version: 0.4.dev1
3
+ Version: 0.5.dev0
4
4
  Summary: Agent Framework plugin for services from OpenAI
5
5
  Home-page: https://github.com/livekit/agents
6
6
  License: Apache-2.0
@@ -20,7 +20,7 @@ Classifier: Programming Language :: Python :: 3 :: Only
20
20
  Requires-Python: >=3.9.0
21
21
  Description-Content-Type: text/markdown
22
22
  Requires-Dist: livekit~=0.11
23
- Requires-Dist: livekit-agents~=0.6.dev1
23
+ Requires-Dist: livekit-agents~=0.7.dev0
24
24
  Requires-Dist: openai>=1.0.0
25
25
  Requires-Dist: requests<3,>=2
26
26
 
@@ -1,4 +1,4 @@
1
1
  livekit~=0.11
2
- livekit-agents~=0.6.dev1
2
+ livekit-agents~=0.7.dev0
3
3
  openai>=1.0.0
4
4
  requests<3,>=2
@@ -49,7 +49,7 @@ setuptools.setup(
49
49
  python_requires=">=3.9.0",
50
50
  install_requires=[
51
51
  "livekit ~= 0.11",
52
- "livekit-agents~=0.6.dev1",
52
+ "livekit-agents~=0.7.dev0",
53
53
  "openai >= 1.0.0",
54
54
  "requests >= 2, < 3",
55
55
  ],
@@ -1,75 +0,0 @@
1
- # Copyright 2023 LiveKit, Inc.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
- import os
16
- from typing import AsyncIterable, Optional
17
-
18
- import aiohttp
19
- from livekit.agents import codecs, tts
20
-
21
- from .models import TTSModels, TTSVoices
22
-
23
- OPENAI_TTS_SAMPLE_RATE = 24000
24
- OPENAI_TTS_CHANNELS = 1
25
- OPENAI_ENPOINT = "https://api.openai.com/v1/audio/speech"
26
-
27
-
28
- class TTS(tts.TTS):
29
- def __init__(
30
- self, model: TTSModels, voice: TTSVoices, api_key: Optional[str] = None
31
- ) -> None:
32
- super().__init__(
33
- streaming_supported=False,
34
- sample_rate=OPENAI_TTS_SAMPLE_RATE,
35
- num_channels=OPENAI_TTS_CHANNELS,
36
- )
37
- api_key = api_key or os.environ.get("OPENAI_API_KEY")
38
- if not api_key:
39
- raise ValueError("OPENAI_API_KEY must be set")
40
-
41
- # TODO: we want to reuse aiohttp sessions
42
- # for improved latency but doing so doesn't
43
- # give us a clean way to close the session.
44
- # Perhaps we introduce a close method to TTS?
45
- # We also probalby want to send a warmup HEAD
46
- # request after we create this
47
- self._session = aiohttp.ClientSession(
48
- headers={"Authorization": f"Bearer {api_key}"}
49
- )
50
-
51
- self._model = model
52
- self._voice = voice
53
-
54
- def synthesize(
55
- self,
56
- text: str,
57
- ) -> AsyncIterable[tts.SynthesizedAudio]:
58
- decoder = codecs.Mp3StreamDecoder()
59
-
60
- async def generator():
61
- async with self._session.post(
62
- OPENAI_ENPOINT,
63
- json={
64
- "input": text,
65
- "model": self._model,
66
- "voice": self._voice,
67
- "response_format": "mp3",
68
- },
69
- ) as resp:
70
- async for data in resp.content.iter_chunked(4096):
71
- frames = decoder.decode_chunk(data)
72
- for frame in frames:
73
- yield tts.SynthesizedAudio(text=text, data=frame)
74
-
75
- return generator()