livekit-plugins-openai 0.4.dev1__tar.gz → 0.5.dev0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/PKG-INFO +2 -2
- {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/livekit/plugins/openai/llm.py +15 -7
- {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/livekit/plugins/openai/models.py +2 -0
- {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/livekit/plugins/openai/stt.py +39 -18
- livekit_plugins_openai-0.5.dev0/livekit/plugins/openai/tts.py +127 -0
- {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/livekit/plugins/openai/version.py +1 -1
- {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/livekit_plugins_openai.egg-info/PKG-INFO +2 -2
- {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/livekit_plugins_openai.egg-info/requires.txt +1 -1
- {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/setup.py +1 -1
- livekit_plugins_openai-0.4.dev1/livekit/plugins/openai/tts.py +0 -75
- {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/README.md +0 -0
- {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/livekit/plugins/openai/__init__.py +0 -0
- {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/livekit/plugins/openai/log.py +0 -0
- {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/livekit/plugins/openai/py.typed +0 -0
- {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/livekit_plugins_openai.egg-info/SOURCES.txt +0 -0
- {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/livekit_plugins_openai.egg-info/dependency_links.txt +0 -0
- {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/livekit_plugins_openai.egg-info/top_level.txt +0 -0
- {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/pyproject.toml +0 -0
- {livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: livekit-plugins-openai
|
|
3
|
-
Version: 0.4.dev1
|
|
3
|
+
Version: 0.5.dev0
|
|
4
4
|
Summary: Agent Framework plugin for services from OpenAI
|
|
5
5
|
Home-page: https://github.com/livekit/agents
|
|
6
6
|
License: Apache-2.0
|
|
@@ -20,7 +20,7 @@ Classifier: Programming Language :: Python :: 3 :: Only
|
|
|
20
20
|
Requires-Python: >=3.9.0
|
|
21
21
|
Description-Content-Type: text/markdown
|
|
22
22
|
Requires-Dist: livekit~=0.11
|
|
23
|
-
Requires-Dist: livekit-agents~=0.
|
|
23
|
+
Requires-Dist: livekit-agents~=0.7.dev0
|
|
24
24
|
Requires-Dist: openai>=1.0.0
|
|
25
25
|
Requires-Dist: requests<3,>=2
|
|
26
26
|
|
{livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/livekit/plugins/openai/llm.py
RENAMED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import asyncio
|
|
2
4
|
import enum
|
|
3
5
|
import functools
|
|
@@ -23,7 +25,7 @@ class LLM(llm.LLM):
|
|
|
23
25
|
def __init__(
|
|
24
26
|
self,
|
|
25
27
|
*,
|
|
26
|
-
model: str | ChatModels = "gpt-
|
|
28
|
+
model: str | ChatModels = "gpt-4o",
|
|
27
29
|
client: openai.AsyncClient | None = None,
|
|
28
30
|
) -> None:
|
|
29
31
|
self._opts = LLMOptions(model=model)
|
|
@@ -150,9 +152,13 @@ class LLMStream(llm.LLMStream):
|
|
|
150
152
|
fnc = fncs[name]
|
|
151
153
|
# validate args before calling fnc
|
|
152
154
|
for arg in fnc.args.values():
|
|
153
|
-
if arg.
|
|
154
|
-
|
|
155
|
-
|
|
155
|
+
if arg.name not in args:
|
|
156
|
+
if arg.default is inspect.Parameter.empty:
|
|
157
|
+
logger.error(
|
|
158
|
+
f"missing required arg {arg.name} for ai_callable {name}"
|
|
159
|
+
)
|
|
160
|
+
return
|
|
161
|
+
continue
|
|
156
162
|
|
|
157
163
|
if arg.type is bool and args[arg.name] not in (True, False):
|
|
158
164
|
logger.error(f"invalid arg {arg.name} for ai_callable {name}")
|
|
@@ -170,9 +176,11 @@ class LLMStream(llm.LLMStream):
|
|
|
170
176
|
logger.error(f"invalid arg {arg.name} for ai_callable {name}")
|
|
171
177
|
return
|
|
172
178
|
|
|
173
|
-
if issubclass(arg.type, enum.Enum)
|
|
174
|
-
|
|
175
|
-
|
|
179
|
+
if issubclass(arg.type, enum.Enum):
|
|
180
|
+
values = set(item.value for item in arg.type)
|
|
181
|
+
if args[arg.name] not in values:
|
|
182
|
+
logger.error(f"invalid arg {arg.name} for ai_callable {name}")
|
|
183
|
+
return
|
|
176
184
|
|
|
177
185
|
logger.debug(f"calling function {name} with arguments {args}")
|
|
178
186
|
self._called_functions.append(
|
{livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/livekit/plugins/openai/models.py
RENAMED
|
@@ -5,6 +5,8 @@ TTSModels = Literal["tts-1", "tts-1-hd"]
|
|
|
5
5
|
TTSVoices = Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"]
|
|
6
6
|
DalleModels = Literal["dall-e-2", "dall-e-3"]
|
|
7
7
|
ChatModels = Literal[
|
|
8
|
+
"gpt-4o",
|
|
9
|
+
"gpt-4o-2024-05-13",
|
|
8
10
|
"gpt-4-turbo",
|
|
9
11
|
"gpt-4-turbo-2024-04-09",
|
|
10
12
|
"gpt-4-turbo-preview",
|
{livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/livekit/plugins/openai/stt.py
RENAMED
|
@@ -20,20 +20,22 @@ import os
|
|
|
20
20
|
import wave
|
|
21
21
|
from dataclasses import dataclass
|
|
22
22
|
|
|
23
|
+
import aiohttp
|
|
23
24
|
from livekit import agents
|
|
24
|
-
from livekit.agents import stt
|
|
25
|
+
from livekit.agents import stt, utils
|
|
25
26
|
from livekit.agents.utils import AudioBuffer
|
|
26
27
|
|
|
27
|
-
import openai
|
|
28
|
-
|
|
29
28
|
from .models import WhisperModels
|
|
30
29
|
|
|
30
|
+
OPENAI_ENPOINT = "https://api.openai.com/v1/audio/transcriptions"
|
|
31
|
+
|
|
31
32
|
|
|
32
33
|
@dataclass
|
|
33
|
-
class
|
|
34
|
+
class _STTOptions:
|
|
34
35
|
language: str
|
|
35
36
|
detect_language: bool
|
|
36
37
|
model: WhisperModels
|
|
38
|
+
api_key: str
|
|
37
39
|
|
|
38
40
|
|
|
39
41
|
class STT(stt.STT):
|
|
@@ -44,29 +46,36 @@ class STT(stt.STT):
|
|
|
44
46
|
detect_language: bool = False,
|
|
45
47
|
model: WhisperModels = "whisper-1",
|
|
46
48
|
api_key: str | None = None,
|
|
49
|
+
http_session: aiohttp.ClientSession | None = None,
|
|
47
50
|
):
|
|
48
51
|
super().__init__(streaming_supported=False)
|
|
49
52
|
api_key = api_key or os.environ.get("OPENAI_API_KEY")
|
|
50
53
|
if not api_key:
|
|
51
54
|
raise ValueError("OPENAI_API_KEY must be set")
|
|
52
55
|
|
|
53
|
-
self._client = openai.AsyncOpenAI(api_key=api_key)
|
|
54
|
-
|
|
55
56
|
if detect_language:
|
|
56
57
|
language = ""
|
|
57
58
|
|
|
58
|
-
self.
|
|
59
|
+
self._opts = _STTOptions(
|
|
59
60
|
language=language,
|
|
60
61
|
detect_language=detect_language,
|
|
61
62
|
model=model,
|
|
63
|
+
api_key=api_key,
|
|
62
64
|
)
|
|
65
|
+
self._session = http_session
|
|
66
|
+
|
|
67
|
+
def _ensure_session(self) -> aiohttp.ClientSession:
|
|
68
|
+
if not self._session:
|
|
69
|
+
self._session = utils.http_session()
|
|
70
|
+
|
|
71
|
+
return self._session
|
|
63
72
|
|
|
64
73
|
def _sanitize_options(
|
|
65
74
|
self,
|
|
66
75
|
*,
|
|
67
76
|
language: str | None = None,
|
|
68
|
-
) ->
|
|
69
|
-
config = dataclasses.replace(self.
|
|
77
|
+
) -> _STTOptions:
|
|
78
|
+
config = dataclasses.replace(self._opts)
|
|
70
79
|
config.language = language or config.language
|
|
71
80
|
return config
|
|
72
81
|
|
|
@@ -86,17 +95,29 @@ class STT(stt.STT):
|
|
|
86
95
|
wav.setframerate(buffer.sample_rate)
|
|
87
96
|
wav.writeframes(buffer.data)
|
|
88
97
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
98
|
+
form = aiohttp.FormData()
|
|
99
|
+
form.add_field("file", io_buffer.getvalue(), filename="my_file.wav")
|
|
100
|
+
form.add_field("model", config.model)
|
|
101
|
+
|
|
102
|
+
if config.language:
|
|
103
|
+
form.add_field("language", config.language)
|
|
104
|
+
|
|
105
|
+
form.add_field("response_format", "json")
|
|
106
|
+
|
|
107
|
+
async with self._ensure_session().post(
|
|
108
|
+
OPENAI_ENPOINT,
|
|
109
|
+
headers={"Authorization": f"Bearer {config.api_key}"},
|
|
110
|
+
data=form,
|
|
111
|
+
) as resp:
|
|
112
|
+
data = await resp.json()
|
|
113
|
+
if "text" not in data or "error" in data:
|
|
114
|
+
raise ValueError(f"Unexpected response: {data}")
|
|
115
|
+
|
|
116
|
+
return _transcription_to_speech_event(data, config.language)
|
|
96
117
|
|
|
97
118
|
|
|
98
|
-
def
|
|
119
|
+
def _transcription_to_speech_event(transcription: dict, language) -> stt.SpeechEvent:
|
|
99
120
|
return stt.SpeechEvent(
|
|
100
121
|
type=stt.SpeechEventType.FINAL_TRANSCRIPT,
|
|
101
|
-
alternatives=[stt.SpeechData(text=transcription
|
|
122
|
+
alternatives=[stt.SpeechData(text=transcription["text"], language=language)],
|
|
102
123
|
)
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# Copyright 2023 LiveKit, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import asyncio
|
|
18
|
+
import contextlib
|
|
19
|
+
import os
|
|
20
|
+
from dataclasses import dataclass
|
|
21
|
+
from typing import Optional
|
|
22
|
+
|
|
23
|
+
import aiohttp
|
|
24
|
+
from livekit.agents import codecs, tts, utils
|
|
25
|
+
|
|
26
|
+
from .log import logger
|
|
27
|
+
from .models import TTSModels, TTSVoices
|
|
28
|
+
|
|
29
|
+
OPENAI_TTS_SAMPLE_RATE = 24000
|
|
30
|
+
OPENAI_TTS_CHANNELS = 1
|
|
31
|
+
OPENAI_ENPOINT = "https://api.openai.com/v1/audio/speech"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
|
|
35
|
+
class _TTSOptions:
|
|
36
|
+
model: TTSModels
|
|
37
|
+
voice: TTSVoices
|
|
38
|
+
api_key: str
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class TTS(tts.TTS):
|
|
42
|
+
def __init__(
|
|
43
|
+
self,
|
|
44
|
+
*,
|
|
45
|
+
model: TTSModels = "tts-1",
|
|
46
|
+
voice: TTSVoices = "alloy",
|
|
47
|
+
api_key: str | None = None,
|
|
48
|
+
http_session: aiohttp.ClientSession | None = None,
|
|
49
|
+
) -> None:
|
|
50
|
+
super().__init__(
|
|
51
|
+
streaming_supported=False,
|
|
52
|
+
sample_rate=OPENAI_TTS_SAMPLE_RATE,
|
|
53
|
+
num_channels=OPENAI_TTS_CHANNELS,
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
api_key = api_key or os.environ.get("OPENAI_API_KEY")
|
|
57
|
+
if not api_key:
|
|
58
|
+
raise ValueError("OPENAI_API_KEY must be set")
|
|
59
|
+
|
|
60
|
+
self._opts = _TTSOptions(model=model, voice=voice, api_key=api_key)
|
|
61
|
+
self._session = http_session
|
|
62
|
+
|
|
63
|
+
def _ensure_session(self) -> aiohttp.ClientSession:
|
|
64
|
+
if not self._session:
|
|
65
|
+
self._session = utils.http_session()
|
|
66
|
+
|
|
67
|
+
return self._session
|
|
68
|
+
|
|
69
|
+
def synthesize(
|
|
70
|
+
self,
|
|
71
|
+
text: str,
|
|
72
|
+
) -> "ChunkedStream":
|
|
73
|
+
return ChunkedStream(text, self._opts, self._ensure_session())
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class ChunkedStream(tts.ChunkedStream):
|
|
77
|
+
def __init__(
|
|
78
|
+
self, text: str, opts: _TTSOptions, session: aiohttp.ClientSession
|
|
79
|
+
) -> None:
|
|
80
|
+
self._opts = opts
|
|
81
|
+
self._text = text
|
|
82
|
+
self._session = session
|
|
83
|
+
self._decoder = codecs.Mp3StreamDecoder()
|
|
84
|
+
self._main_task: asyncio.Task | None = None
|
|
85
|
+
self._queue = asyncio.Queue[Optional[tts.SynthesizedAudio]]()
|
|
86
|
+
|
|
87
|
+
async def _run(self):
|
|
88
|
+
try:
|
|
89
|
+
async with self._session.post(
|
|
90
|
+
OPENAI_ENPOINT,
|
|
91
|
+
headers={"Authorization": f"Bearer {self._opts.api_key}"},
|
|
92
|
+
json={
|
|
93
|
+
"input": self._text,
|
|
94
|
+
"model": self._opts.model,
|
|
95
|
+
"voice": self._opts.voice,
|
|
96
|
+
"response_format": "mp3",
|
|
97
|
+
},
|
|
98
|
+
) as resp:
|
|
99
|
+
async for data, _ in resp.content.iter_chunks():
|
|
100
|
+
frames = self._decoder.decode_chunk(data)
|
|
101
|
+
for frame in frames:
|
|
102
|
+
self._queue.put_nowait(
|
|
103
|
+
tts.SynthesizedAudio(text="", data=frame)
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
except Exception:
|
|
107
|
+
logger.exception("openai tts main task failed in chunked stream")
|
|
108
|
+
finally:
|
|
109
|
+
self._queue.put_nowait(None)
|
|
110
|
+
|
|
111
|
+
async def __anext__(self) -> tts.SynthesizedAudio:
|
|
112
|
+
if not self._main_task:
|
|
113
|
+
self._main_task = asyncio.create_task(self._run())
|
|
114
|
+
|
|
115
|
+
frame = await self._queue.get()
|
|
116
|
+
if frame is None:
|
|
117
|
+
raise StopAsyncIteration
|
|
118
|
+
|
|
119
|
+
return frame
|
|
120
|
+
|
|
121
|
+
async def aclose(self) -> None:
|
|
122
|
+
if not self._main_task:
|
|
123
|
+
return
|
|
124
|
+
|
|
125
|
+
self._main_task.cancel()
|
|
126
|
+
with contextlib.suppress(asyncio.CancelledError):
|
|
127
|
+
await self._main_task
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: livekit-plugins-openai
|
|
3
|
-
Version: 0.4.dev1
|
|
3
|
+
Version: 0.5.dev0
|
|
4
4
|
Summary: Agent Framework plugin for services from OpenAI
|
|
5
5
|
Home-page: https://github.com/livekit/agents
|
|
6
6
|
License: Apache-2.0
|
|
@@ -20,7 +20,7 @@ Classifier: Programming Language :: Python :: 3 :: Only
|
|
|
20
20
|
Requires-Python: >=3.9.0
|
|
21
21
|
Description-Content-Type: text/markdown
|
|
22
22
|
Requires-Dist: livekit~=0.11
|
|
23
|
-
Requires-Dist: livekit-agents~=0.
|
|
23
|
+
Requires-Dist: livekit-agents~=0.7.dev0
|
|
24
24
|
Requires-Dist: openai>=1.0.0
|
|
25
25
|
Requires-Dist: requests<3,>=2
|
|
26
26
|
|
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
# Copyright 2023 LiveKit, Inc.
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
|
|
15
|
-
import os
|
|
16
|
-
from typing import AsyncIterable, Optional
|
|
17
|
-
|
|
18
|
-
import aiohttp
|
|
19
|
-
from livekit.agents import codecs, tts
|
|
20
|
-
|
|
21
|
-
from .models import TTSModels, TTSVoices
|
|
22
|
-
|
|
23
|
-
OPENAI_TTS_SAMPLE_RATE = 24000
|
|
24
|
-
OPENAI_TTS_CHANNELS = 1
|
|
25
|
-
OPENAI_ENPOINT = "https://api.openai.com/v1/audio/speech"
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
class TTS(tts.TTS):
|
|
29
|
-
def __init__(
|
|
30
|
-
self, model: TTSModels, voice: TTSVoices, api_key: Optional[str] = None
|
|
31
|
-
) -> None:
|
|
32
|
-
super().__init__(
|
|
33
|
-
streaming_supported=False,
|
|
34
|
-
sample_rate=OPENAI_TTS_SAMPLE_RATE,
|
|
35
|
-
num_channels=OPENAI_TTS_CHANNELS,
|
|
36
|
-
)
|
|
37
|
-
api_key = api_key or os.environ.get("OPENAI_API_KEY")
|
|
38
|
-
if not api_key:
|
|
39
|
-
raise ValueError("OPENAI_API_KEY must be set")
|
|
40
|
-
|
|
41
|
-
# TODO: we want to reuse aiohttp sessions
|
|
42
|
-
# for improved latency but doing so doesn't
|
|
43
|
-
# give us a clean way to close the session.
|
|
44
|
-
# Perhaps we introduce a close method to TTS?
|
|
45
|
-
# We also probalby want to send a warmup HEAD
|
|
46
|
-
# request after we create this
|
|
47
|
-
self._session = aiohttp.ClientSession(
|
|
48
|
-
headers={"Authorization": f"Bearer {api_key}"}
|
|
49
|
-
)
|
|
50
|
-
|
|
51
|
-
self._model = model
|
|
52
|
-
self._voice = voice
|
|
53
|
-
|
|
54
|
-
def synthesize(
|
|
55
|
-
self,
|
|
56
|
-
text: str,
|
|
57
|
-
) -> AsyncIterable[tts.SynthesizedAudio]:
|
|
58
|
-
decoder = codecs.Mp3StreamDecoder()
|
|
59
|
-
|
|
60
|
-
async def generator():
|
|
61
|
-
async with self._session.post(
|
|
62
|
-
OPENAI_ENPOINT,
|
|
63
|
-
json={
|
|
64
|
-
"input": text,
|
|
65
|
-
"model": self._model,
|
|
66
|
-
"voice": self._voice,
|
|
67
|
-
"response_format": "mp3",
|
|
68
|
-
},
|
|
69
|
-
) as resp:
|
|
70
|
-
async for data in resp.content.iter_chunked(4096):
|
|
71
|
-
frames = decoder.decode_chunk(data)
|
|
72
|
-
for frame in frames:
|
|
73
|
-
yield tts.SynthesizedAudio(text=text, data=frame)
|
|
74
|
-
|
|
75
|
-
return generator()
|
|
File without changes
|
|
File without changes
|
{livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/livekit/plugins/openai/log.py
RENAMED
|
File without changes
|
{livekit_plugins_openai-0.4.dev1 → livekit_plugins_openai-0.5.dev0}/livekit/plugins/openai/py.typed
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|