livekit-plugins-clova 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of livekit-plugins-clova might be problematic. Click here for more details.
- livekit/plugins/clova/__init__.py +30 -0
- livekit/plugins/clova/common.py +13 -0
- livekit/plugins/clova/constants.py +2 -0
- livekit/plugins/clova/log.py +3 -0
- livekit/plugins/clova/models.py +15 -0
- livekit/plugins/clova/stt.py +161 -0
- livekit/plugins/clova/version.py +15 -0
- livekit_plugins_clova-1.0.0.dist-info/METADATA +37 -0
- livekit_plugins_clova-1.0.0.dist-info/RECORD +10 -0
- livekit_plugins_clova-1.0.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from .stt import STT
|
|
2
|
+
from .version import __version__
|
|
3
|
+
|
|
4
|
+
# Public names re-exported by this package.
__all__ = ["STT", "__version__"]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
from livekit.agents import Plugin
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ClovaSTTPlugin(Plugin):
    """Plugin descriptor for the Clova STT package."""

    def __init__(self) -> None:
        # Pass this module's name, the package version and the package name
        # to the base Plugin constructor.
        super().__init__(__name__, __version__, __package__)

    def download_files(self) -> None:
        """Intentionally do nothing."""
        return None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Register one plugin instance when the package is imported.
Plugin.register_plugin(ClovaSTTPlugin())

# Cleanup docs of unexported modules
_module = dir()
NOT_IN_ALL = [name for name in _module if name not in __all__]

# Every module-level name that is not exported through __all__ is mapped to
# False so that it is left out of the generated documentation.
__pdoc__ = dict.fromkeys(NOT_IN_ALL, False)
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import io
|
|
2
|
+
|
|
3
|
+
from pydub import AudioSegment
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def resample_audio(
    audio_bytes: bytes,
    original_sample_rate: int,
    target_sample_rate: int,
    *,
    sample_width: int = 2,
    channels: int = 1,
) -> bytes:
    """Resample raw PCM audio to ``target_sample_rate``.

    Args:
        audio_bytes: Raw PCM samples (no container header).
        original_sample_rate: Frame rate of ``audio_bytes`` in Hz.
        target_sample_rate: Desired frame rate in Hz.
        sample_width: Bytes per sample; defaults to 2 (16-bit), the value
            that used to be hard-coded.
        channels: Number of interleaved channels; defaults to 1 (mono), the
            value that used to be hard-coded.

    Returns:
        The resampled raw PCM bytes.
    """
    # Nothing to convert: skip the decode/re-encode round trip entirely.
    if original_sample_rate == target_sample_rate:
        return bytes(audio_bytes)

    segment = AudioSegment.from_raw(
        io.BytesIO(audio_bytes),
        sample_width=sample_width,
        frame_rate=original_sample_rate,
        channels=channels,
    )
    return segment.set_frame_rate(target_sample_rate).raw_data
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from typing import Literal
|
|
2
|
+
|
|
3
|
+
# Language identifiers this plugin knows for Clova STT.
ClovaSttLanguages = Literal[
    "ko-KR",
    "en-US",
    "enko",
    "ja",
    "zh-cn",
    "zh-tw",
]

# Path suffixes of the recognizer endpoints.
ClovaSpeechAPIType = Literal[
    "recognizer/object-storage",
    "recognizer/url",
    "recognizer/upload",
]

# Lookup table from a caller-supplied language to the identifier used by the
# plugin: "en" is an alias for "en-US", every other entry maps to itself.
clova_languages_mapping = {
    "en": "en-US",
    **{code: code for code in ("ko-KR", "en-US", "enko", "ja", "zh-cn", "zh-tw")},
}
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
# Copyright 2023 LiveKit, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import asyncio
|
|
18
|
+
import io
|
|
19
|
+
import json
|
|
20
|
+
import os
|
|
21
|
+
import time
|
|
22
|
+
import wave
|
|
23
|
+
|
|
24
|
+
import aiohttp
|
|
25
|
+
|
|
26
|
+
from livekit.agents import (
|
|
27
|
+
APIConnectOptions,
|
|
28
|
+
APIStatusError,
|
|
29
|
+
APITimeoutError,
|
|
30
|
+
stt,
|
|
31
|
+
utils,
|
|
32
|
+
)
|
|
33
|
+
from livekit.agents.stt import SpeechEventType, STTCapabilities
|
|
34
|
+
from livekit.agents.types import (
|
|
35
|
+
DEFAULT_API_CONNECT_OPTIONS,
|
|
36
|
+
NOT_GIVEN,
|
|
37
|
+
NotGivenOr,
|
|
38
|
+
)
|
|
39
|
+
from livekit.agents.utils import AudioBuffer, is_given, merge_frames
|
|
40
|
+
from livekit.plugins.clova.constants import CLOVA_INPUT_SAMPLE_RATE
|
|
41
|
+
|
|
42
|
+
from .common import resample_audio
|
|
43
|
+
from .log import logger
|
|
44
|
+
from .models import ClovaSpeechAPIType, ClovaSttLanguages, clova_languages_mapping
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class STT(stt.STT):
    """Non-streaming speech-to-text that posts audio to the Clova recognizer endpoint."""

    def __init__(
        self,
        *,
        language: ClovaSttLanguages | str = "en-US",
        secret: NotGivenOr[str] = NOT_GIVEN,
        invoke_url: NotGivenOr[str] = NOT_GIVEN,
        http_session: aiohttp.ClientSession | None = None,
        threshold: float = 0.5,
    ) -> None:
        """
        Create a new instance of Clova STT.

        ``secret`` and ``invoke_url`` must be set, either using arguments or by setting the
        ``CLOVA_STT_SECRET_KEY`` and ``CLOVA_STT_INVOKE_URL`` environment variables, respectively.

        Args:
            language: Default recognition language; aliases found in
                ``clova_languages_mapping`` are translated, anything else is
                passed through unchanged.
            secret: API key sent in the ``X-CLOVASPEECH-API-KEY`` header.
            invoke_url: Base URL the recognizer path is appended to.
            http_session: Optional session to reuse; when omitted one is
                obtained lazily from ``utils.http_context``.
            threshold: Minimum confidence a result must reach to be returned.

        Raises:
            ValueError: If the secret key or the invoke URL is missing.
        """

        super().__init__(capabilities=STTCapabilities(streaming=False, interim_results=True))
        self._secret = secret if is_given(secret) else os.environ.get("CLOVA_STT_SECRET_KEY")
        self._invoke_url = (
            invoke_url if is_given(invoke_url) else os.environ.get("CLOVA_STT_INVOKE_URL")
        )
        self._language = clova_languages_mapping.get(language, language)
        self._session = http_session
        if not self._secret:
            raise ValueError(
                "Clova STT secret key is required. It should be set with env CLOVA_STT_SECRET_KEY"
            )
        # Fail early: without this check the request URL would silently become
        # "None/recognizer/upload" and only break at request time.
        if not self._invoke_url:
            raise ValueError(
                "Clova STT invoke URL is required. It should be set with env CLOVA_STT_INVOKE_URL"
            )
        self.threshold = threshold

    def update_options(self, *, language: NotGivenOr[str] = NOT_GIVEN) -> None:
        """Change the default recognition language used by later requests."""
        if is_given(language):
            self._language = clova_languages_mapping.get(language, language)

    def _ensure_session(self) -> aiohttp.ClientSession:
        """Return the configured HTTP session, fetching the shared one on first use."""
        if not self._session:
            self._session = utils.http_context.http_session()
        return self._session

    def url_builder(self, process_method: ClovaSpeechAPIType = "recognizer/upload") -> str:
        """Join the invoke URL and ``process_method`` into the request URL."""
        # Drop trailing slashes so a base URL ending in "/" does not yield "//".
        base_url = self._invoke_url.rstrip("/")
        return f"{base_url}/{process_method}"

    @staticmethod
    def _encode_wav(pcm: bytes) -> io.BytesIO:
        """Wrap mono 16-bit PCM at ``CLOVA_INPUT_SAMPLE_RATE`` in an in-memory WAV file."""
        wav_file = io.BytesIO()
        with wave.open(wav_file, "wb") as wav:
            wav.setnchannels(1)
            wav.setsampwidth(2)  # 16-bit
            wav.setframerate(CLOVA_INPUT_SAMPLE_RATE)
            wav.writeframes(pcm)
        # Rewind so the upload starts from the WAV header.
        wav_file.seek(0)
        return wav_file

    async def _recognize_impl(
        self,
        buffer: AudioBuffer,
        *,
        language: NotGivenOr[ClovaSttLanguages | str] = NOT_GIVEN,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> stt.SpeechEvent:
        """Upload ``buffer`` to the recognizer and return the transcription.

        Args:
            buffer: Audio frames to transcribe; they are merged, resampled to
                ``CLOVA_INPUT_SAMPLE_RATE`` and sent as a WAV upload.
            language: Language for this request only. The instance default is
                left untouched; use ``update_options`` to change it.
            conn_options: Supplies the socket connect timeout.

        Raises:
            ValueError: If the response has no text, contains an ``error``
                key, or reports a confidence lower than ``self.threshold``.
            APITimeoutError: If the request times out.
            APIStatusError: If aiohttp raises a ``ClientResponseError``.
        """
        # Resolve the language locally so a per-call override does not leak
        # into subsequent requests.
        request_language = (
            clova_languages_mapping.get(language, language)
            if is_given(language)
            else self._language
        )
        try:
            url = self.url_builder()
            payload = json.dumps({"language": request_language, "completion": "sync"})

            merged = merge_frames(buffer)
            pcm = resample_audio(
                merged.data.tobytes(), merged.sample_rate, CLOVA_INPUT_SAMPLE_RATE
            )
            wav_file = self._encode_wav(pcm)

            headers = {"X-CLOVASPEECH-API-KEY": self._secret}
            form_data = aiohttp.FormData()
            form_data.add_field("params", payload)
            form_data.add_field("media", wav_file, filename="audio.wav", content_type="audio/wav")
            # perf_counter is monotonic, so the measured duration cannot be
            # skewed by wall-clock adjustments.
            start = time.perf_counter()
            async with self._ensure_session().post(
                url,
                data=form_data,
                headers=headers,
                timeout=aiohttp.ClientTimeout(
                    total=30,
                    sock_connect=conn_options.timeout,
                ),
            ) as response:
                response_data = await response.json()
                elapsed = time.perf_counter() - start
                text = response_data.get("text")
                confidence = response_data.get("confidence")
                logger.info("%s | %s | total_seconds: %s", text, confidence, elapsed)
                if not text or "error" in response_data:
                    raise ValueError(f"Unexpected response: {response_data}")
                # A response without a confidence value cannot be compared to
                # the threshold (None < float raises TypeError), so the check
                # only runs when a value is present.
                if confidence is not None and confidence < self.threshold:
                    raise ValueError(
                        f"Confidence: {confidence} is below threshold {self.threshold}. Skipping."
                    )
                logger.info("final event: %s", response_data)
                return self._transcription_to_speech_event(text=text, language=request_language)

        except asyncio.TimeoutError as e:
            raise APITimeoutError() from e
        except aiohttp.ClientResponseError as e:
            raise APIStatusError(
                message=e.message,
                status_code=e.status,
                request_id=None,
                body=None,
            ) from e

    def _transcription_to_speech_event(
        self,
        text: str,
        event_type: SpeechEventType = stt.SpeechEventType.INTERIM_TRANSCRIPT,
        language: str | None = None,
    ) -> stt.SpeechEvent:
        """Build a ``SpeechEvent`` with a single alternative holding ``text``.

        Args:
            text: Transcribed text.
            event_type: Event type to report.
                NOTE(review): the default is INTERIM_TRANSCRIPT even though the
                request is a one-shot recognition - confirm this is intended.
            language: Language to tag the result with; defaults to the
                instance language when omitted.
        """
        return stt.SpeechEvent(
            type=event_type,
            alternatives=[
                stt.SpeechData(
                    text=text,
                    language=self._language if language is None else language,
                )
            ],
        )
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Copyright 2023 LiveKit, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
__version__ = "1.0.0"
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: livekit-plugins-clova
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: LiveKit Agents Plugin for LINE Clova STT
|
|
5
|
+
Project-URL: Documentation, https://docs.livekit.io
|
|
6
|
+
Project-URL: Website, https://livekit.io/
|
|
7
|
+
Project-URL: Source, https://github.com/livekit/agents
|
|
8
|
+
Author-email: LiveKit <hello@livekit.io>
|
|
9
|
+
License-Expression: Apache-2.0
|
|
10
|
+
Keywords: audio,livekit,realtime,video,webrtc
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Topic :: Multimedia :: Sound/Audio
|
|
18
|
+
Classifier: Topic :: Multimedia :: Video
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Requires-Python: >=3.9.0
|
|
21
|
+
Requires-Dist: livekit-agents>=1.0.0
|
|
22
|
+
Requires-Dist: pydub~=0.25.1
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
# LiveKit Plugins Clova
|
|
26
|
+
|
|
27
|
+
Agent Framework plugin for speech-to-text with [Clova](https://api.ncloud-docs.com/docs/)'s API. Currently supports speech-to-text.
|
|
28
|
+
|
|
29
|
+
## Installation
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
pip install livekit-plugins-clova
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Pre-requisites
|
|
36
|
+
|
|
37
|
+
You need an invoke URL and a secret key from Naver Cloud Platform -> Clova Speech, set as the environment variables `CLOVA_STT_INVOKE_URL` and `CLOVA_STT_SECRET_KEY`.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
livekit/plugins/clova/__init__.py,sha256=WelhQwAYhVLN1OYJjlXDe1k1rSj6lJQLIYSgdiA4Xp4,508
|
|
2
|
+
livekit/plugins/clova/common.py,sha256=67vNmTLX7le5nTpM4N8MFNmQuIYCXy7NKf9-OkP1JmI,359
|
|
3
|
+
livekit/plugins/clova/constants.py,sha256=b6X_va-KsJWDsDdjo-nka7yae_9fVmTnTb_sQm8gQao,66
|
|
4
|
+
livekit/plugins/clova/log.py,sha256=odnkyQ2umM1S3wZiHAaOrUowHZl-de1y57MXL9CD1uI,68
|
|
5
|
+
livekit/plugins/clova/models.py,sha256=R71m_BcWxmdqSPIrfc49a0yuJMlcrzT8HuQZTWmCpQk,378
|
|
6
|
+
livekit/plugins/clova/stt.py,sha256=cWU9t507GB6v1X1iYOO3kxtzvPM-A5YZQdRFtFRH5kI,5991
|
|
7
|
+
livekit/plugins/clova/version.py,sha256=nW89L_U9N4ukT3wAO3BeTqOaa87zLUOsEFz8TkiKIP8,600
|
|
8
|
+
livekit_plugins_clova-1.0.0.dist-info/METADATA,sha256=jt-CZ8AUcM02iMJrnzGo_6QI4P2zDKRFQARCbeqiOCs,1385
|
|
9
|
+
livekit_plugins_clova-1.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
10
|
+
livekit_plugins_clova-1.0.0.dist-info/RECORD,,
|