livekit-plugins-clova 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of livekit-plugins-clova might be problematic. Click here for more details.

@@ -0,0 +1,30 @@
1
+ from .stt import STT
2
+ from .version import __version__
3
+
4
+ __all__ = [
5
+ "STT",
6
+ "__version__",
7
+ ]
8
+
9
+
10
+ from livekit.agents import Plugin
11
+
12
+
13
class ClovaSTTPlugin(Plugin):
    """Plugin entry that identifies the Clova STT package to LiveKit Agents."""

    def __init__(self):
        # The plugin is identified by this module's name, version and package.
        identity = (__name__, __version__, __package__)
        super().__init__(*identity)

    def download_files(self):
        """Do nothing; this plugin defines no download step."""
        return None
19
+
20
+
21
# Instantiate the plugin and register it as a side effect of importing this package.
Plugin.register_plugin(ClovaSTTPlugin())

# Cleanup docs of unexported modules
# The namespace snapshot is taken before `_module`, `NOT_IN_ALL` and `__pdoc__`
# are bound, so those three helper names are not part of the snapshot.
_module = dir()
NOT_IN_ALL = [m for m in _module if m not in __all__]

# Per pdoc's `__pdoc__` override convention, mapping a name to False excludes
# it from the generated documentation.
__pdoc__ = {}

for n in NOT_IN_ALL:
    __pdoc__[n] = False
@@ -0,0 +1,13 @@
1
+ import io
2
+
3
+ from pydub import AudioSegment
4
+
5
+
6
def resample_audio(
    audio_bytes,
    original_sample_rate,
    target_sample_rate,
    *,
    sample_width=2,
    channels=1,
):
    """Resample raw PCM audio to a different frame rate.

    Args:
        audio_bytes: Headerless PCM sample data.
        original_sample_rate: Frame rate of ``audio_bytes`` in Hz.
        target_sample_rate: Desired frame rate in Hz.
        sample_width: Bytes per sample; defaults to 2 (16-bit), the value
            that was previously hard-coded.
        channels: Number of interleaved channels; defaults to 1 (mono), the
            value that was previously hard-coded.

    Returns:
        The resampled PCM data as ``bytes``. When the two rates are equal the
        input is returned as-is (converted to ``bytes`` if necessary) without
        going through pydub.
    """
    if original_sample_rate == target_sample_rate:
        # Nothing to convert, so skip the decode/re-encode round trip.
        return bytes(audio_bytes)

    resampled_audio = AudioSegment.from_raw(
        io.BytesIO(audio_bytes),
        sample_width=sample_width,
        frame_rate=original_sample_rate,
        channels=channels,
    ).set_frame_rate(target_sample_rate)
    return resampled_audio.raw_data
@@ -0,0 +1,2 @@
1
# Sample rate (Hz) that audio is resampled to, and written into the WAV header
# with, before it is uploaded to Clova.
CLOVA_INPUT_SAMPLE_RATE = 16000
# NOTE(review): presumably the sample rate (Hz) of incoming LiveKit audio; it
# is not referenced by the other modules visible here — confirm before relying on it.
LIVEKIT_INPUT_SAMPLE_RATE = 48000
@@ -0,0 +1,3 @@
1
+ import logging
2
+
3
+ logger = logging.getLogger("livekit.plugins.clova")
@@ -0,0 +1,15 @@
1
+ from typing import Literal
2
+
3
# Language codes the STT plugin advertises for its ``language`` option.
ClovaSttLanguages = Literal["ko-KR", "en-US", "enko", "ja", "zh-cn", "zh-tw"]

# Endpoint path suffixes that are appended to the invoke URL when building a request URL.
ClovaSpeechAPIType = Literal["recognizer/object-storage", "recognizer/url", "recognizer/upload"]

# Normalizes user-supplied language codes. Apart from the "en" -> "en-US"
# alias, every entry maps a code to itself; callers look codes up with
# ``.get(code, code)``, so codes not listed here pass through unchanged.
clova_languages_mapping = {
    "en": "en-US",
    "ko-KR": "ko-KR",
    "en-US": "en-US",
    "enko": "enko",
    "ja": "ja",
    "zh-cn": "zh-cn",
    "zh-tw": "zh-tw",
}
@@ -0,0 +1,161 @@
1
+ # Copyright 2023 LiveKit, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from __future__ import annotations
16
+
17
+ import asyncio
18
+ import io
19
+ import json
20
+ import os
21
+ import time
22
+ import wave
23
+
24
+ import aiohttp
25
+
26
+ from livekit.agents import (
27
+ APIConnectOptions,
28
+ APIStatusError,
29
+ APITimeoutError,
30
+ stt,
31
+ utils,
32
+ )
33
+ from livekit.agents.stt import SpeechEventType, STTCapabilities
34
+ from livekit.agents.types import (
35
+ DEFAULT_API_CONNECT_OPTIONS,
36
+ NOT_GIVEN,
37
+ NotGivenOr,
38
+ )
39
+ from livekit.agents.utils import AudioBuffer, is_given, merge_frames
40
+ from livekit.plugins.clova.constants import CLOVA_INPUT_SAMPLE_RATE
41
+
42
+ from .common import resample_audio
43
+ from .log import logger
44
+ from .models import ClovaSpeechAPIType, ClovaSttLanguages, clova_languages_mapping
45
+
46
+
47
class STT(stt.STT):
    """Non-streaming speech-to-text client for the Clova Speech HTTP API.

    Each recognition merges the supplied audio frames, resamples them to
    ``CLOVA_INPUT_SAMPLE_RATE``, wraps them in a WAV container and uploads
    them in one multipart request to ``<invoke_url>/recognizer/upload``.
    """

    def __init__(
        self,
        *,
        language: ClovaSttLanguages | str = "en-US",
        secret: NotGivenOr[str] = NOT_GIVEN,
        invoke_url: NotGivenOr[str] = NOT_GIVEN,
        http_session: aiohttp.ClientSession | None = None,
        threshold: float = 0.5,
    ):
        """
        Create a new instance of Clova STT.

        ``secret`` and ``invoke_url`` must be set, either using arguments or by setting the
        ``CLOVA_STT_SECRET_KEY`` and ``CLOVA_STT_INVOKE_URL`` environment variables, respectively.

        Args:
            language: Default recognition language; normalized through
                ``clova_languages_mapping`` (unknown codes pass through unchanged).
            secret: API key sent in the ``X-CLOVASPEECH-API-KEY`` header.
            invoke_url: Base URL the endpoint path is appended to.
            http_session: Optional session to reuse; when omitted, one is obtained
                lazily from ``utils.http_context``.
            threshold: Minimum confidence a transcript must reach to be returned.

        Raises:
            ValueError: If the secret key or the invoke URL is missing or empty.
        """

        super().__init__(capabilities=STTCapabilities(streaming=False, interim_results=True))
        self._secret = secret if is_given(secret) else os.environ.get("CLOVA_STT_SECRET_KEY")
        self._invoke_url = (
            invoke_url if is_given(invoke_url) else os.environ.get("CLOVA_STT_INVOKE_URL")
        )
        self._language = clova_languages_mapping.get(language, language)
        self._session = http_session
        # Reject empty strings too: an empty key or URL can never produce a valid request.
        if not self._secret:
            raise ValueError(
                "Clova STT secret key is required. It should be set with env CLOVA_STT_SECRET_KEY"
            )
        # Without this check a missing URL would only surface later as a request
        # to the literal string "None/recognizer/upload".
        if not self._invoke_url:
            raise ValueError(
                "Clova STT invoke URL is required. It should be set with env CLOVA_STT_INVOKE_URL"
            )
        self.threshold = threshold

    def update_options(self, *, language: NotGivenOr[str] = NOT_GIVEN) -> None:
        """Change the default recognition language for subsequent requests."""
        if is_given(language):
            self._language = clova_languages_mapping.get(language, language)

    def _ensure_session(self) -> aiohttp.ClientSession:
        """Return the HTTP session, fetching one from ``utils.http_context`` on first use."""
        if not self._session:
            self._session = utils.http_context.http_session()
        return self._session

    def url_builder(self, process_method: ClovaSpeechAPIType = "recognizer/upload") -> str:
        """Join the invoke URL and ``process_method`` into the request URL.

        Trailing slashes on the invoke URL are dropped so the result never
        contains a doubled ``//`` before the endpoint path.
        """
        return f"{self._invoke_url.rstrip('/')}/{process_method}"

    async def _recognize_impl(
        self,
        buffer: AudioBuffer,
        *,
        language: NotGivenOr[ClovaSttLanguages | str] = NOT_GIVEN,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> stt.SpeechEvent:
        """Upload ``buffer`` to Clova and return the transcript as a speech event.

        Args:
            buffer: Audio frame(s) to transcribe.
            language: Language for this request only; the instance default is
                left untouched.
            conn_options: Connection options; ``timeout`` is used as the socket
                connect timeout.

        Raises:
            ValueError: If the response carries no text, reports an error, or the
                reported confidence is below ``threshold``.
            APITimeoutError: If the request times out.
            APIStatusError: If the server answers with an HTTP error status or a
                body that cannot be decoded as JSON.
        """
        # Resolve the language locally so a per-call override neither mutates the
        # instance default nor races with other in-flight requests.
        request_language = (
            clova_languages_mapping.get(language, language)
            if is_given(language)
            else self._language
        )
        try:
            url = self.url_builder()
            payload = json.dumps({"language": request_language, "completion": "sync"})

            buffer = merge_frames(buffer)
            # NOTE(review): resample_audio and the WAV header below both assume
            # 16-bit mono PCM — confirm callers never pass multi-channel frames.
            buffer_bytes = resample_audio(
                buffer.data.tobytes(), buffer.sample_rate, CLOVA_INPUT_SAMPLE_RATE
            )

            io_buffer = io.BytesIO()
            with wave.open(io_buffer, "wb") as wav:
                wav.setnchannels(1)
                wav.setsampwidth(2)  # 16-bit
                wav.setframerate(CLOVA_INPUT_SAMPLE_RATE)
                wav.writeframes(buffer_bytes)
            io_buffer.seek(0)

            headers = {"X-CLOVASPEECH-API-KEY": self._secret}
            form_data = aiohttp.FormData()
            form_data.add_field("params", payload)
            form_data.add_field("media", io_buffer, filename="audio.wav", content_type="audio/wav")
            # Monotonic clock: the measured latency is immune to wall-clock adjustments.
            start = time.perf_counter()
            async with self._ensure_session().post(
                url,
                data=form_data,
                headers=headers,
                timeout=aiohttp.ClientTimeout(
                    total=30,
                    sock_connect=conn_options.timeout,
                ),
            ) as response:
                # Surface HTTP error statuses as ClientResponseError so the handler
                # below can translate them into APIStatusError.
                response.raise_for_status()
                response_data = await response.json()
            elapsed = time.perf_counter() - start

            if not isinstance(response_data, dict):
                raise ValueError(f"Unexpected response: {response_data}")

            text = response_data.get("text")
            confidence = response_data.get("confidence")
            logger.info("%s | %s | total_seconds: %s", text, confidence, elapsed)
            if not text or "error" in response_data:
                raise ValueError(f"Unexpected response: {response_data}")
            # A response without a confidence value cannot be compared against the
            # threshold; accept it instead of failing with a TypeError.
            if confidence is not None and confidence < self.threshold:
                raise ValueError(
                    f"Confidence: {confidence} is below threshold {self.threshold}. Skipping."
                )
            logger.info("final event: %s", response_data)
            return self._transcription_to_speech_event(text=text, language=request_language)

        except asyncio.TimeoutError as e:
            raise APITimeoutError() from e
        except aiohttp.ClientResponseError as e:
            raise APIStatusError(
                message=e.message,
                status_code=e.status,
                request_id=None,
                body=None,
            ) from e

    def _transcription_to_speech_event(
        self,
        text: str,
        event_type: SpeechEventType = stt.SpeechEventType.INTERIM_TRANSCRIPT,
        language: str | None = None,
    ) -> stt.SpeechEvent:
        """Wrap ``text`` in a single-alternative ``SpeechEvent``.

        Args:
            text: Transcript text.
            event_type: Event type to report.
            language: Language to attach to the transcript; falls back to the
                instance default when omitted.
        """
        # NOTE(review): the default is INTERIM_TRANSCRIPT even though this result is
        # the complete response of a one-shot request — confirm whether
        # FINAL_TRANSCRIPT is intended.
        return stt.SpeechEvent(
            type=event_type,
            alternatives=[
                stt.SpeechData(
                    text=text,
                    language=self._language if language is None else language,
                )
            ],
        )
@@ -0,0 +1,15 @@
1
+ # Copyright 2023 LiveKit, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ __version__ = "1.0.0"
@@ -0,0 +1,37 @@
1
+ Metadata-Version: 2.4
2
+ Name: livekit-plugins-clova
3
+ Version: 1.0.0
4
+ Summary: LiveKit Agents Plugin for LINE Clova STT
5
+ Project-URL: Documentation, https://docs.livekit.io
6
+ Project-URL: Website, https://livekit.io/
7
+ Project-URL: Source, https://github.com/livekit/agents
8
+ Author-email: LiveKit <hello@livekit.io>
9
+ License-Expression: Apache-2.0
10
+ Keywords: audio,livekit,realtime,video,webrtc
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: Apache Software License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3 :: Only
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Topic :: Multimedia :: Sound/Audio
18
+ Classifier: Topic :: Multimedia :: Video
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Requires-Python: >=3.9.0
21
+ Requires-Dist: livekit-agents>=1.0.0
22
+ Requires-Dist: pydub~=0.25.1
23
+ Description-Content-Type: text/markdown
24
+
25
+ # LiveKit Plugins Clova
26
+
27
+ Agent Framework plugin for [Clova](https://api.ncloud-docs.com/docs/)'s API. Currently supports speech-to-text.
28
+
29
+ ## Installation
30
+
31
+ ```bash
32
+ pip install livekit-plugins-clova
33
+ ```
34
+
35
+ ## Pre-requisites
36
+
37
+ You need an invoke URL and a secret key from Naver Cloud Platform (Clova Speech). Set them as the environment variables `CLOVA_STT_INVOKE_URL` and `CLOVA_STT_SECRET_KEY`.
@@ -0,0 +1,10 @@
1
+ livekit/plugins/clova/__init__.py,sha256=WelhQwAYhVLN1OYJjlXDe1k1rSj6lJQLIYSgdiA4Xp4,508
2
+ livekit/plugins/clova/common.py,sha256=67vNmTLX7le5nTpM4N8MFNmQuIYCXy7NKf9-OkP1JmI,359
3
+ livekit/plugins/clova/constants.py,sha256=b6X_va-KsJWDsDdjo-nka7yae_9fVmTnTb_sQm8gQao,66
4
+ livekit/plugins/clova/log.py,sha256=odnkyQ2umM1S3wZiHAaOrUowHZl-de1y57MXL9CD1uI,68
5
+ livekit/plugins/clova/models.py,sha256=R71m_BcWxmdqSPIrfc49a0yuJMlcrzT8HuQZTWmCpQk,378
6
+ livekit/plugins/clova/stt.py,sha256=cWU9t507GB6v1X1iYOO3kxtzvPM-A5YZQdRFtFRH5kI,5991
7
+ livekit/plugins/clova/version.py,sha256=nW89L_U9N4ukT3wAO3BeTqOaa87zLUOsEFz8TkiKIP8,600
8
+ livekit_plugins_clova-1.0.0.dist-info/METADATA,sha256=jt-CZ8AUcM02iMJrnzGo_6QI4P2zDKRFQARCbeqiOCs,1385
9
+ livekit_plugins_clova-1.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
10
+ livekit_plugins_clova-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.27.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any