livekit-plugins-fal 1.0.0.dev4__tar.gz → 1.0.0rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of livekit-plugins-fal has been flagged as potentially problematic; consult the package's advisory page on the registry for details before upgrading.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: livekit-plugins-fal
3
- Version: 1.0.0.dev4
3
+ Version: 1.0.0rc1
4
4
  Summary: fal plugin template for LiveKit Agents
5
5
  Project-URL: Documentation, https://docs.livekit.io
6
6
  Project-URL: Website, https://livekit.io/
@@ -19,7 +19,7 @@ Classifier: Topic :: Multimedia :: Video
19
19
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
20
  Requires-Python: >=3.9.0
21
21
  Requires-Dist: fal-client
22
- Requires-Dist: livekit-agents>=1.0.0.dev4
22
+ Requires-Dist: livekit-agents>=1.0.0.rc1
23
23
  Description-Content-Type: text/markdown
24
24
 
25
25
  # LiveKit Plugins fal
@@ -0,0 +1,80 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from dataclasses import dataclass
5
+
6
+ import fal_client
7
+
8
+ from livekit import rtc
9
+ from livekit.agents import APIConnectionError, APIConnectOptions, stt
10
+ from livekit.agents.stt import SpeechEventType, STTCapabilities
11
+ from livekit.agents.types import (
12
+ NOT_GIVEN,
13
+ NotGivenOr,
14
+ )
15
+ from livekit.agents.utils import AudioBuffer, is_given
16
+
17
+
18
+ @dataclass
19
+ class _STTOptions:
20
+ language: str = "en"
21
+ task: str = "transcribe"
22
+ chunk_level: str = "segment"
23
+ version: str = "3"
24
+
25
+
26
class WizperSTT(stt.STT):
    """Speech-to-text plugin backed by fal.ai's Wizper API.

    Non-streaming: each call transcribes a complete audio buffer in a
    single request to the ``fal-ai/wizper`` endpoint.
    """

    def __init__(
        self,
        *,
        language: NotGivenOr[str] = NOT_GIVEN,
        api_key: NotGivenOr[str] = NOT_GIVEN,
    ):
        """Create a Wizper STT instance.

        Args:
            language: transcription language code; defaults to "en" when
                not given.
            api_key: fal.ai API key; falls back to the ``FAL_KEY``
                environment variable when not given.

        Raises:
            ValueError: if no API key is provided and ``FAL_KEY`` is unset.
        """
        super().__init__(capabilities=STTCapabilities(streaming=False, interim_results=True))
        self._api_key = api_key if is_given(api_key) else os.getenv("FAL_KEY")
        if not self._api_key:
            raise ValueError("fal AI API key is required. It should be set with env FAL_KEY")
        # Fix: resolve the NOT_GIVEN sentinel here. Previously `language` was
        # passed through unchecked, so an omitted argument left
        # self._opts.language set to the sentinel object instead of the
        # intended "en" default (and that sentinel was later sent to the API).
        self._opts = _STTOptions(language=language if is_given(language) else "en")
        self._fal_client = fal_client.AsyncClient(key=self._api_key)

    def update_options(self, *, language: NotGivenOr[str] = NOT_GIVEN) -> None:
        """Change the transcription language for subsequent requests."""
        if is_given(language):
            self._opts.language = language

    async def _recognize_impl(
        self,
        buffer: AudioBuffer,
        *,
        language: NotGivenOr[str] = NOT_GIVEN,
        conn_options: APIConnectOptions,
    ) -> stt.SpeechEvent:
        """Transcribe *buffer* via fal-ai/wizper and return a final transcript.

        Raises:
            APIConnectionError: if the fal client reports a request failure.
        """
        try:
            if is_given(language):
                self._opts.language = language
            # Wizper takes audio as a data URI, so encode the combined
            # frames as a single WAV payload.
            data_uri = fal_client.encode(
                rtc.combine_audio_frames(buffer).to_wav_bytes(), "audio/x-wav"
            )
            response = await self._fal_client.run(
                "fal-ai/wizper",
                arguments={
                    "audio_url": data_uri,
                    "task": self._opts.task,
                    "language": self._opts.language,
                    "chunk_level": self._opts.chunk_level,
                    "version": self._opts.version,
                },
                timeout=conn_options.timeout,
            )
            text = response.get("text", "")
            return self._transcription_to_speech_event(text=text)
        except fal_client.client.FalClientError as e:
            raise APIConnectionError() from e

    def _transcription_to_speech_event(self, text: str) -> stt.SpeechEvent:
        """Wrap raw transcript text in a FINAL_TRANSCRIPT speech event."""
        return stt.SpeechEvent(
            type=SpeechEventType.FINAL_TRANSCRIPT,
            alternatives=[stt.SpeechData(text=text, language=self._opts.language)],
        )

    async def aclose(self) -> None:
        """Close the underlying HTTP client.

        NOTE(review): reaches into fal_client.AsyncClient's private
        `_client` attribute — no public close API appears to exist; confirm
        against the fal-client version in use.
        """
        await self._fal_client._client.aclose()
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "1.0.0.dev4"
15
+ __version__ = '1.0.0.rc1'
@@ -22,7 +22,7 @@ classifiers = [
22
22
  "Programming Language :: Python :: 3.10",
23
23
  "Programming Language :: Python :: 3 :: Only",
24
24
  ]
25
- dependencies = ["livekit-agents>=1.0.0.dev4", "fal_client"]
25
+ dependencies = ["livekit-agents>=1.0.0.rc1", "fal_client"]
26
26
 
27
27
  [project.urls]
28
28
  Documentation = "https://docs.livekit.io"
@@ -1,100 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import dataclasses
4
- import os
5
- from dataclasses import dataclass
6
-
7
- import fal_client
8
-
9
- from livekit import rtc
10
- from livekit.agents import APIConnectionError, APIConnectOptions, stt
11
- from livekit.agents.stt import SpeechEventType, STTCapabilities
12
- from livekit.agents.utils import AudioBuffer
13
-
14
-
15
- @dataclass
16
- class _STTOptions:
17
- language: str
18
- task: str
19
- chunk_level: str
20
- version: str
21
-
22
-
23
- class WizperSTT(stt.STT):
24
- def __init__(
25
- self,
26
- *,
27
- language: str | None = "en",
28
- task: str | None = "transcribe",
29
- chunk_level: str | None = "segment",
30
- version: str | None = "3",
31
- ):
32
- super().__init__(capabilities=STTCapabilities(streaming=False, interim_results=True))
33
- self._api_key = os.getenv("FAL_KEY")
34
- self._opts = _STTOptions(
35
- language=language or "en",
36
- task=task or "transcribe",
37
- chunk_level=chunk_level or "segment",
38
- version=version or "3",
39
- )
40
- self._fal_client = fal_client.AsyncClient()
41
-
42
- if not self._api_key:
43
- raise ValueError("fal AI API key is required. It should be set with env FAL_KEY")
44
-
45
- def update_options(self, *, language: str | None = None) -> None:
46
- self._opts.language = language or self._opts.language
47
-
48
- def _sanitize_options(
49
- self,
50
- *,
51
- language: str | None = None,
52
- task: str | None = None,
53
- chunk_level: str | None = None,
54
- version: str | None = None,
55
- ) -> _STTOptions:
56
- config = dataclasses.replace(self._opts)
57
- config.language = language or config.language
58
- config.task = task or config.task
59
- config.chunk_level = chunk_level or config.chunk_level
60
- config.version = version or config.version
61
- return config
62
-
63
- async def _recognize_impl(
64
- self,
65
- buffer: AudioBuffer,
66
- *,
67
- language: str | None,
68
- conn_options: APIConnectOptions,
69
- ) -> stt.SpeechEvent:
70
- try:
71
- config = self._sanitize_options(language=language)
72
- data_uri = fal_client.encode(
73
- rtc.combine_audio_frames(buffer).to_wav_bytes(), "audio/x-wav"
74
- )
75
- response = await self._fal_client.run(
76
- "fal-ai/wizper",
77
- arguments={
78
- "audio_url": data_uri,
79
- "task": config.task,
80
- "language": config.language,
81
- "chunk_level": config.chunk_level,
82
- "version": config.version,
83
- },
84
- timeout=conn_options.timeout,
85
- )
86
- text = response.get("text", "")
87
- return self._transcription_to_speech_event(text=text)
88
- except fal_client.client.FalClientError as e:
89
- raise APIConnectionError() from e
90
-
91
- def _transcription_to_speech_event(
92
- self, event_type=SpeechEventType.FINAL_TRANSCRIPT, text=None
93
- ) -> stt.SpeechEvent:
94
- return stt.SpeechEvent(
95
- type=event_type,
96
- alternatives=[stt.SpeechData(text=text, language=self._opts.language)],
97
- )
98
-
99
- async def aclose(self) -> None:
100
- await self._fal_client._client.aclose()