livekit-plugins-fireworksai 1.2.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,175 @@
1
+ **/.vscode
2
+ **/.DS_Store
3
+
4
+ # Byte-compiled / optimized / DLL files
5
+ __pycache__/
6
+ *.py[cod]
7
+ *$py.class
8
+
9
+ # C extensions
10
+ *.so
11
+
12
+ # Distribution / packaging
13
+ .Python
14
+ build/
15
+ develop-eggs/
16
+ dist/
17
+ downloads/
18
+ eggs/
19
+ .eggs/
20
+ lib/
21
+ lib64/
22
+ parts/
23
+ sdist/
24
+ var/
25
+ wheels/
26
+ share/python-wheels/
27
+ *.egg-info/
28
+ .installed.cfg
29
+ *.egg
30
+ MANIFEST
31
+
32
+ # PyInstaller
33
+ # Usually these files are written by a python script from a template
34
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
35
+ *.manifest
36
+ *.spec
37
+
38
+ # Installer logs
39
+ pip-log.txt
40
+ pip-delete-this-directory.txt
41
+
42
+ # Unit test / coverage reports
43
+ htmlcov/
44
+ .tox/
45
+ .nox/
46
+ .coverage
47
+ .coverage.*
48
+ .cache
49
+ nosetests.xml
50
+ coverage.xml
51
+ *.cover
52
+ *.py,cover
53
+ .hypothesis/
54
+ .pytest_cache/
55
+ cover/
56
+
57
+ # Translations
58
+ *.mo
59
+ *.pot
60
+
61
+ # Django stuff:
62
+ *.log
63
+ local_settings.py
64
+ db.sqlite3
65
+ db.sqlite3-journal
66
+
67
+ # Flask stuff:
68
+ instance/
69
+ .webassets-cache
70
+
71
+ # Scrapy stuff:
72
+ .scrapy
73
+
74
+ # Sphinx documentation
75
+ docs/_build/
76
+
77
+ # PyBuilder
78
+ .pybuilder/
79
+ target/
80
+
81
+ # Jupyter Notebook
82
+ .ipynb_checkpoints
83
+
84
+ # IPython
85
+ profile_default/
86
+ ipython_config.py
87
+
88
+ # pyenv
89
+ # For a library or package, you might want to ignore these files since the code is
90
+ # intended to run in multiple environments; otherwise, check them in:
91
+ # .python-version
92
+
93
+ # pipenv
94
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
95
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
96
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
97
+ # install all needed dependencies.
98
+ #Pipfile.lock
99
+
100
+ # poetry
101
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
102
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
103
+ # commonly ignored for libraries.
104
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
105
+ #poetry.lock
106
+
107
+ # pdm
108
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
109
+ #pdm.lock
110
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
111
+ # in version control.
112
+ # https://pdm.fming.dev/#use-with-ide
113
+ .pdm.toml
114
+
115
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
116
+ __pypackages__/
117
+
118
+ # Celery stuff
119
+ celerybeat-schedule
120
+ celerybeat.pid
121
+
122
+ # SageMath parsed files
123
+ *.sage.py
124
+
125
+ # Environments
126
+ .env
127
+ .venv
128
+ env/
129
+ venv/
130
+ ENV/
131
+ env.bak/
132
+ venv.bak/
133
+
134
+ # Spyder project settings
135
+ .spyderproject
136
+ .spyproject
137
+
138
+ # Rope project settings
139
+ .ropeproject
140
+
141
+ # mkdocs documentation
142
+ /site
143
+
144
+ # mypy
145
+ .mypy_cache/
146
+ .dmypy.json
147
+ dmypy.json
148
+
149
+ # trunk
150
+ .trunk/
151
+
152
+ # Pyre type checker
153
+ .pyre/
154
+
155
+ # pytype static type analyzer
156
+ .pytype/
157
+
158
+ # Cython debug symbols
159
+ cython_debug/
160
+
161
+ # PyCharm
162
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
163
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
164
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
165
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
166
+ .idea/
167
+
168
+ node_modules
169
+
170
+ credentials.json
171
+ pyrightconfig.json
172
+ docs/
173
+
174
+ # Database files
175
+ *.db
@@ -0,0 +1,36 @@
1
+ Metadata-Version: 2.4
2
+ Name: livekit-plugins-fireworksai
3
+ Version: 1.2.12
4
+ Summary: LiveKit Agents Plugin for Fireworks AI
5
+ Project-URL: Documentation, https://docs.livekit.io
6
+ Project-URL: Website, https://livekit.io/
7
+ Project-URL: Source, https://github.com/livekit/agents
8
+ Author-email: LiveKit <hello@livekit.io>
9
+ License-Expression: Apache-2.0
10
+ Keywords: audio,livekit,realtime,video,webrtc
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: Apache Software License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3 :: Only
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Topic :: Multimedia :: Sound/Audio
18
+ Classifier: Topic :: Multimedia :: Video
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Requires-Python: >=3.9.0
21
+ Requires-Dist: livekit-agents>=1.2.12
22
+ Description-Content-Type: text/markdown
23
+
24
+ # Fireworks AI plugin for LiveKit Agents
25
+
26
+ Support for the speech-to-text API from [Fireworks AI](https://fireworks.ai/).
27
+
28
+ ## Installation
29
+
30
+ ```bash
31
+ pip install livekit-plugins-fireworksai
32
+ ```
33
+
34
+ ## Pre-requisites
35
+
36
+ You'll need an API key from Fireworks AI. It can be set as an environment variable: `FIREWORKS_API_KEY`
@@ -0,0 +1,13 @@
1
+ # Fireworks AI plugin for LiveKit Agents
2
+
3
+ Support for the speech-to-text API from [Fireworks AI](https://fireworks.ai/).
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install livekit-plugins-fireworksai
9
+ ```
10
+
11
+ ## Pre-requisites
12
+
13
+ You'll need an API key from Fireworks AI. It can be set as an environment variable: `FIREWORKS_API_KEY`
@@ -0,0 +1,45 @@
1
+ # Copyright 2025 LiveKit, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Fireworks AI plugin for LiveKit Agents"""
16
+
17
+ from .log import logger
18
+ from .stt import STT, SpeechStream
19
+ from .version import __version__
20
+
21
+ __all__ = [
22
+ "STT",
23
+ "SpeechStream",
24
+ "logger",
25
+ "__version__",
26
+ ]
27
+
28
+ from livekit.agents import Plugin
29
+
30
+
31
class FireworksAIPlugin(Plugin):
    """Plugin descriptor registered with LiveKit Agents when this package is imported."""

    def __init__(self) -> None:
        # Hand the module name, version, package and logger to the base Plugin.
        super().__init__(__name__, __version__, __package__, logger)


Plugin.register_plugin(FireworksAIPlugin())

# Cleanup docs of unexported modules: every module-level name that is not
# listed in __all__ is flagged False in __pdoc__.
_module = dir()
NOT_IN_ALL = [name for name in _module if name not in __all__]

__pdoc__ = dict.fromkeys(NOT_IN_ALL, False)
@@ -0,0 +1,3 @@
1
+ import logging
2
+
3
+ logger = logging.getLogger("livekit.plugins.fireworksai")
@@ -0,0 +1,507 @@
1
+ # Copyright 2025 LiveKit, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from __future__ import annotations
16
+
17
+ import asyncio
18
+ import dataclasses
19
+ import json
20
+ import os
21
+ import weakref
22
+ from dataclasses import dataclass
23
+ from typing import Callable
24
+ from urllib.parse import urlencode
25
+
26
+ import aiohttp
27
+
28
+ from livekit.agents import (
29
+ DEFAULT_API_CONNECT_OPTIONS,
30
+ APIConnectOptions,
31
+ APIStatusError,
32
+ stt,
33
+ utils,
34
+ )
35
+ from livekit.agents.types import NOT_GIVEN, NotGivenOr
36
+ from livekit.agents.utils import AudioBuffer, is_given
37
+
38
+ from .log import logger
39
+
40
+ _STREAMING_PATH = "/audio/transcriptions/streaming"
41
+
42
+
43
+ class _PeriodicCollector:
44
+ def __init__(self, duration: float, callback: Callable[[float], None]):
45
+ self._duration = duration
46
+ self._callback = callback
47
+ self._collected_value = 0.0
48
+ self._task: asyncio.Task | None = None
49
+ self._lock = asyncio.Lock()
50
+
51
+ async def push(self, value: float) -> None:
52
+ async with self._lock:
53
+ self._collected_value += value
54
+ if not self._task:
55
+ self._task = asyncio.create_task(self._run())
56
+
57
+ async def flush(self) -> None:
58
+ async with self._lock:
59
+ if self._task:
60
+ self._task.cancel()
61
+ try:
62
+ await self._task
63
+ except asyncio.CancelledError:
64
+ pass
65
+ self._task = None
66
+
67
+ if self._collected_value > 0:
68
+ self._callback(self._collected_value)
69
+ self._collected_value = 0.0
70
+
71
+ async def _run(self) -> None:
72
+ await asyncio.sleep(self._duration)
73
+ async with self._lock:
74
+ self._callback(self._collected_value)
75
+ self._collected_value = 0.0
76
+ self._task = None
77
+
78
+
79
+ @dataclass
80
+ class STTOptions:
81
+ model: NotGivenOr[str]
82
+ sample_rate: int
83
+ language: NotGivenOr[str] = NOT_GIVEN
84
+ prompt: NotGivenOr[str] = NOT_GIVEN
85
+ temperature: NotGivenOr[float] = NOT_GIVEN
86
+ skip_vad: NotGivenOr[bool] = NOT_GIVEN
87
+ vad_kwargs: NotGivenOr[dict] = NOT_GIVEN
88
+ text_timeout_seconds: float = 1.0
89
+ response_format: str = "verbose_json"
90
+ timestamp_granularities: NotGivenOr[list[str]] = NOT_GIVEN
91
+ base_url: NotGivenOr[str] = NOT_GIVEN
92
+
93
+
94
+ class STT(stt.STT):
95
+ def __init__(
96
+ self,
97
+ *,
98
+ model: NotGivenOr[str] = NOT_GIVEN,
99
+ api_key: NotGivenOr[str] = NOT_GIVEN,
100
+ sample_rate: int = 16000,
101
+ language: NotGivenOr[str] = NOT_GIVEN,
102
+ prompt: NotGivenOr[str] = NOT_GIVEN,
103
+ temperature: NotGivenOr[float] = NOT_GIVEN,
104
+ skip_vad: NotGivenOr[bool] = NOT_GIVEN,
105
+ vad_kwargs: NotGivenOr[dict] = NOT_GIVEN,
106
+ text_timeout_seconds: float = 1.0,
107
+ timestamp_granularities: NotGivenOr[list[str]] = NOT_GIVEN,
108
+ response_format: str = "verbose_json",
109
+ http_session: aiohttp.ClientSession | None = None,
110
+ base_url: str = "wss://audio-streaming.us-virginia-1.direct.fireworks.ai/v1",
111
+ ):
112
+ """
113
+ Create a new instance of Fireworks AI STT.
114
+
115
+ Args:
116
+ model: The Fireworks AI STT model to use. Defaults to NOT_GIVEN (server uses default model).
117
+ language: The target language for transcription. Defaults to NOT_GIVEN (server detects language automatically).
118
+ Full list: https://fireworks.ai/docs/api-reference/audio-streaming-transcriptions#supported-languages
119
+ prompt: The input prompt that the model will use when generating the transcription. Defaults to NOT_GIVEN.
120
+ temperature: Sampling temperature to use when decoding text tokens during transcription. Defaults to NOT_GIVEN.
121
+ skip_vad: Whether to skip server-side VAD. Defaults to NOT_GIVEN.
122
+ vad_kwargs: The optional kwargs to pass to the VAD model.
123
+ Defaults to NOT_GIVEN. Example: Set to {"threshold": 0.15} to adjust the VAD threshold.
124
+ text_timeout_seconds: Duration of silence before marking transcript as final. Defaults to 1.0.
125
+ timestamp_granularities: The timestamp granularities to populate for this streaming transcription.
126
+ Defaults to NOT_GIVEN. Set to "word,segment" to enable timestamp granularities.
127
+ response_format: The format in which to return the response. Default to "verbose_json".
128
+ base_url: The base URL for the Fireworks AI STT.
129
+ Defaults to "wss://audio-streaming.us-virginia-1.direct.fireworks.ai/v1".
130
+ api_key: The Fireworks AI API key. If not provided, it will be read from
131
+ the FIREWORKS_API_KEY environment variable.
132
+ http_session: Optional aiohttp ClientSession to use for requests.
133
+
134
+ Raises:
135
+ ValueError: If no API key is provided, found in environment variables, or if a parameter is invalid.
136
+ """
137
+ super().__init__(
138
+ capabilities=stt.STTCapabilities(streaming=True, interim_results=True),
139
+ )
140
+ if sample_rate != 16000:
141
+ raise ValueError("FireworksAI STT only supports a sample rate of 16000")
142
+
143
+ if not 1.0 <= text_timeout_seconds <= 29.0:
144
+ raise ValueError("text_timeout_seconds must be between 1.0 and 29.0")
145
+
146
+ fireworks_api_key = api_key if is_given(api_key) else os.environ.get("FIREWORKS_API_KEY")
147
+ if fireworks_api_key is None:
148
+ raise ValueError(
149
+ "Fireworks API key is required. "
150
+ "Pass one in via the `api_key` parameter, "
151
+ "or set it as the `FIREWORKS_API_KEY` environment variable"
152
+ )
153
+ self._api_key = fireworks_api_key
154
+ self._opts = STTOptions(
155
+ model=model,
156
+ sample_rate=sample_rate,
157
+ language=language,
158
+ prompt=prompt,
159
+ temperature=temperature,
160
+ skip_vad=skip_vad,
161
+ vad_kwargs=vad_kwargs,
162
+ text_timeout_seconds=text_timeout_seconds,
163
+ response_format=response_format,
164
+ timestamp_granularities=timestamp_granularities,
165
+ base_url=base_url,
166
+ )
167
+ self._session = http_session
168
+ self._streams = weakref.WeakSet[SpeechStream]()
169
+
170
+ @property
171
+ def session(self) -> aiohttp.ClientSession:
172
+ if not self._session:
173
+ self._session = utils.http_context.http_session()
174
+ return self._session
175
+
176
+ async def _recognize_impl(
177
+ self,
178
+ buffer: AudioBuffer,
179
+ *,
180
+ language: NotGivenOr[str] = NOT_GIVEN,
181
+ conn_options: APIConnectOptions,
182
+ ) -> stt.SpeechEvent:
183
+ raise NotImplementedError(
184
+ "FireworksAI STT does not support batch recognition, use stream() instead"
185
+ )
186
+
187
+ def stream(
188
+ self,
189
+ *,
190
+ language: NotGivenOr[str] = NOT_GIVEN,
191
+ conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
192
+ ) -> SpeechStream:
193
+ config = dataclasses.replace(self._opts)
194
+ stream = SpeechStream(
195
+ stt=self,
196
+ opts=config,
197
+ conn_options=conn_options,
198
+ api_key=self._api_key,
199
+ http_session=self.session,
200
+ )
201
+ self._streams.add(stream)
202
+ return stream
203
+
204
+ def update_options(
205
+ self,
206
+ *,
207
+ model: NotGivenOr[str] = NOT_GIVEN,
208
+ language: NotGivenOr[str] = NOT_GIVEN,
209
+ prompt: NotGivenOr[str] = NOT_GIVEN,
210
+ temperature: NotGivenOr[float] = NOT_GIVEN,
211
+ skip_vad: NotGivenOr[bool] = NOT_GIVEN,
212
+ vad_kwargs: NotGivenOr[dict] = NOT_GIVEN,
213
+ text_timeout_seconds: NotGivenOr[float] = NOT_GIVEN,
214
+ timestamp_granularities: NotGivenOr[list[str]] = NOT_GIVEN,
215
+ ) -> None:
216
+ if is_given(model):
217
+ self._opts.model = model
218
+ if is_given(language):
219
+ self._opts.language = language
220
+ if is_given(prompt):
221
+ self._opts.prompt = prompt
222
+ if is_given(temperature):
223
+ self._opts.temperature = temperature
224
+ if is_given(skip_vad):
225
+ self._opts.skip_vad = skip_vad
226
+ if is_given(vad_kwargs):
227
+ self._opts.vad_kwargs = vad_kwargs
228
+ if is_given(text_timeout_seconds):
229
+ if not 1.0 <= text_timeout_seconds <= 29.0:
230
+ raise ValueError("text_timeout_seconds must be between 1.0 and 29.0")
231
+ self._opts.text_timeout_seconds = text_timeout_seconds
232
+ if is_given(timestamp_granularities):
233
+ self._opts.timestamp_granularities = timestamp_granularities
234
+
235
+ for stream in self._streams:
236
+ stream.update_options(
237
+ model=model,
238
+ language=language,
239
+ prompt=prompt,
240
+ temperature=temperature,
241
+ skip_vad=skip_vad,
242
+ vad_kwargs=vad_kwargs,
243
+ text_timeout_seconds=text_timeout_seconds,
244
+ timestamp_granularities=timestamp_granularities,
245
+ )
246
+
247
+
248
+ class SpeechStream(stt.SpeechStream):
249
+ _CLOSE_MSG: str = json.dumps({"checkpoint_id": "final"})
250
+
251
+ def __init__(
252
+ self,
253
+ *,
254
+ stt: STT,
255
+ opts: STTOptions,
256
+ conn_options: APIConnectOptions,
257
+ api_key: str,
258
+ http_session: aiohttp.ClientSession,
259
+ ) -> None:
260
+ super().__init__(stt=stt, conn_options=conn_options, sample_rate=opts.sample_rate)
261
+
262
+ self._opts = opts
263
+ self._api_key = api_key
264
+ self._session = http_session
265
+ self._transcript_state: dict[str, str] = {}
266
+ self._reconnect_event = asyncio.Event()
267
+ self._speaking = False
268
+ self._final_segments_length: dict[int, int] = {}
269
+ self._last_final_segment_id = -1
270
+ self._audio_duration_collector = _PeriodicCollector(
271
+ callback=self._on_audio_duration_report,
272
+ duration=10.0,
273
+ )
274
+
275
+ def update_options(
276
+ self,
277
+ *,
278
+ model: NotGivenOr[str] = NOT_GIVEN,
279
+ language: NotGivenOr[str] = NOT_GIVEN,
280
+ prompt: NotGivenOr[str] = NOT_GIVEN,
281
+ temperature: NotGivenOr[float] = NOT_GIVEN,
282
+ skip_vad: NotGivenOr[bool] = NOT_GIVEN,
283
+ vad_kwargs: NotGivenOr[dict] = NOT_GIVEN,
284
+ text_timeout_seconds: NotGivenOr[float] = NOT_GIVEN,
285
+ timestamp_granularities: NotGivenOr[list[str]] = NOT_GIVEN,
286
+ ) -> None:
287
+ if is_given(model):
288
+ self._opts.model = model
289
+ if is_given(language):
290
+ self._opts.language = language
291
+ if is_given(prompt):
292
+ self._opts.prompt = prompt
293
+ if is_given(temperature):
294
+ self._opts.temperature = temperature
295
+ if is_given(skip_vad):
296
+ self._opts.skip_vad = skip_vad
297
+ if is_given(vad_kwargs):
298
+ self._opts.vad_kwargs = vad_kwargs
299
+ if is_given(text_timeout_seconds):
300
+ self._opts.text_timeout_seconds = text_timeout_seconds
301
+ if is_given(timestamp_granularities):
302
+ self._opts.timestamp_granularities = timestamp_granularities
303
+
304
+ self._reconnect_event.set()
305
+
306
+ async def _run(self) -> None:
307
+ """
308
+ Run a single websocket connection to Fireworks and make sure to reconnect
309
+ when something went wrong.
310
+ """
311
+
312
+ closing_ws = False
313
+
314
+ async def send_task(ws: aiohttp.ClientWebSocketResponse) -> None:
315
+ nonlocal closing_ws
316
+
317
+ samples_per_buffer = self._opts.sample_rate // 20 # 50ms chunk
318
+ audio_bstream = utils.audio.AudioByteStream(
319
+ sample_rate=self._opts.sample_rate,
320
+ num_channels=1,
321
+ samples_per_channel=samples_per_buffer,
322
+ )
323
+
324
+ async for data in self._input_ch:
325
+ if isinstance(data, self._FlushSentinel):
326
+ frames = audio_bstream.flush()
327
+ else:
328
+ frames = audio_bstream.write(data.data.tobytes())
329
+
330
+ for frame in frames:
331
+ await self._audio_duration_collector.push(frame.duration)
332
+ await ws.send_bytes(frame.data.tobytes())
333
+
334
+ closing_ws = True
335
+ await ws.send_str(self._CLOSE_MSG)
336
+
337
+ async def recv_task(ws: aiohttp.ClientWebSocketResponse) -> None:
338
+ nonlocal closing_ws
339
+ while True:
340
+ try:
341
+ msg = await asyncio.wait_for(ws.receive(), timeout=5)
342
+ except asyncio.TimeoutError:
343
+ if closing_ws:
344
+ break
345
+ continue
346
+
347
+ if msg.type in (
348
+ aiohttp.WSMsgType.CLOSED,
349
+ aiohttp.WSMsgType.CLOSE,
350
+ aiohttp.WSMsgType.CLOSING,
351
+ ):
352
+ if closing_ws:
353
+ return
354
+
355
+ raise APIStatusError(
356
+ "Fireworks connection closed unexpectedly",
357
+ )
358
+
359
+ if msg.type != aiohttp.WSMsgType.TEXT:
360
+ logger.error("unexpected FireworksAI message type %s", msg.type)
361
+ continue
362
+
363
+ try:
364
+ self._process_stream_event(json.loads(msg.data))
365
+ except Exception:
366
+ logger.exception("failed to process FireworksAI message")
367
+
368
+ ws: aiohttp.ClientWebSocketResponse | None = None
369
+
370
+ while True:
371
+ try:
372
+ ws = await self._connect_ws()
373
+ tasks = [
374
+ asyncio.create_task(send_task(ws)),
375
+ asyncio.create_task(recv_task(ws)),
376
+ ]
377
+ wait_reconnect_task = asyncio.create_task(self._reconnect_event.wait())
378
+
379
+ try:
380
+ done, _ = await asyncio.wait(
381
+ (asyncio.gather(*tasks), wait_reconnect_task),
382
+ return_when=asyncio.FIRST_COMPLETED,
383
+ )
384
+ for task in done:
385
+ if task != wait_reconnect_task:
386
+ task.result()
387
+
388
+ if wait_reconnect_task not in done:
389
+ break
390
+
391
+ self._reconnect_event.clear()
392
+ finally:
393
+ await utils.aio.gracefully_cancel(*tasks, wait_reconnect_task)
394
+ finally:
395
+ if self._speaking:
396
+ self._speaking = False
397
+ end_event = stt.SpeechEvent(type=stt.SpeechEventType.END_OF_SPEECH)
398
+ self._event_ch.send_nowait(end_event)
399
+
400
+ if ws is not None:
401
+ await ws.close()
402
+
403
+ await self._audio_duration_collector.flush()
404
+
405
+ async def _connect_ws(self) -> aiohttp.ClientWebSocketResponse:
406
+ live_config = {
407
+ "model": self._opts.model if is_given(self._opts.model) else None,
408
+ "language": self._opts.language if is_given(self._opts.language) else None,
409
+ "prompt": self._opts.prompt if is_given(self._opts.prompt) else None,
410
+ "temperature": self._opts.temperature if is_given(self._opts.temperature) else None,
411
+ "skip_vad": self._opts.skip_vad if is_given(self._opts.skip_vad) else None,
412
+ "vad_kwargs": self._opts.vad_kwargs if is_given(self._opts.vad_kwargs) else None,
413
+ "text_timeout_seconds": self._opts.text_timeout_seconds,
414
+ "response_format": self._opts.response_format,
415
+ "timestamp_granularities": (
416
+ self._opts.timestamp_granularities
417
+ if is_given(self._opts.timestamp_granularities)
418
+ else None
419
+ ),
420
+ }
421
+
422
+ headers = {
423
+ "User-Agent": "LiveKit Agents",
424
+ "Authorization": self._api_key,
425
+ }
426
+
427
+ ws_url = str(self._opts.base_url).rstrip("/") + _STREAMING_PATH
428
+ filtered_config = {k: v for k, v in live_config.items() if v is not None}
429
+ url = f"{ws_url}?{urlencode(filtered_config, doseq=True)}"
430
+ ws = await self._session.ws_connect(url, headers=headers)
431
+ logger.info("connected to Fireworks AI STT", extra={"url": url})
432
+ return ws
433
+
434
+ def _process_stream_event(self, data: dict) -> None:
435
+ if "segments" in data and data["segments"]:
436
+ latest_segment = max(data["segments"], key=lambda s: s["id"])
437
+ max_segment_id = latest_segment["id"]
438
+
439
+ for segment in data["segments"]:
440
+ segment_id = segment["id"]
441
+ if segment_id < self._last_final_segment_id:
442
+ continue
443
+
444
+ if segment_id == self._last_final_segment_id:
445
+ finalized_word_count = self._final_segments_length.get(segment_id, 0)
446
+ words = segment.get("words", [])
447
+ if isinstance(words, list) and finalized_word_count < len(words):
448
+ new_words = words[finalized_word_count:]
449
+ new_text = " ".join(w["word"] for w in new_words if "word" in w).strip()
450
+ self._transcript_state[segment_id] = new_text
451
+ elif segment_id in self._transcript_state:
452
+ del self._transcript_state[segment_id]
453
+ else:
454
+ self._transcript_state[segment["id"]] = segment["text"]
455
+
456
+ for local_segment_id in list(self._transcript_state.keys()):
457
+ if local_segment_id > max_segment_id:
458
+ del self._transcript_state[local_segment_id]
459
+
460
+ # The state dictionary may not be sorted, so we must sort it by the segment ID
461
+ # before joining the text.
462
+ sorted_segments = sorted(self._transcript_state.items(), key=lambda item: int(item[0]))
463
+ full_transcript = " ".join([text for _, text in sorted_segments])
464
+
465
+ if not full_transcript:
466
+ return
467
+
468
+ if not self._speaking:
469
+ self._speaking = True
470
+ start_event = stt.SpeechEvent(type=stt.SpeechEventType.START_OF_SPEECH)
471
+ self._event_ch.send_nowait(start_event)
472
+
473
+ is_final = False
474
+ words = latest_segment.get("words")
475
+ if words and isinstance(words, list) and words:
476
+ last_word = words[-1]
477
+ if isinstance(last_word, dict) and last_word.get("is_final") is True:
478
+ is_final = True
479
+
480
+ if is_final:
481
+ final_event = stt.SpeechEvent(
482
+ type=stt.SpeechEventType.FINAL_TRANSCRIPT,
483
+ alternatives=[
484
+ stt.SpeechData(language=self._opts.language or "", text=full_transcript)
485
+ ],
486
+ )
487
+ self._event_ch.send_nowait(final_event)
488
+ self._transcript_state.clear()
489
+ self._last_final_segment_id = max_segment_id
490
+ words = latest_segment.get("words")
491
+ if isinstance(words, list):
492
+ self._final_segments_length[max_segment_id] = len(words)
493
+ else:
494
+ interim_event = stt.SpeechEvent(
495
+ type=stt.SpeechEventType.INTERIM_TRANSCRIPT,
496
+ alternatives=[
497
+ stt.SpeechData(language=self._opts.language or "", text=full_transcript)
498
+ ],
499
+ )
500
+ self._event_ch.send_nowait(interim_event)
501
+
502
+ def _on_audio_duration_report(self, duration: float) -> None:
503
+ usage_event = stt.SpeechEvent(
504
+ type=stt.SpeechEventType.RECOGNITION_USAGE,
505
+ recognition_usage=stt.RecognitionUsage(audio_duration=duration),
506
+ )
507
+ self._event_ch.send_nowait(usage_event)
@@ -0,0 +1,15 @@
1
+ # Copyright 2025 LiveKit, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ __version__ = "1.2.12"
@@ -0,0 +1,39 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "livekit-plugins-fireworksai"
7
+ dynamic = ["version"]
8
+ description = "LiveKit Agents Plugin for Fireworks AI"
9
+ readme = "README.md"
10
+ license = "Apache-2.0"
11
+ requires-python = ">=3.9.0"
12
+ authors = [{ name = "LiveKit", email = "hello@livekit.io" }]
13
+ keywords = ["webrtc", "realtime", "audio", "video", "livekit"]
14
+ classifiers = [
15
+ "Intended Audience :: Developers",
16
+ "License :: OSI Approved :: Apache Software License",
17
+ "Topic :: Multimedia :: Sound/Audio",
18
+ "Topic :: Multimedia :: Video",
19
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.9",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3 :: Only",
24
+ ]
25
+ dependencies = ["livekit-agents>=1.2.12"]
26
+
27
+ [project.urls]
28
+ Documentation = "https://docs.livekit.io"
29
+ Website = "https://livekit.io/"
30
+ Source = "https://github.com/livekit/agents"
31
+
32
+ [tool.hatch.version]
33
+ path = "livekit/plugins/fireworksai/version.py"
34
+
35
+ [tool.hatch.build.targets.wheel]
36
+ packages = ["livekit"]
37
+
38
+ [tool.hatch.build.targets.sdist]
39
+ include = ["/livekit"]