livekit-plugins-fireworksai 1.2.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit_plugins_fireworksai-1.2.12/.gitignore +175 -0
- livekit_plugins_fireworksai-1.2.12/PKG-INFO +36 -0
- livekit_plugins_fireworksai-1.2.12/README.md +13 -0
- livekit_plugins_fireworksai-1.2.12/livekit/plugins/fireworksai/__init__.py +45 -0
- livekit_plugins_fireworksai-1.2.12/livekit/plugins/fireworksai/log.py +3 -0
- livekit_plugins_fireworksai-1.2.12/livekit/plugins/fireworksai/py.typed +0 -0
- livekit_plugins_fireworksai-1.2.12/livekit/plugins/fireworksai/stt.py +507 -0
- livekit_plugins_fireworksai-1.2.12/livekit/plugins/fireworksai/version.py +15 -0
- livekit_plugins_fireworksai-1.2.12/pyproject.toml +39 -0
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
**/.vscode
|
|
2
|
+
**/.DS_Store
|
|
3
|
+
|
|
4
|
+
# Byte-compiled / optimized / DLL files
|
|
5
|
+
__pycache__/
|
|
6
|
+
*.py[cod]
|
|
7
|
+
*$py.class
|
|
8
|
+
|
|
9
|
+
# C extensions
|
|
10
|
+
*.so
|
|
11
|
+
|
|
12
|
+
# Distribution / packaging
|
|
13
|
+
.Python
|
|
14
|
+
build/
|
|
15
|
+
develop-eggs/
|
|
16
|
+
dist/
|
|
17
|
+
downloads/
|
|
18
|
+
eggs/
|
|
19
|
+
.eggs/
|
|
20
|
+
lib/
|
|
21
|
+
lib64/
|
|
22
|
+
parts/
|
|
23
|
+
sdist/
|
|
24
|
+
var/
|
|
25
|
+
wheels/
|
|
26
|
+
share/python-wheels/
|
|
27
|
+
*.egg-info/
|
|
28
|
+
.installed.cfg
|
|
29
|
+
*.egg
|
|
30
|
+
MANIFEST
|
|
31
|
+
|
|
32
|
+
# PyInstaller
|
|
33
|
+
# Usually these files are written by a python script from a template
|
|
34
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
35
|
+
*.manifest
|
|
36
|
+
*.spec
|
|
37
|
+
|
|
38
|
+
# Installer logs
|
|
39
|
+
pip-log.txt
|
|
40
|
+
pip-delete-this-directory.txt
|
|
41
|
+
|
|
42
|
+
# Unit test / coverage reports
|
|
43
|
+
htmlcov/
|
|
44
|
+
.tox/
|
|
45
|
+
.nox/
|
|
46
|
+
.coverage
|
|
47
|
+
.coverage.*
|
|
48
|
+
.cache
|
|
49
|
+
nosetests.xml
|
|
50
|
+
coverage.xml
|
|
51
|
+
*.cover
|
|
52
|
+
*.py,cover
|
|
53
|
+
.hypothesis/
|
|
54
|
+
.pytest_cache/
|
|
55
|
+
cover/
|
|
56
|
+
|
|
57
|
+
# Translations
|
|
58
|
+
*.mo
|
|
59
|
+
*.pot
|
|
60
|
+
|
|
61
|
+
# Django stuff:
|
|
62
|
+
*.log
|
|
63
|
+
local_settings.py
|
|
64
|
+
db.sqlite3
|
|
65
|
+
db.sqlite3-journal
|
|
66
|
+
|
|
67
|
+
# Flask stuff:
|
|
68
|
+
instance/
|
|
69
|
+
.webassets-cache
|
|
70
|
+
|
|
71
|
+
# Scrapy stuff:
|
|
72
|
+
.scrapy
|
|
73
|
+
|
|
74
|
+
# Sphinx documentation
|
|
75
|
+
docs/_build/
|
|
76
|
+
|
|
77
|
+
# PyBuilder
|
|
78
|
+
.pybuilder/
|
|
79
|
+
target/
|
|
80
|
+
|
|
81
|
+
# Jupyter Notebook
|
|
82
|
+
.ipynb_checkpoints
|
|
83
|
+
|
|
84
|
+
# IPython
|
|
85
|
+
profile_default/
|
|
86
|
+
ipython_config.py
|
|
87
|
+
|
|
88
|
+
# pyenv
|
|
89
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
90
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
91
|
+
# .python-version
|
|
92
|
+
|
|
93
|
+
# pipenv
|
|
94
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
95
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
96
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
97
|
+
# install all needed dependencies.
|
|
98
|
+
#Pipfile.lock
|
|
99
|
+
|
|
100
|
+
# poetry
|
|
101
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
102
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
103
|
+
# commonly ignored for libraries.
|
|
104
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
105
|
+
#poetry.lock
|
|
106
|
+
|
|
107
|
+
# pdm
|
|
108
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
109
|
+
#pdm.lock
|
|
110
|
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
|
111
|
+
# in version control.
|
|
112
|
+
# https://pdm.fming.dev/#use-with-ide
|
|
113
|
+
.pdm.toml
|
|
114
|
+
|
|
115
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
116
|
+
__pypackages__/
|
|
117
|
+
|
|
118
|
+
# Celery stuff
|
|
119
|
+
celerybeat-schedule
|
|
120
|
+
celerybeat.pid
|
|
121
|
+
|
|
122
|
+
# SageMath parsed files
|
|
123
|
+
*.sage.py
|
|
124
|
+
|
|
125
|
+
# Environments
|
|
126
|
+
.env
|
|
127
|
+
.venv
|
|
128
|
+
env/
|
|
129
|
+
venv/
|
|
130
|
+
ENV/
|
|
131
|
+
env.bak/
|
|
132
|
+
venv.bak/
|
|
133
|
+
|
|
134
|
+
# Spyder project settings
|
|
135
|
+
.spyderproject
|
|
136
|
+
.spyproject
|
|
137
|
+
|
|
138
|
+
# Rope project settings
|
|
139
|
+
.ropeproject
|
|
140
|
+
|
|
141
|
+
# mkdocs documentation
|
|
142
|
+
/site
|
|
143
|
+
|
|
144
|
+
# mypy
|
|
145
|
+
.mypy_cache/
|
|
146
|
+
.dmypy.json
|
|
147
|
+
dmypy.json
|
|
148
|
+
|
|
149
|
+
# trunk
|
|
150
|
+
.trunk/
|
|
151
|
+
|
|
152
|
+
# Pyre type checker
|
|
153
|
+
.pyre/
|
|
154
|
+
|
|
155
|
+
# pytype static type analyzer
|
|
156
|
+
.pytype/
|
|
157
|
+
|
|
158
|
+
# Cython debug symbols
|
|
159
|
+
cython_debug/
|
|
160
|
+
|
|
161
|
+
# PyCharm
|
|
162
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
163
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
164
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
165
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
166
|
+
.idea/
|
|
167
|
+
|
|
168
|
+
node_modules
|
|
169
|
+
|
|
170
|
+
credentials.json
|
|
171
|
+
pyrightconfig.json
|
|
172
|
+
docs/
|
|
173
|
+
|
|
174
|
+
# Database files
|
|
175
|
+
*.db
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: livekit-plugins-fireworksai
|
|
3
|
+
Version: 1.2.12
|
|
4
|
+
Summary: LiveKit Agents Plugin for Fireworks AI
|
|
5
|
+
Project-URL: Documentation, https://docs.livekit.io
|
|
6
|
+
Project-URL: Website, https://livekit.io/
|
|
7
|
+
Project-URL: Source, https://github.com/livekit/agents
|
|
8
|
+
Author-email: LiveKit <hello@livekit.io>
|
|
9
|
+
License-Expression: Apache-2.0
|
|
10
|
+
Keywords: audio,livekit,realtime,video,webrtc
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Topic :: Multimedia :: Sound/Audio
|
|
18
|
+
Classifier: Topic :: Multimedia :: Video
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Requires-Python: >=3.9.0
|
|
21
|
+
Requires-Dist: livekit-agents>=1.2.12
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
|
|
24
|
+
# Fireworks AI plugin for LiveKit Agents
|
|
25
|
+
|
|
26
|
+
Support for the speech-to-text API from [Fireworks AI](https://fireworks.ai/).
|
|
27
|
+
|
|
28
|
+
## Installation
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
pip install livekit-plugins-fireworksai
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Pre-requisites
|
|
35
|
+
|
|
36
|
+
You'll need an API key from Fireworks AI. It can be set as an environment variable: `FIREWORKS_API_KEY`
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Fireworks AI plugin for LiveKit Agents
|
|
2
|
+
|
|
3
|
+
Support for the speech-to-text API from [Fireworks AI](https://fireworks.ai/).
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install livekit-plugins-fireworksai
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Pre-requisites
|
|
12
|
+
|
|
13
|
+
You'll need an API key from Fireworks AI. It can be set as an environment variable: `FIREWORKS_API_KEY`
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# Copyright 2025 LiveKit, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""Fireworks AI plugin for LiveKit Agents"""
|
|
16
|
+
|
|
17
|
+
from .log import logger
|
|
18
|
+
from .stt import STT, SpeechStream
|
|
19
|
+
from .version import __version__
|
|
20
|
+
|
|
21
|
+
__all__ = [
|
|
22
|
+
"STT",
|
|
23
|
+
"SpeechStream",
|
|
24
|
+
"logger",
|
|
25
|
+
"__version__",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
from livekit.agents import Plugin
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class FireworksAIPlugin(Plugin):
    """Plugin descriptor for the Fireworks AI integration of LiveKit Agents."""

    def __init__(self) -> None:
        # Pass this module's name, the package version, the package name and
        # the plugin logger to the base Plugin constructor.
        super().__init__(__name__, __version__, __package__, logger)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# Create the plugin instance at import time and register it with the framework.
Plugin.register_plugin(FireworksAIPlugin())

# Cleanup docs of unexported modules
# Snapshot the names currently defined in this module; everything that is not
# listed in __all__ gets flagged below.
_module = dir()
NOT_IN_ALL = [m for m in _module if m not in __all__]

# Maps a name to False for every name that must not appear in generated docs.
__pdoc__ = {}

for n in NOT_IN_ALL:
    __pdoc__[n] = False
|
|
File without changes
|
|
@@ -0,0 +1,507 @@
|
|
|
1
|
+
# Copyright 2025 LiveKit, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import asyncio
|
|
18
|
+
import dataclasses
|
|
19
|
+
import json
|
|
20
|
+
import os
|
|
21
|
+
import weakref
|
|
22
|
+
from dataclasses import dataclass
|
|
23
|
+
from typing import Callable
|
|
24
|
+
from urllib.parse import urlencode
|
|
25
|
+
|
|
26
|
+
import aiohttp
|
|
27
|
+
|
|
28
|
+
from livekit.agents import (
|
|
29
|
+
DEFAULT_API_CONNECT_OPTIONS,
|
|
30
|
+
APIConnectOptions,
|
|
31
|
+
APIStatusError,
|
|
32
|
+
stt,
|
|
33
|
+
utils,
|
|
34
|
+
)
|
|
35
|
+
from livekit.agents.types import NOT_GIVEN, NotGivenOr
|
|
36
|
+
from livekit.agents.utils import AudioBuffer, is_given
|
|
37
|
+
|
|
38
|
+
from .log import logger
|
|
39
|
+
|
|
40
|
+
_STREAMING_PATH = "/audio/transcriptions/streaming"
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class _PeriodicCollector:
    """Accumulate float values and report their sum through a callback.

    The first ``push`` after a report starts a timer task; once ``duration``
    seconds have elapsed the accumulated total is handed to ``callback`` and
    the accumulator is reset. ``flush`` cancels a pending timer and reports
    whatever has been collected so far.
    """

    def __init__(self, duration: float, callback: Callable[[float], None]):
        # The lock serialises push/flush/timer access to the accumulator.
        self._lock = asyncio.Lock()
        self._task: asyncio.Task | None = None
        self._collected_value = 0.0
        self._callback = callback
        self._duration = duration

    async def push(self, value: float) -> None:
        """Add ``value`` to the running total, starting the timer if idle."""
        async with self._lock:
            self._collected_value += value
            if self._task:
                # A timer is already pending; it will report this value too.
                return
            self._task = asyncio.create_task(self._run())

    async def flush(self) -> None:
        """Cancel any pending timer and report a positive total right away."""
        async with self._lock:
            pending = self._task
            if pending:
                pending.cancel()
                try:
                    await pending
                except asyncio.CancelledError:
                    pass
                self._task = None

            # Nothing is reported when the accumulator is empty.
            if self._collected_value > 0:
                self._report()

    async def _run(self) -> None:
        """Timer body: wait ``duration`` seconds, then report and go idle."""
        await asyncio.sleep(self._duration)
        async with self._lock:
            self._report()
            self._task = None

    def _report(self) -> None:
        """Hand the current total to the callback, then reset the total."""
        total = self._collected_value
        self._callback(total)
        self._collected_value = 0.0
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@dataclass
class STTOptions:
    """Options for a Fireworks AI streaming transcription session.

    An instance is created by ``STT.__init__`` and a copy is handed to every
    ``SpeechStream``. Fields typed ``NotGivenOr`` default to ``NOT_GIVEN``.
    """

    # Fireworks AI STT model name.
    model: NotGivenOr[str]
    # Audio sample rate in Hz; STT.__init__ only accepts 16000.
    sample_rate: int
    # Target language for transcription.
    language: NotGivenOr[str] = NOT_GIVEN
    # Input prompt the model uses when generating the transcription.
    prompt: NotGivenOr[str] = NOT_GIVEN
    # Sampling temperature used when decoding text tokens.
    temperature: NotGivenOr[float] = NOT_GIVEN
    # Whether to skip server-side VAD.
    skip_vad: NotGivenOr[bool] = NOT_GIVEN
    # Optional kwargs for the VAD model, e.g. {"threshold": 0.15}.
    vad_kwargs: NotGivenOr[dict] = NOT_GIVEN
    # Duration of silence (seconds) before a transcript is marked final.
    text_timeout_seconds: float = 1.0
    # Format in which the server returns responses.
    response_format: str = "verbose_json"
    # Timestamp granularities to populate for the streaming transcription.
    timestamp_granularities: NotGivenOr[list[str]] = NOT_GIVEN
    # Base websocket URL; the streaming path is appended when connecting.
    base_url: NotGivenOr[str] = NOT_GIVEN
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class STT(stt.STT):
    """Streaming speech-to-text client for the Fireworks AI transcription API.

    Only streaming recognition is supported; use ``stream()`` to obtain a
    ``SpeechStream``. Options changed through ``update_options`` are applied
    to this instance and forwarded to every stream that is still alive.
    """

    def __init__(
        self,
        *,
        model: NotGivenOr[str] = NOT_GIVEN,
        api_key: NotGivenOr[str] = NOT_GIVEN,
        sample_rate: int = 16000,
        language: NotGivenOr[str] = NOT_GIVEN,
        prompt: NotGivenOr[str] = NOT_GIVEN,
        temperature: NotGivenOr[float] = NOT_GIVEN,
        skip_vad: NotGivenOr[bool] = NOT_GIVEN,
        vad_kwargs: NotGivenOr[dict] = NOT_GIVEN,
        text_timeout_seconds: float = 1.0,
        timestamp_granularities: NotGivenOr[list[str]] = NOT_GIVEN,
        response_format: str = "verbose_json",
        http_session: aiohttp.ClientSession | None = None,
        base_url: str = "wss://audio-streaming.us-virginia-1.direct.fireworks.ai/v1",
    ):
        """
        Create a new instance of Fireworks AI STT.

        Args:
            model: The Fireworks AI STT model to use. Defaults to NOT_GIVEN
                (server uses default model).
            language: The target language for transcription. Defaults to NOT_GIVEN
                (server detects language automatically).
                Full list: https://fireworks.ai/docs/api-reference/audio-streaming-transcriptions#supported-languages
            prompt: The input prompt that the model will use when generating the
                transcription. Defaults to NOT_GIVEN.
            temperature: Sampling temperature to use when decoding text tokens during
                transcription. Defaults to NOT_GIVEN.
            skip_vad: Whether to skip server-side VAD. Defaults to NOT_GIVEN.
            vad_kwargs: The optional kwargs to pass to the VAD model.
                Defaults to NOT_GIVEN. Example: Set to {"threshold": 0.15} to adjust
                the VAD threshold.
            text_timeout_seconds: Duration of silence before marking transcript as
                final. Must be between 1.0 and 29.0. Defaults to 1.0.
            timestamp_granularities: The timestamp granularities to populate for this
                streaming transcription. Defaults to NOT_GIVEN.
                Example: ["word", "segment"].
            response_format: The format in which to return the response.
                Defaults to "verbose_json".
            base_url: The base URL for the Fireworks AI STT.
                Defaults to "wss://audio-streaming.us-virginia-1.direct.fireworks.ai/v1".
            api_key: The Fireworks AI API key. If not provided, it will be read from
                the FIREWORKS_API_KEY environment variable.
            http_session: Optional aiohttp ClientSession to use for requests.

        Raises:
            ValueError: If no API key is provided or found in the environment, or if a
                parameter is invalid.
        """
        super().__init__(
            capabilities=stt.STTCapabilities(streaming=True, interim_results=True),
        )
        if sample_rate != 16000:
            raise ValueError("FireworksAI STT only supports a sample rate of 16000")

        self._check_text_timeout(text_timeout_seconds)

        fireworks_api_key = api_key if is_given(api_key) else os.environ.get("FIREWORKS_API_KEY")
        # Treat an empty key (e.g. an environment variable that is set but blank)
        # like a missing one instead of failing later when connecting.
        if not fireworks_api_key:
            raise ValueError(
                "Fireworks API key is required. "
                "Pass one in via the `api_key` parameter, "
                "or set it as the `FIREWORKS_API_KEY` environment variable"
            )
        self._api_key = fireworks_api_key
        self._opts = STTOptions(
            model=model,
            sample_rate=sample_rate,
            language=language,
            prompt=prompt,
            temperature=temperature,
            skip_vad=skip_vad,
            vad_kwargs=vad_kwargs,
            text_timeout_seconds=text_timeout_seconds,
            response_format=response_format,
            timestamp_granularities=timestamp_granularities,
            base_url=base_url,
        )
        self._session = http_session
        # Weak references: streams drop out of the set once nothing else holds them.
        self._streams = weakref.WeakSet[SpeechStream]()

    @staticmethod
    def _check_text_timeout(value: float) -> None:
        """Raise ValueError unless ``value`` lies in the accepted 1.0-29.0 range."""
        if not 1.0 <= value <= 29.0:
            raise ValueError("text_timeout_seconds must be between 1.0 and 29.0")

    @property
    def session(self) -> aiohttp.ClientSession:
        """The aiohttp session used for connections.

        Falls back to the session from ``utils.http_context`` when none was
        passed to the constructor; the result is cached on the instance.
        """
        if not self._session:
            self._session = utils.http_context.http_session()
        return self._session

    async def _recognize_impl(
        self,
        buffer: AudioBuffer,
        *,
        language: NotGivenOr[str] = NOT_GIVEN,
        conn_options: APIConnectOptions,
    ) -> stt.SpeechEvent:
        """Batch recognition is not available for this provider.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError(
            "FireworksAI STT does not support batch recognition, use stream() instead"
        )

    def stream(
        self,
        *,
        language: NotGivenOr[str] = NOT_GIVEN,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> SpeechStream:
        """Open a new streaming transcription session.

        Args:
            language: Language override for this stream only. When not given,
                the language configured on this STT instance is used.
            conn_options: Connection options passed to the stream.

        Returns:
            A ``SpeechStream`` working on its own copy of the current options.
        """
        # Each stream gets its own copy so later per-stream changes do not leak
        # back into the shared options.
        config = dataclasses.replace(self._opts)
        if is_given(language):
            # Previously the argument was accepted but never applied.
            config.language = language

        stream = SpeechStream(
            stt=self,
            opts=config,
            conn_options=conn_options,
            api_key=self._api_key,
            http_session=self.session,
        )
        self._streams.add(stream)
        return stream

    def update_options(
        self,
        *,
        model: NotGivenOr[str] = NOT_GIVEN,
        language: NotGivenOr[str] = NOT_GIVEN,
        prompt: NotGivenOr[str] = NOT_GIVEN,
        temperature: NotGivenOr[float] = NOT_GIVEN,
        skip_vad: NotGivenOr[bool] = NOT_GIVEN,
        vad_kwargs: NotGivenOr[dict] = NOT_GIVEN,
        text_timeout_seconds: NotGivenOr[float] = NOT_GIVEN,
        timestamp_granularities: NotGivenOr[list[str]] = NOT_GIVEN,
    ) -> None:
        """Update the stored options and forward the change to live streams.

        Only arguments that are given are changed.

        Raises:
            ValueError: If ``text_timeout_seconds`` is given and outside 1.0-29.0.
                Validation happens before anything is modified, so a rejected
                call leaves the options untouched.
        """
        if is_given(text_timeout_seconds):
            self._check_text_timeout(text_timeout_seconds)

        if is_given(model):
            self._opts.model = model
        if is_given(language):
            self._opts.language = language
        if is_given(prompt):
            self._opts.prompt = prompt
        if is_given(temperature):
            self._opts.temperature = temperature
        if is_given(skip_vad):
            self._opts.skip_vad = skip_vad
        if is_given(vad_kwargs):
            self._opts.vad_kwargs = vad_kwargs
        if is_given(text_timeout_seconds):
            self._opts.text_timeout_seconds = text_timeout_seconds
        if is_given(timestamp_granularities):
            self._opts.timestamp_granularities = timestamp_granularities

        for stream in self._streams:
            stream.update_options(
                model=model,
                language=language,
                prompt=prompt,
                temperature=temperature,
                skip_vad=skip_vad,
                vad_kwargs=vad_kwargs,
                text_timeout_seconds=text_timeout_seconds,
                timestamp_granularities=timestamp_granularities,
            )
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
class SpeechStream(stt.SpeechStream):
    """One streaming transcription session over a Fireworks AI websocket.

    Audio pushed into the stream is re-chunked and sent as binary websocket
    messages; JSON messages received from the server are turned into
    START_OF_SPEECH / INTERIM_TRANSCRIPT / FINAL_TRANSCRIPT / END_OF_SPEECH
    events. Calling ``update_options`` makes the stream reconnect.
    """

    # Text message sent once all input audio has been forwarded.
    _CLOSE_MSG: str = json.dumps({"checkpoint_id": "final"})

    def __init__(
        self,
        *,
        stt: STT,
        opts: STTOptions,
        conn_options: APIConnectOptions,
        api_key: str,
        http_session: aiohttp.ClientSession,
    ) -> None:
        """Store the session configuration; no connection is opened here."""
        super().__init__(stt=stt, conn_options=conn_options, sample_rate=opts.sample_rate)

        self._opts = opts
        self._api_key = api_key
        self._session = http_session
        # Not-yet-finalized text per segment id (ids are compared as numbers).
        self._transcript_state: dict[int, str] = {}
        self._reconnect_event = asyncio.Event()
        self._speaking = False
        # Number of words already emitted as final, per segment id.
        self._final_segments_length: dict[int, int] = {}
        self._last_final_segment_id = -1
        # Audio duration is reported as a usage event at most every 10 seconds.
        self._audio_duration_collector = _PeriodicCollector(
            callback=self._on_audio_duration_report,
            duration=10.0,
        )

    def update_options(
        self,
        *,
        model: NotGivenOr[str] = NOT_GIVEN,
        language: NotGivenOr[str] = NOT_GIVEN,
        prompt: NotGivenOr[str] = NOT_GIVEN,
        temperature: NotGivenOr[float] = NOT_GIVEN,
        skip_vad: NotGivenOr[bool] = NOT_GIVEN,
        vad_kwargs: NotGivenOr[dict] = NOT_GIVEN,
        text_timeout_seconds: NotGivenOr[float] = NOT_GIVEN,
        timestamp_granularities: NotGivenOr[list[str]] = NOT_GIVEN,
    ) -> None:
        """Apply the given options and request a reconnect.

        Only arguments that are given are changed. The reconnect event is set
        on every successful call so the running connection is replaced by one
        using the new options.

        Raises:
            ValueError: If ``text_timeout_seconds`` is given and outside 1.0-29.0
                (same rule as ``STT``); nothing is modified in that case.
        """
        if is_given(text_timeout_seconds) and not 1.0 <= text_timeout_seconds <= 29.0:
            raise ValueError("text_timeout_seconds must be between 1.0 and 29.0")

        if is_given(model):
            self._opts.model = model
        if is_given(language):
            self._opts.language = language
        if is_given(prompt):
            self._opts.prompt = prompt
        if is_given(temperature):
            self._opts.temperature = temperature
        if is_given(skip_vad):
            self._opts.skip_vad = skip_vad
        if is_given(vad_kwargs):
            self._opts.vad_kwargs = vad_kwargs
        if is_given(text_timeout_seconds):
            self._opts.text_timeout_seconds = text_timeout_seconds
        if is_given(timestamp_granularities):
            self._opts.timestamp_granularities = timestamp_granularities

        self._reconnect_event.set()

    async def _run(self) -> None:
        """
        Run a single websocket connection to Fireworks and make sure to reconnect
        when something went wrong.
        """

        # Set by send_task once all input has been sent; recv_task then treats
        # a close or a receive timeout as a normal end of the session.
        closing_ws = False

        async def send_task(ws: aiohttp.ClientWebSocketResponse) -> None:
            nonlocal closing_ws

            samples_per_buffer = self._opts.sample_rate // 20  # 50ms chunk
            audio_bstream = utils.audio.AudioByteStream(
                sample_rate=self._opts.sample_rate,
                num_channels=1,
                samples_per_channel=samples_per_buffer,
            )

            async for data in self._input_ch:
                if isinstance(data, self._FlushSentinel):
                    frames = audio_bstream.flush()
                else:
                    frames = audio_bstream.write(data.data.tobytes())

                for frame in frames:
                    await self._audio_duration_collector.push(frame.duration)
                    await ws.send_bytes(frame.data.tobytes())

            closing_ws = True
            await ws.send_str(self._CLOSE_MSG)

        async def recv_task(ws: aiohttp.ClientWebSocketResponse) -> None:
            nonlocal closing_ws
            while True:
                try:
                    # Wake up every 5 seconds so a finished session can end
                    # even if the server sends nothing more.
                    msg = await asyncio.wait_for(ws.receive(), timeout=5)
                except asyncio.TimeoutError:
                    if closing_ws:
                        break
                    continue

                if msg.type in (
                    aiohttp.WSMsgType.CLOSED,
                    aiohttp.WSMsgType.CLOSE,
                    aiohttp.WSMsgType.CLOSING,
                ):
                    if closing_ws:
                        return

                    raise APIStatusError(
                        "Fireworks connection closed unexpectedly",
                    )

                if msg.type != aiohttp.WSMsgType.TEXT:
                    logger.error("unexpected FireworksAI message type %s", msg.type)
                    continue

                try:
                    self._process_stream_event(json.loads(msg.data))
                except Exception:
                    # A malformed message must not tear down the connection.
                    logger.exception("failed to process FireworksAI message")

        ws: aiohttp.ClientWebSocketResponse | None = None

        while True:
            try:
                ws = await self._connect_ws()
                tasks = [
                    asyncio.create_task(send_task(ws)),
                    asyncio.create_task(recv_task(ws)),
                ]
                wait_reconnect_task = asyncio.create_task(self._reconnect_event.wait())

                try:
                    done, _ = await asyncio.wait(
                        (asyncio.gather(*tasks), wait_reconnect_task),
                        return_when=asyncio.FIRST_COMPLETED,
                    )
                    for task in done:
                        if task != wait_reconnect_task:
                            # Re-raise any error from the send/recv tasks.
                            task.result()

                    if wait_reconnect_task not in done:
                        # send/recv finished on their own: the session is over.
                        break

                    # NOTE(review): segment bookkeeping (_last_final_segment_id,
                    # _transcript_state) is kept across reconnects - confirm that
                    # the server's segment ids do not restart on a new connection.
                    self._reconnect_event.clear()
                finally:
                    await utils.aio.gracefully_cancel(*tasks, wait_reconnect_task)
            finally:
                if self._speaking:
                    self._speaking = False
                    end_event = stt.SpeechEvent(type=stt.SpeechEventType.END_OF_SPEECH)
                    self._event_ch.send_nowait(end_event)

                if ws is not None:
                    await ws.close()

                await self._audio_duration_collector.flush()

    def _build_ws_url(self) -> str:
        """Return the streaming endpoint URL with the options as query string.

        Options that are not given (or are None) are left out of the query.
        """
        opts = self._opts

        vad_kwargs = None
        if is_given(opts.vad_kwargs) and opts.vad_kwargs is not None:
            # urlencode(doseq=True) iterates a dict and would send only its keys
            # ({"threshold": 0.15} -> "vad_kwargs=threshold"), so serialise the
            # mapping into a single value instead.
            # NOTE(review): assumes the server parses vad_kwargs as a JSON
            # object string - confirm against the Fireworks API reference.
            vad_kwargs = json.dumps(opts.vad_kwargs)

        live_config = {
            "model": opts.model if is_given(opts.model) else None,
            "language": opts.language if is_given(opts.language) else None,
            "prompt": opts.prompt if is_given(opts.prompt) else None,
            "temperature": opts.temperature if is_given(opts.temperature) else None,
            "skip_vad": opts.skip_vad if is_given(opts.skip_vad) else None,
            "vad_kwargs": vad_kwargs,
            "text_timeout_seconds": opts.text_timeout_seconds,
            "response_format": opts.response_format,
            "timestamp_granularities": (
                opts.timestamp_granularities if is_given(opts.timestamp_granularities) else None
            ),
        }
        filtered_config = {k: v for k, v in live_config.items() if v is not None}

        ws_url = str(opts.base_url).rstrip("/") + _STREAMING_PATH
        return f"{ws_url}?{urlencode(filtered_config, doseq=True)}"

    async def _connect_ws(self) -> aiohttp.ClientWebSocketResponse:
        """Open the websocket to the streaming endpoint and return it."""
        headers = {
            "User-Agent": "LiveKit Agents",
            "Authorization": self._api_key,
        }

        url = self._build_ws_url()
        ws = await self._session.ws_connect(url, headers=headers)
        logger.info("connected to Fireworks AI STT", extra={"url": url})
        return ws

    def _process_stream_event(self, data: dict) -> None:
        """Turn one decoded server message into speech events.

        The message's ``segments`` list updates the per-segment text kept in
        ``_transcript_state``. The joined text is emitted as a final transcript
        when the last word of the highest-id segment has ``is_final`` set to
        True, otherwise as an interim transcript. Messages without segments,
        or that leave no text, emit nothing.
        """
        segments = data.get("segments")
        if not segments:
            return

        latest_segment = max(segments, key=lambda s: s["id"])
        max_segment_id = latest_segment["id"]

        for segment in segments:
            segment_id = segment["id"]
            if segment_id < self._last_final_segment_id:
                # Already fully covered by an earlier final transcript.
                continue

            if segment_id == self._last_final_segment_id:
                # Part of this segment was already emitted as final: keep only
                # the words that arrived after that point.
                finalized_word_count = self._final_segments_length.get(segment_id, 0)
                words = segment.get("words", [])
                if isinstance(words, list) and finalized_word_count < len(words):
                    new_words = words[finalized_word_count:]
                    new_text = " ".join(w["word"] for w in new_words if "word" in w).strip()
                    self._transcript_state[segment_id] = new_text
                elif segment_id in self._transcript_state:
                    del self._transcript_state[segment_id]
            else:
                self._transcript_state[segment_id] = segment["text"]

        # Drop locally tracked segments beyond the highest id in this message.
        for local_segment_id in list(self._transcript_state.keys()):
            if local_segment_id > max_segment_id:
                del self._transcript_state[local_segment_id]

        # The state dictionary may not be sorted, so we must sort it by the segment ID
        # before joining the text.
        sorted_segments = sorted(self._transcript_state.items(), key=lambda item: int(item[0]))
        full_transcript = " ".join([text for _, text in sorted_segments])

        if not full_transcript:
            return

        if not self._speaking:
            self._speaking = True
            start_event = stt.SpeechEvent(type=stt.SpeechEventType.START_OF_SPEECH)
            self._event_ch.send_nowait(start_event)

        is_final = False
        words = latest_segment.get("words")
        if isinstance(words, list) and words:
            last_word = words[-1]
            if isinstance(last_word, dict) and last_word.get("is_final") is True:
                is_final = True

        if is_final:
            final_event = stt.SpeechEvent(
                type=stt.SpeechEventType.FINAL_TRANSCRIPT,
                alternatives=[
                    stt.SpeechData(language=self._opts.language or "", text=full_transcript)
                ],
            )
            self._event_ch.send_nowait(final_event)
            self._transcript_state.clear()
            self._last_final_segment_id = max_segment_id
            # `words` is a non-empty list here, otherwise is_final would be False.
            self._final_segments_length[max_segment_id] = len(words)
        else:
            interim_event = stt.SpeechEvent(
                type=stt.SpeechEventType.INTERIM_TRANSCRIPT,
                alternatives=[
                    stt.SpeechData(language=self._opts.language or "", text=full_transcript)
                ],
            )
            self._event_ch.send_nowait(interim_event)

    def _on_audio_duration_report(self, duration: float) -> None:
        """Emit a RECOGNITION_USAGE event carrying the collected audio duration."""
        usage_event = stt.SpeechEvent(
            type=stt.SpeechEventType.RECOGNITION_USAGE,
            recognition_usage=stt.RecognitionUsage(audio_duration=duration),
        )
        self._event_ch.send_nowait(usage_event)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Copyright 2025 LiveKit, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
__version__ = "1.2.12"
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "livekit-plugins-fireworksai"
|
|
7
|
+
dynamic = ["version"]
|
|
8
|
+
description = "LiveKit Agents Plugin for Fireworks AI"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "Apache-2.0"
|
|
11
|
+
requires-python = ">=3.9.0"
|
|
12
|
+
authors = [{ name = "LiveKit", email = "hello@livekit.io" }]
|
|
13
|
+
keywords = ["webrtc", "realtime", "audio", "video", "livekit"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Intended Audience :: Developers",
|
|
16
|
+
"License :: OSI Approved :: Apache Software License",
|
|
17
|
+
"Topic :: Multimedia :: Sound/Audio",
|
|
18
|
+
"Topic :: Multimedia :: Video",
|
|
19
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.9",
|
|
22
|
+
"Programming Language :: Python :: 3.10",
|
|
23
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
24
|
+
]
|
|
25
|
+
dependencies = ["livekit-agents>=1.2.12"]
|
|
26
|
+
|
|
27
|
+
[project.urls]
|
|
28
|
+
Documentation = "https://docs.livekit.io"
|
|
29
|
+
Website = "https://livekit.io/"
|
|
30
|
+
Source = "https://github.com/livekit/agents"
|
|
31
|
+
|
|
32
|
+
[tool.hatch.version]
|
|
33
|
+
path = "livekit/plugins/fireworksai/version.py"
|
|
34
|
+
|
|
35
|
+
[tool.hatch.build.targets.wheel]
|
|
36
|
+
packages = ["livekit"]
|
|
37
|
+
|
|
38
|
+
[tool.hatch.build.targets.sdist]
|
|
39
|
+
include = ["/livekit"]
|