openspeechapi 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openspeech/__init__.py +75 -0
- openspeech/__main__.py +5 -0
- openspeech/cli.py +413 -0
- openspeech/client/__init__.py +4 -0
- openspeech/client/client.py +145 -0
- openspeech/config.py +212 -0
- openspeech/core/__init__.py +0 -0
- openspeech/core/base.py +75 -0
- openspeech/core/enums.py +39 -0
- openspeech/core/models.py +61 -0
- openspeech/core/registry.py +37 -0
- openspeech/core/settings.py +8 -0
- openspeech/demo.py +675 -0
- openspeech/dispatch/__init__.py +0 -0
- openspeech/dispatch/context.py +34 -0
- openspeech/dispatch/dispatcher.py +661 -0
- openspeech/dispatch/executors/__init__.py +0 -0
- openspeech/dispatch/executors/base.py +34 -0
- openspeech/dispatch/executors/in_process.py +66 -0
- openspeech/dispatch/executors/remote.py +64 -0
- openspeech/dispatch/executors/subprocess_exec.py +446 -0
- openspeech/dispatch/fanout.py +95 -0
- openspeech/dispatch/filters.py +73 -0
- openspeech/dispatch/lifecycle.py +178 -0
- openspeech/dispatch/watcher.py +82 -0
- openspeech/engine_catalog.py +236 -0
- openspeech/engine_registry.yaml +347 -0
- openspeech/exceptions.py +51 -0
- openspeech/factory.py +325 -0
- openspeech/local_engines/__init__.py +12 -0
- openspeech/local_engines/aim_resolver.py +91 -0
- openspeech/local_engines/backends/__init__.py +1 -0
- openspeech/local_engines/backends/docker_backend.py +490 -0
- openspeech/local_engines/backends/native_backend.py +902 -0
- openspeech/local_engines/base.py +30 -0
- openspeech/local_engines/engines/__init__.py +1 -0
- openspeech/local_engines/engines/faster_whisper.py +36 -0
- openspeech/local_engines/engines/fish_speech.py +33 -0
- openspeech/local_engines/engines/sherpa_onnx.py +56 -0
- openspeech/local_engines/engines/whisper.py +41 -0
- openspeech/local_engines/engines/whisperlivekit.py +60 -0
- openspeech/local_engines/manager.py +208 -0
- openspeech/local_engines/models.py +50 -0
- openspeech/local_engines/progress.py +69 -0
- openspeech/local_engines/registry.py +19 -0
- openspeech/local_engines/task_store.py +52 -0
- openspeech/local_engines/tasks.py +71 -0
- openspeech/logging_config.py +607 -0
- openspeech/observe/__init__.py +0 -0
- openspeech/observe/base.py +79 -0
- openspeech/observe/debug.py +44 -0
- openspeech/observe/latency.py +19 -0
- openspeech/observe/metrics.py +47 -0
- openspeech/observe/tracing.py +44 -0
- openspeech/observe/usage.py +27 -0
- openspeech/providers/__init__.py +0 -0
- openspeech/providers/_template.py +101 -0
- openspeech/providers/stt/__init__.py +0 -0
- openspeech/providers/stt/alibaba.py +86 -0
- openspeech/providers/stt/assemblyai.py +135 -0
- openspeech/providers/stt/azure_speech.py +99 -0
- openspeech/providers/stt/baidu.py +135 -0
- openspeech/providers/stt/deepgram.py +311 -0
- openspeech/providers/stt/elevenlabs.py +385 -0
- openspeech/providers/stt/faster_whisper.py +211 -0
- openspeech/providers/stt/google_cloud.py +106 -0
- openspeech/providers/stt/iflytek.py +427 -0
- openspeech/providers/stt/macos_speech.py +226 -0
- openspeech/providers/stt/openai.py +84 -0
- openspeech/providers/stt/sherpa_onnx.py +353 -0
- openspeech/providers/stt/tencent.py +212 -0
- openspeech/providers/stt/volcengine.py +107 -0
- openspeech/providers/stt/whisper.py +153 -0
- openspeech/providers/stt/whisperlivekit.py +530 -0
- openspeech/providers/stt/windows_speech.py +249 -0
- openspeech/providers/tts/__init__.py +0 -0
- openspeech/providers/tts/alibaba.py +95 -0
- openspeech/providers/tts/azure_speech.py +123 -0
- openspeech/providers/tts/baidu.py +143 -0
- openspeech/providers/tts/coqui.py +64 -0
- openspeech/providers/tts/cosyvoice.py +90 -0
- openspeech/providers/tts/deepgram.py +174 -0
- openspeech/providers/tts/elevenlabs.py +311 -0
- openspeech/providers/tts/fish_speech.py +158 -0
- openspeech/providers/tts/google_cloud.py +107 -0
- openspeech/providers/tts/iflytek.py +209 -0
- openspeech/providers/tts/macos_say.py +251 -0
- openspeech/providers/tts/minimax.py +122 -0
- openspeech/providers/tts/openai.py +104 -0
- openspeech/providers/tts/piper.py +104 -0
- openspeech/providers/tts/tencent.py +189 -0
- openspeech/providers/tts/volcengine.py +117 -0
- openspeech/providers/tts/windows_sapi.py +234 -0
- openspeech/server/__init__.py +1 -0
- openspeech/server/app.py +72 -0
- openspeech/server/auth.py +42 -0
- openspeech/server/middleware.py +75 -0
- openspeech/server/routes/__init__.py +1 -0
- openspeech/server/routes/management.py +848 -0
- openspeech/server/routes/stt.py +121 -0
- openspeech/server/routes/tts.py +159 -0
- openspeech/server/routes/webui.py +29 -0
- openspeech/server/webui/app.js +2649 -0
- openspeech/server/webui/index.html +216 -0
- openspeech/server/webui/styles.css +617 -0
- openspeech/server/ws/__init__.py +1 -0
- openspeech/server/ws/stt_stream.py +263 -0
- openspeech/server/ws/tts_stream.py +207 -0
- openspeech/telemetry/__init__.py +21 -0
- openspeech/telemetry/perf.py +307 -0
- openspeech/utils/__init__.py +5 -0
- openspeech/utils/audio_converter.py +406 -0
- openspeech/utils/audio_playback.py +156 -0
- openspeech/vendor_registry.yaml +74 -0
- openspeechapi-0.1.0.dist-info/METADATA +101 -0
- openspeechapi-0.1.0.dist-info/RECORD +118 -0
- openspeechapi-0.1.0.dist-info/WHEEL +4 -0
- openspeechapi-0.1.0.dist-info/entry_points.txt +3 -0
openspeech/config.py
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
"""YAML configuration loading with environment variable interpolation."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import os
|
|
5
|
+
import re
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
import yaml
|
|
11
|
+
|
|
12
|
+
from openspeech.exceptions import ConfigError
|
|
13
|
+
|
|
14
|
+
_ENV_PATTERN = re.compile(r"\$\{([^}]+)\}")
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Matches either the "$$" escape or a "${NAME}" reference in one pass, so the
# escape never has to be smuggled through the string as a sentinel character.
_ENV_TOKEN = re.compile(r"\$\$|\$\{([^}]+)\}")


def interpolate_env(value: str) -> str:
    """Expand ``${VAR}`` references in *value* from the process environment.

    ``$$`` is an escape that yields a single literal ``$``; consequently
    ``$${VAR}`` produces the text ``${VAR}`` without any lookup.

    Args:
        value: The string to expand.

    Returns:
        *value* with every ``${VAR}`` replaced by ``os.environ["VAR"]`` and
        every ``$$`` replaced by ``$``.

    Raises:
        ConfigError: If a referenced environment variable is not set.
    """

    def _replace(match: re.Match) -> str:
        var_name = match.group(1)
        if var_name is None:
            # Group 1 did not participate, so the match was the "$$" escape.
            return "$"
        val = os.environ.get(var_name)
        if val is None:
            raise ConfigError(f"Environment variable '{var_name}' not set")
        return val

    # A single left-to-right pass handles escapes and references together,
    # which leaves any other character in the input (including NUL) untouched.
    return _ENV_TOKEN.sub(_replace, value)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _interpolate_recursive(obj: Any) -> Any:
|
|
34
|
+
if isinstance(obj, str):
|
|
35
|
+
return interpolate_env(obj)
|
|
36
|
+
if isinstance(obj, dict):
|
|
37
|
+
return {k: _interpolate_recursive(v) for k, v in obj.items()}
|
|
38
|
+
if isinstance(obj, list):
|
|
39
|
+
return [_interpolate_recursive(item) for item in obj]
|
|
40
|
+
return obj
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
class EngineConfig:
    """Settings describing one configured engine instance.

    ``provider`` is interpreted in one of two ways:

    * When it names an entry of the top-level ``providers:`` section, the
      engine borrows that entry's shared credentials and its factory key is
      resolved through the engine catalog.
    * Otherwise (including when no ``providers:`` section exists) the value
      is used as the factory key itself, which keeps the old config format
      working.
    """

    provider: str
    exec_mode: str
    settings: dict[str, Any] = field(default_factory=dict)
    filters: list[dict[str, Any]] = field(default_factory=list)
    endpoint: str | None = None
    preload: bool = False
    keepalive: int = 0
    module: str | None = None
    # Filled from the ``class`` key of the engine's YAML spec by ``load_config``.
    provider_class: str | None = None


# Legacy name retained so code written against the old API keeps importing.
ProviderConfig = EngineConfig
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@dataclass
class ProviderCredentials:
    """Credentials of one cloud provider, shared by the engines that reference it."""

    settings: dict[str, Any] = field(default_factory=dict)


# Previous name of this class, preserved as an alias.
CloudProviderConfig = ProviderCredentials
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@dataclass
class ServerConfig:
    """Values of the optional ``server:`` config section, with their defaults."""

    host: str = "0.0.0.0"
    port: int = 8600
    # Populated from the ``auth.enabled`` / ``auth.api_keys`` keys by ``load_config``.
    auth_enabled: bool = False
    api_keys: list[str] = field(default_factory=list)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
@dataclass
class OpenSpeechConfig:
    """Parsed configuration: engines by alias, shared provider credentials, server settings."""

    # Engine instances keyed by their alias.
    engines: dict[str, EngineConfig]
    # Shared credentials keyed by provider name; empty when none are configured.
    providers: dict[str, ProviderCredentials] = field(default_factory=dict)
    # Server settings; a default ``ServerConfig`` when not supplied.
    server: ServerConfig = field(default_factory=ServerConfig)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _resolve_factory_key(alias: str, provider_ref: str) -> str:
|
|
95
|
+
"""Resolve the factory key for an engine that references a provider.
|
|
96
|
+
|
|
97
|
+
Looks up the engine catalog to find the factory key from the alias.
|
|
98
|
+
Falls back to the provider_ref if catalog lookup fails.
|
|
99
|
+
"""
|
|
100
|
+
try:
|
|
101
|
+
from openspeech.engine_catalog import get_catalog
|
|
102
|
+
for entry in get_catalog():
|
|
103
|
+
if entry.default_alias == alias:
|
|
104
|
+
return entry.provider
|
|
105
|
+
except Exception:
|
|
106
|
+
pass
|
|
107
|
+
# Fallback: use provider ref as factory key
|
|
108
|
+
return provider_ref
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _split_sections(raw: dict[str, Any], is_new_format: bool) -> tuple[Any, Any]:
    """Return ``(engines_raw, providers_raw)`` for the detected config format."""
    if is_new_format:
        return raw.get("engines") or {}, raw.get("providers") or {}
    # Old format: the 'providers' key holds the engine configs themselves and
    # there is no shared-credentials section.
    return raw.get("providers") or {}, {}


def _as_mapping(value: Any, what: str) -> dict[str, Any]:
    """Return *value* as a dict, treating ``None`` as empty and rejecting other types."""
    if value is None:
        return {}
    if not isinstance(value, dict):
        raise ConfigError(f"{what} must be a mapping, got {type(value).__name__}")
    return value


def _as_int(value: Any, default: int, what: str) -> int:
    """Coerce *value* to ``int`` (``None`` -> *default*), raising ``ConfigError`` on failure."""
    if value is None:
        return default
    try:
        return int(value)
    except (TypeError, ValueError) as exc:
        raise ConfigError(f"{what} must be an integer, got {value!r}") from exc


def load_config(path: Path) -> OpenSpeechConfig:
    """Load and validate providers.yaml configuration.

    Supports both formats:
    - New: `providers:` (shared credentials) + `engines:` (engine instances)
    - Old: `providers:` (engine instances, no `engines:` key)

    String values anywhere in the document have ``${VAR}`` references
    expanded from the environment before the sections are parsed.

    Args:
        path: Location of the YAML configuration file.

    Returns:
        The parsed ``OpenSpeechConfig``.

    Raises:
        ConfigError: If the file is missing, is not valid YAML, is not a
            mapping, has no engine entries, contains a section of the wrong
            type, has a non-integer ``keepalive``/``port``, or references an
            unset environment variable.
    """
    if not path.exists():
        raise ConfigError(f"Config file not found: {path}")

    try:
        with open(path, encoding="utf-8") as f:
            raw = yaml.safe_load(f)
    except yaml.YAMLError as exc:
        # Surface parse failures through the package's own error type.
        raise ConfigError(f"Invalid YAML in config file {path}: {exc}") from exc

    if not isinstance(raw, dict):
        raise ConfigError("Config must be a YAML mapping")

    # Detect format: if 'engines' key exists → new format
    is_new_format = "engines" in raw

    # Check for engine entries before interpolation so that a structurally
    # empty config is reported ahead of any missing-environment-variable error.
    engines_raw, _ = _split_sections(raw, is_new_format)
    if not engines_raw:
        raise ConfigError("Config must contain an 'engines' (or 'providers') key with entries")

    raw = _interpolate_recursive(raw)

    # Re-read after interpolation
    engines_raw, providers_raw = _split_sections(raw, is_new_format)
    engines_raw = _as_mapping(
        engines_raw, "'engines'" if is_new_format else "'providers'"
    )
    providers_raw = _as_mapping(providers_raw, "'providers'")

    # Parse providers section (shared credentials)
    cred_providers: dict[str, ProviderCredentials] = {}
    for name, spec in providers_raw.items():
        # An entry with no body (``name:``) parses as None; treat as empty.
        cred_providers[name] = ProviderCredentials(
            settings=dict(_as_mapping(spec, f"Provider '{name}'"))
        )

    engines: dict[str, EngineConfig] = {}
    for alias, spec in engines_raw.items():
        spec = _as_mapping(spec, f"Engine '{alias}'")
        provider_value = spec.get("provider", "")

        # Determine if provider references shared credentials or is a factory key
        if is_new_format and provider_value in cred_providers:
            # New format: provider references credential provider
            # Resolve factory key from catalog
            factory_key = _resolve_factory_key(alias, provider_value)
        elif is_new_format and not provider_value:
            # No provider field — resolve factory key from catalog by alias
            factory_key = _resolve_factory_key(alias, "")
        else:
            # Old format or direct factory key
            factory_key = provider_value

        cfg = EngineConfig(
            provider=factory_key,
            exec_mode=spec.get("exec_mode", "remote"),
            # ``settings:`` / ``filters:`` with no value parse as None.
            settings=_as_mapping(spec.get("settings"), f"Engine '{alias}' settings"),
            filters=spec.get("filters") or [],
            endpoint=spec.get("endpoint"),
            preload=bool(spec.get("preload", False)),
            keepalive=_as_int(spec.get("keepalive"), 0, f"Engine '{alias}' keepalive"),
            module=spec.get("module"),
            provider_class=spec.get("class"),
        )

        # Merge provider credentials into engine settings
        # Vendor credentials serve as defaults; non-empty engine settings override.
        # Empty engine settings do NOT override valid vendor credentials.
        if provider_value in cred_providers:
            merged = dict(cred_providers[provider_value].settings)
            for k, v in cfg.settings.items():
                # "Empty" means falsy but not a number/boolean: 0 and False
                # are meaningful explicit values and still override.
                engine_value_is_empty = not v and v != 0 and v is not False
                if k in merged and merged[k] and engine_value_is_empty:
                    continue  # keep vendor value when engine value is empty
                merged[k] = v
            cfg.settings = merged

        engines[alias] = cfg

    # Parse optional server section
    server_cfg = ServerConfig()
    srv = _as_mapping(raw.get("server"), "'server' section")
    if srv:
        server_cfg.host = srv.get("host", server_cfg.host)
        # Environment interpolation yields strings, so coerce the port to the
        # integer the dataclass declares.
        server_cfg.port = _as_int(srv.get("port"), server_cfg.port, "server.port")
        auth = _as_mapping(srv.get("auth"), "'server.auth' section")
        if auth:
            server_cfg.auth_enabled = bool(auth.get("enabled", False))
            api_keys = auth.get("api_keys") or []
            if isinstance(api_keys, str):
                # A single scalar key must not be split into characters.
                api_keys = [api_keys]
            server_cfg.api_keys = list(api_keys)

    return OpenSpeechConfig(
        engines=engines,
        providers=cred_providers,
        server=server_cfg,
    )
|
|
File without changes
|
openspeech/core/base.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"""Provider abstract base classes."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from abc import ABC, abstractmethod
|
|
5
|
+
from collections.abc import AsyncIterator
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from openspeech.core.enums import Capability, ExecMode, ProviderType
|
|
9
|
+
from openspeech.core.models import AudioChunk, AudioData, STTOptions, TTSOptions, Transcription
|
|
10
|
+
from openspeech.core.settings import BaseSettings
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class SpeechProvider(ABC):
    """Abstract root of every speech provider.

    Declares the attributes a provider carries and the lifecycle coroutines
    (``start``, ``stop``, ``health_check``) each concrete provider must define.
    """

    name: str
    provider_type: ProviderType
    execution_mode: ExecMode
    settings: BaseSettings
    # Settings class for this provider; subclasses are expected to override it.
    settings_cls: type[BaseSettings] = BaseSettings
    capabilities: set[Capability]
    # Dropdown choices keyed by setting name; subclasses override as needed.
    field_options: dict[str, list] = {}

    @abstractmethod
    async def start(self) -> None:
        ...

    @abstractmethod
    async def stop(self) -> None:
        ...

    @abstractmethod
    async def health_check(self) -> bool:
        ...

    def set_http_client(self, client: Any) -> None:
        """Receive a shared HTTP client prior to ``start()`` being called.

        Providers built on ``httpx.AsyncClient`` should override this so a
        single client instance (and its SSL/transport resources) is reused
        rather than one being created per provider. A provider handed a
        shared client must **not** close it in ``stop()``.

        This base implementation does nothing, so providers that do not use
        HTTP (WebSocket-only, local inference, etc.) can ignore it.
        """
        return None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class STTProvider(SpeechProvider):
    """Abstract base for providers that turn speech into text."""

    @abstractmethod
    async def transcribe(self, audio: AudioData, opts: STTOptions | None = None) -> Transcription:
        ...

    # Declared as a plain ``def`` whose return annotation is an async iterator.
    @abstractmethod
    def transcribe_stream(self, stream: AsyncIterator[bytes]) -> AsyncIterator[Any]:
        ...
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class TTSProvider(SpeechProvider):
    """Abstract base for providers that turn text into speech."""

    @abstractmethod
    async def synthesize(self, text: str, opts: TTSOptions | None = None) -> AudioData:
        ...

    # Declared as a plain ``def`` whose return annotation is an async iterator.
    @abstractmethod
    def synthesize_stream(self, text: str, opts: TTSOptions | None = None) -> AsyncIterator[AudioChunk]:
        ...

    async def list_voices(self) -> list[dict]:
        """List the voices this provider offers.

        The base implementation reports none; subclasses override it to
        supply their own voice list.
        """
        voices: list[dict] = []
        return voices
|
openspeech/core/enums.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""Core enumerations for OpenSpeech."""
|
|
2
|
+
from enum import Enum
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class ProviderType(str, Enum):
    """Kind of speech service a provider offers (string-valued)."""

    STT = "stt"
    TTS = "tts"
    BOTH = "both"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ExecMode(str, Enum):
    """How a provider is executed (string-valued)."""

    # Model runs inside the current process; reserved for ultra-low-latency
    # local models.
    IN_PROCESS = "in_process"
    # Separately managed local service engine that the provider reaches over
    # HTTP/HTTPS.
    LOCAL = "local"
    # Provider runs in a worker subprocess and is reached via IPC.
    SUBPROCESS = "subprocess"
    # Cloud or other remote service accessed through a network API.
    REMOTE = "remote"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class AudioFormat(str, Enum):
    """Identifiers of the audio formats known to the package (string-valued)."""

    PCM_16K = "pcm_16k"
    PCM_44K = "pcm_44k"
    WAV = "wav"
    AIFF = "aiff"
    MP3 = "mp3"
    OGG = "ogg"
    FLAC = "flac"
    OPUS = "opus"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class Capability(str, Enum):
    """Feature flags a provider can advertise (string-valued)."""

    STREAMING = "streaming"
    BATCH = "batch"
    MULTILINGUAL = "multilingual"
    VOICE_CLONE = "voice_clone"
    EMOTION = "emotion"
    WORD_TIMESTAMPS = "word_timestamps"
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""Core data models — plain dataclasses for data transfer."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
from openspeech.core.enums import AudioFormat
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class AudioData:
    """Audio bytes together with their sample rate, channel count and format."""

    data: bytes
    sample_rate: int
    channels: int
    format: AudioFormat
    # Optional; left as None when not supplied.
    duration_ms: int | None = None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class Word:
    """One word with its start/end offsets and an optional confidence."""

    text: str
    start_ms: int
    end_ms: int
    confidence: float | None = None
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass
class Transcription:
    """Text produced by speech recognition plus optional metadata about it."""

    text: str
    language: str | None = None
    confidence: float | None = None
    words: list[Word] | None = None
    duration_ms: int | None = None
    # Set to True for intermediate results emitted by streaming STT.
    is_partial: bool = False
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
class AudioChunk:
    """A piece of audio bytes carrying a sequence number and a final-chunk flag."""

    data: bytes
    sequence: int
    is_final: bool = False
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
class STTOptions:
    """Optional per-request speech-to-text parameters; every field defaults to None."""

    language: str | None = None
    prompt: str | None = None
    temperature: float | None = None
    model: str | None = None
    device: str | None = None
    beam_size: int | None = None
    compute_type: str | None = None
    fp16: bool | None = None
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass
class TTSOptions:
    """Optional per-request text-to-speech parameters."""

    voice: str | None = None
    speed: float = 1.0
    output_format: AudioFormat = AudioFormat.PCM_16K
    model: str | None = None
    stream_transport: str | None = None
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""Provider registry — lookup by name, type, or capability."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from openspeech.core.enums import Capability, ProviderType
|
|
7
|
+
from openspeech.exceptions import ProviderNotFoundError
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ProviderRegistry:
    """Name-keyed store of provider classes, searchable by type or capability."""

    def __init__(self) -> None:
        # Maps registered name -> provider class, in registration order.
        self._providers: dict[str, Any] = {}

    def register(self, name: str, provider_cls: Any) -> None:
        """Store *provider_cls* under *name*, replacing any previous entry."""
        self._providers[name] = provider_cls

    def get(self, name: str) -> Any:
        """Return the class registered as *name*.

        Raises:
            ProviderNotFoundError: If nothing is registered under *name*.
        """
        if name in self._providers:
            return self._providers[name]
        raise ProviderNotFoundError(name)

    def find_by_type(self, provider_type: ProviderType) -> list[tuple[str, Any]]:
        """Return ``(name, class)`` pairs whose ``provider_type`` equals *provider_type*."""
        matches: list[tuple[str, Any]] = []
        for name, cls in self._providers.items():
            # Classes lacking the attribute compare as None and are skipped
            # unless None itself was requested.
            if getattr(cls, "provider_type", None) == provider_type:
                matches.append((name, cls))
        return matches

    def find_by_capability(self, capability: Capability) -> list[tuple[str, Any]]:
        """Return ``(name, class)`` pairs whose ``capabilities`` contain *capability*."""
        matches: list[tuple[str, Any]] = []
        for name, cls in self._providers.items():
            # A class without ``capabilities`` is treated as having none.
            if capability in getattr(cls, "capabilities", set()):
                matches.append((name, cls))
        return matches

    def list_all(self) -> list[tuple[str, Any]]:
        """Return every registered ``(name, class)`` pair as a new list."""
        return [*self._providers.items()]
|