openspeechapi 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. openspeech/__init__.py +75 -0
  2. openspeech/__main__.py +5 -0
  3. openspeech/cli.py +413 -0
  4. openspeech/client/__init__.py +4 -0
  5. openspeech/client/client.py +145 -0
  6. openspeech/config.py +212 -0
  7. openspeech/core/__init__.py +0 -0
  8. openspeech/core/base.py +75 -0
  9. openspeech/core/enums.py +39 -0
  10. openspeech/core/models.py +61 -0
  11. openspeech/core/registry.py +37 -0
  12. openspeech/core/settings.py +8 -0
  13. openspeech/demo.py +675 -0
  14. openspeech/dispatch/__init__.py +0 -0
  15. openspeech/dispatch/context.py +34 -0
  16. openspeech/dispatch/dispatcher.py +661 -0
  17. openspeech/dispatch/executors/__init__.py +0 -0
  18. openspeech/dispatch/executors/base.py +34 -0
  19. openspeech/dispatch/executors/in_process.py +66 -0
  20. openspeech/dispatch/executors/remote.py +64 -0
  21. openspeech/dispatch/executors/subprocess_exec.py +446 -0
  22. openspeech/dispatch/fanout.py +95 -0
  23. openspeech/dispatch/filters.py +73 -0
  24. openspeech/dispatch/lifecycle.py +178 -0
  25. openspeech/dispatch/watcher.py +82 -0
  26. openspeech/engine_catalog.py +236 -0
  27. openspeech/engine_registry.yaml +347 -0
  28. openspeech/exceptions.py +51 -0
  29. openspeech/factory.py +325 -0
  30. openspeech/local_engines/__init__.py +12 -0
  31. openspeech/local_engines/aim_resolver.py +91 -0
  32. openspeech/local_engines/backends/__init__.py +1 -0
  33. openspeech/local_engines/backends/docker_backend.py +490 -0
  34. openspeech/local_engines/backends/native_backend.py +902 -0
  35. openspeech/local_engines/base.py +30 -0
  36. openspeech/local_engines/engines/__init__.py +1 -0
  37. openspeech/local_engines/engines/faster_whisper.py +36 -0
  38. openspeech/local_engines/engines/fish_speech.py +33 -0
  39. openspeech/local_engines/engines/sherpa_onnx.py +56 -0
  40. openspeech/local_engines/engines/whisper.py +41 -0
  41. openspeech/local_engines/engines/whisperlivekit.py +60 -0
  42. openspeech/local_engines/manager.py +208 -0
  43. openspeech/local_engines/models.py +50 -0
  44. openspeech/local_engines/progress.py +69 -0
  45. openspeech/local_engines/registry.py +19 -0
  46. openspeech/local_engines/task_store.py +52 -0
  47. openspeech/local_engines/tasks.py +71 -0
  48. openspeech/logging_config.py +607 -0
  49. openspeech/observe/__init__.py +0 -0
  50. openspeech/observe/base.py +79 -0
  51. openspeech/observe/debug.py +44 -0
  52. openspeech/observe/latency.py +19 -0
  53. openspeech/observe/metrics.py +47 -0
  54. openspeech/observe/tracing.py +44 -0
  55. openspeech/observe/usage.py +27 -0
  56. openspeech/providers/__init__.py +0 -0
  57. openspeech/providers/_template.py +101 -0
  58. openspeech/providers/stt/__init__.py +0 -0
  59. openspeech/providers/stt/alibaba.py +86 -0
  60. openspeech/providers/stt/assemblyai.py +135 -0
  61. openspeech/providers/stt/azure_speech.py +99 -0
  62. openspeech/providers/stt/baidu.py +135 -0
  63. openspeech/providers/stt/deepgram.py +311 -0
  64. openspeech/providers/stt/elevenlabs.py +385 -0
  65. openspeech/providers/stt/faster_whisper.py +211 -0
  66. openspeech/providers/stt/google_cloud.py +106 -0
  67. openspeech/providers/stt/iflytek.py +427 -0
  68. openspeech/providers/stt/macos_speech.py +226 -0
  69. openspeech/providers/stt/openai.py +84 -0
  70. openspeech/providers/stt/sherpa_onnx.py +353 -0
  71. openspeech/providers/stt/tencent.py +212 -0
  72. openspeech/providers/stt/volcengine.py +107 -0
  73. openspeech/providers/stt/whisper.py +153 -0
  74. openspeech/providers/stt/whisperlivekit.py +530 -0
  75. openspeech/providers/stt/windows_speech.py +249 -0
  76. openspeech/providers/tts/__init__.py +0 -0
  77. openspeech/providers/tts/alibaba.py +95 -0
  78. openspeech/providers/tts/azure_speech.py +123 -0
  79. openspeech/providers/tts/baidu.py +143 -0
  80. openspeech/providers/tts/coqui.py +64 -0
  81. openspeech/providers/tts/cosyvoice.py +90 -0
  82. openspeech/providers/tts/deepgram.py +174 -0
  83. openspeech/providers/tts/elevenlabs.py +311 -0
  84. openspeech/providers/tts/fish_speech.py +158 -0
  85. openspeech/providers/tts/google_cloud.py +107 -0
  86. openspeech/providers/tts/iflytek.py +209 -0
  87. openspeech/providers/tts/macos_say.py +251 -0
  88. openspeech/providers/tts/minimax.py +122 -0
  89. openspeech/providers/tts/openai.py +104 -0
  90. openspeech/providers/tts/piper.py +104 -0
  91. openspeech/providers/tts/tencent.py +189 -0
  92. openspeech/providers/tts/volcengine.py +117 -0
  93. openspeech/providers/tts/windows_sapi.py +234 -0
  94. openspeech/server/__init__.py +1 -0
  95. openspeech/server/app.py +72 -0
  96. openspeech/server/auth.py +42 -0
  97. openspeech/server/middleware.py +75 -0
  98. openspeech/server/routes/__init__.py +1 -0
  99. openspeech/server/routes/management.py +848 -0
  100. openspeech/server/routes/stt.py +121 -0
  101. openspeech/server/routes/tts.py +159 -0
  102. openspeech/server/routes/webui.py +29 -0
  103. openspeech/server/webui/app.js +2649 -0
  104. openspeech/server/webui/index.html +216 -0
  105. openspeech/server/webui/styles.css +617 -0
  106. openspeech/server/ws/__init__.py +1 -0
  107. openspeech/server/ws/stt_stream.py +263 -0
  108. openspeech/server/ws/tts_stream.py +207 -0
  109. openspeech/telemetry/__init__.py +21 -0
  110. openspeech/telemetry/perf.py +307 -0
  111. openspeech/utils/__init__.py +5 -0
  112. openspeech/utils/audio_converter.py +406 -0
  113. openspeech/utils/audio_playback.py +156 -0
  114. openspeech/vendor_registry.yaml +74 -0
  115. openspeechapi-0.1.0.dist-info/METADATA +101 -0
  116. openspeechapi-0.1.0.dist-info/RECORD +118 -0
  117. openspeechapi-0.1.0.dist-info/WHEEL +4 -0
  118. openspeechapi-0.1.0.dist-info/entry_points.txt +3 -0
openspeech/config.py ADDED
@@ -0,0 +1,212 @@
1
+ """YAML configuration loading with environment variable interpolation."""
2
+ from __future__ import annotations
3
+
4
+ import os
5
+ import re
6
+ from dataclasses import dataclass, field
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ import yaml
11
+
12
+ from openspeech.exceptions import ConfigError
13
+
14
_ENV_PATTERN = re.compile(r"\$\{([^}]+)\}")

# Single-pass tokenizer: either a ``$$`` escape or a ``${VAR}`` reference.
# The escape alternative comes first so that ``$${VAR}`` is consumed as an
# escaped dollar followed by plain text, never as a reference.
_ENV_TOKEN_PATTERN = re.compile(r"\$\$|\$\{([^}]+)\}")


def interpolate_env(value: str) -> str:
    """Interpolate ${VAR} references in a string. $$ escapes to literal $.

    The string is scanned once, left to right. Earlier versions swapped
    ``$$`` for a NUL placeholder and swapped it back afterwards, which also
    rewrote any NUL character already present in ``value`` into ``$``; the
    single-pass scan leaves all other characters untouched.

    Args:
        value: Text that may contain ``${VAR}`` references and ``$$`` escapes.

    Returns:
        ``value`` with every reference replaced by the variable's value and
        every ``$$`` collapsed to a single ``$``.

    Raises:
        ConfigError: If a referenced environment variable is not set.
    """

    def _replace(match: re.Match) -> str:
        var_name = match.group(1)
        if var_name is None:
            # Group 1 is only populated by the ``${VAR}`` alternative, so
            # this match is the ``$$`` escape.
            return "$"
        val = os.environ.get(var_name)
        if val is None:
            raise ConfigError(f"Environment variable '{var_name}' not set")
        return val

    return _ENV_TOKEN_PATTERN.sub(_replace, value)
31
+
32
+
33
def _interpolate_recursive(obj: Any) -> Any:
    """Expand environment references in every string value nested in *obj*.

    Lists and dicts are rebuilt recursively (dict keys are passed through
    unchanged; only their values are visited). Strings are handed to
    ``interpolate_env``. Any other value is returned as-is.
    """
    if isinstance(obj, list):
        return [_interpolate_recursive(element) for element in obj]
    if isinstance(obj, dict):
        expanded = {}
        for key, child in obj.items():
            expanded[key] = _interpolate_recursive(child)
        return expanded
    return interpolate_env(obj) if isinstance(obj, str) else obj
41
+
42
+
43
@dataclass
class EngineConfig:
    """Settings for one configured engine instance.

    ``provider`` is read in one of two ways:

    * When it names an entry of the top-level ``providers:`` section, the
      engine borrows that provider's shared credentials and its factory key
      is looked up in the engine catalog.
    * Otherwise (including when no ``providers:`` section exists) the value
      is itself the factory key, which keeps the old config format working.
    """

    # Factory key after resolution by ``load_config``.
    provider: str
    # ``load_config`` falls back to "remote" when the YAML omits this key.
    exec_mode: str
    # Engine settings; ``load_config`` layers these over provider credentials.
    settings: dict[str, Any] = field(default_factory=dict)
    filters: list[dict[str, Any]] = field(default_factory=list)
    endpoint: str | None = None
    preload: bool = False
    keepalive: int = 0
    module: str | None = None
    # Filled from the YAML key ``class`` by ``load_config``.
    provider_class: str | None = None


# Backward compatibility alias
ProviderConfig = EngineConfig
67
+
68
+
69
@dataclass
class ProviderCredentials:
    """Shared credentials belonging to one cloud provider."""

    # Free-form key/value settings; each instance gets its own dict.
    settings: dict[str, Any] = field(default_factory=dict)


# Keep old name as alias
CloudProviderConfig = ProviderCredentials
77
+
78
+
79
@dataclass
class ServerConfig:
    """Values of the optional ``server`` section, with their defaults."""

    # NOTE(review): presumably the listen address; "0.0.0.0" would expose the
    # server on every interface while auth is off by default - confirm.
    host: str = "0.0.0.0"
    port: int = 8600
    auth_enabled: bool = False
    # Each instance gets its own list.
    api_keys: list[str] = field(default_factory=list)
85
+
86
+
87
@dataclass
class OpenSpeechConfig:
    """Top-level parsed configuration, as returned by ``load_config``."""

    # Engine instances keyed by alias (required).
    engines: dict[str, EngineConfig]
    # Shared credential sets keyed by provider name; empty when none are given.
    providers: dict[str, ProviderCredentials] = field(default_factory=dict)
    # Server section; a default ``ServerConfig()`` is built when not supplied.
    server: ServerConfig = field(default_factory=ServerConfig)
92
+
93
+
94
def _resolve_factory_key(alias: str, provider_ref: str) -> str:
    """Resolve the factory key for an engine that references a provider.

    Scans the engine catalog for the entry whose ``default_alias`` equals
    ``alias`` and returns that entry's ``provider``. The lookup is
    best-effort: if no entry matches, or if importing or iterating the
    catalog raises, ``provider_ref`` is returned instead.

    Args:
        alias: Engine alias as written in the config.
        provider_ref: Value to fall back on when the catalog yields nothing.

    Returns:
        The factory key from the catalog, or ``provider_ref``.
    """
    try:
        # Imported lazily, inside the try, so that a failing catalog import
        # is covered by the same fallback as a failing lookup.
        from openspeech.engine_catalog import get_catalog

        for entry in get_catalog():
            if entry.default_alias == alias:
                return entry.provider
    except Exception:  # noqa: BLE001 - the fallback is deliberate
        # Stay best-effort, but leave a trace so a broken catalog can be
        # diagnosed instead of silently changing which factory key is used.
        import logging

        logging.getLogger(__name__).debug(
            "Engine catalog lookup failed for alias %r; falling back to %r",
            alias,
            provider_ref,
            exc_info=True,
        )
    # Fallback: use provider ref as factory key
    return provider_ref
109
+
110
+
111
# Text spellings treated as False when a boolean option arrives as a string
# (a quoted YAML scalar, or a value produced by ``${VAR}`` interpolation).
_FALSE_STRINGS = frozenset({"", "0", "false", "no", "off"})


def _coerce_bool(value: Any) -> bool:
    """Convert a config value to bool; "false"/"no"/"off"/"0" text is False."""
    if isinstance(value, str):
        return value.strip().lower() not in _FALSE_STRINGS
    return bool(value)


def _coerce_int(value: Any, what: str) -> int:
    """Convert a config value to int, raising ConfigError naming the option."""
    try:
        return int(value)
    except (TypeError, ValueError) as exc:
        raise ConfigError(f"{what} must be an integer, got {value!r}") from exc


def _require_mapping(value: Any, what: str) -> dict[str, Any]:
    """Return *value* if it is a dict, ``{}`` if it is None, else raise ConfigError."""
    if value is None:
        return {}
    if not isinstance(value, dict):
        raise ConfigError(f"{what} must be a mapping, got {type(value).__name__}")
    return value


def load_config(path: Path) -> OpenSpeechConfig:
    """Load and validate providers.yaml configuration.

    Supports both formats:
    - New: `providers:` (shared credentials) + `engines:` (engine instances)
    - Old: `providers:` (engine instances, no `engines:` key)

    ``${VAR}`` references in string values are expanded from the environment
    after the structural "has engines" check, so an empty config is reported
    as such even if it also references unset variables.

    Args:
        path: Location of the YAML file (a ``str`` is accepted as well).

    Returns:
        The parsed configuration.

    Raises:
        ConfigError: If the file is missing, the document or one of its
            sections is not a mapping, no engines are defined, an integer
            option cannot be parsed, or a referenced environment variable
            is not set.
    """
    path = Path(path)
    if not path.exists():
        raise ConfigError(f"Config file not found: {path}")

    with open(path, encoding="utf-8") as f:
        raw = yaml.safe_load(f)

    if not isinstance(raw, dict):
        raise ConfigError("Config must be a YAML mapping")

    # Detect format: if 'engines' key exists -> new format. In the old format
    # the 'providers' key holds the engine configs and there are no shared
    # credentials.
    is_new_format = "engines" in raw
    engines_key = "engines" if is_new_format else "providers"

    if not raw.get(engines_key):
        raise ConfigError("Config must contain an 'engines' (or 'providers') key with entries")

    raw = _interpolate_recursive(raw)

    engines_raw = _require_mapping(raw.get(engines_key), f"'{engines_key}'")
    providers_raw = (
        _require_mapping(raw.get("providers"), "'providers'") if is_new_format else {}
    )

    # Parse providers section (shared credentials)
    cred_providers: dict[str, ProviderCredentials] = {
        name: ProviderCredentials(
            settings=dict(_require_mapping(spec, f"providers.{name}"))
        )
        for name, spec in providers_raw.items()
    }

    engines: dict[str, EngineConfig] = {}
    for alias, spec in engines_raw.items():
        # An entry with no body (``alias:``) is treated as an empty spec.
        spec = _require_mapping(spec, f"{engines_key}.{alias}")
        provider_value = spec.get("provider", "")

        # Determine if provider references shared credentials or is a factory key
        if is_new_format and provider_value in cred_providers:
            # New format: provider references credential provider;
            # resolve factory key from catalog.
            factory_key = _resolve_factory_key(alias, provider_value)
        elif is_new_format and not provider_value:
            # No provider field - resolve factory key from catalog by alias
            factory_key = _resolve_factory_key(alias, "")
        else:
            # Old format or direct factory key
            factory_key = provider_value

        cfg = EngineConfig(
            provider=factory_key,
            exec_mode=spec.get("exec_mode", "remote"),
            settings=_require_mapping(
                spec.get("settings"), f"{engines_key}.{alias}.settings"
            ),
            filters=spec.get("filters") or [],
            endpoint=spec.get("endpoint"),
            preload=_coerce_bool(spec.get("preload", False)),
            keepalive=_coerce_int(
                spec.get("keepalive") or 0, f"{engines_key}.{alias}.keepalive"
            ),
            module=spec.get("module"),
            provider_class=spec.get("class"),
        )

        # Merge provider credentials into engine settings.
        # Vendor credentials serve as defaults; non-empty engine settings override.
        # Empty engine settings do NOT override valid vendor credentials.
        if provider_value in cred_providers:
            merged = dict(cred_providers[provider_value].settings)
            for k, v in cfg.settings.items():
                # "Empty" means falsy but not a deliberate 0 / False.
                if k in merged and merged[k] and not v and v != 0 and v is not False:
                    continue  # keep vendor value when engine value is empty
                merged[k] = v
            cfg.settings = merged

        engines[alias] = cfg

    # Parse optional server section; an absent or empty section keeps defaults.
    server_cfg = ServerConfig()
    srv = _require_mapping(raw.get("server"), "'server'")

    host = srv.get("host")
    if host is not None:
        server_cfg.host = host

    port = srv.get("port")
    if port is not None:
        # Interpolated values arrive as text, so normalize to int here.
        server_cfg.port = _coerce_int(port, "server.port")

    auth = _require_mapping(srv.get("auth"), "server.auth")
    if auth:
        server_cfg.auth_enabled = _coerce_bool(auth.get("enabled", False))
        api_keys = auth.get("api_keys") or []
        if isinstance(api_keys, str):
            # A single scalar key must not be split into characters.
            api_keys = [api_keys]
        server_cfg.api_keys = list(api_keys)

    return OpenSpeechConfig(
        engines=engines,
        providers=cred_providers,
        server=server_cfg,
    )
File without changes
@@ -0,0 +1,75 @@
1
+ """Provider abstract base classes."""
2
+ from __future__ import annotations
3
+
4
+ from abc import ABC, abstractmethod
5
+ from collections.abc import AsyncIterator
6
+ from typing import Any
7
+
8
+ from openspeech.core.enums import Capability, ExecMode, ProviderType
9
+ from openspeech.core.models import AudioChunk, AudioData, STTOptions, TTSOptions, Transcription
10
+ from openspeech.core.settings import BaseSettings
11
+
12
+
13
class SpeechProvider(ABC):
    """Abstract root shared by every speech provider."""

    # Attributes declared without a value carry no default on this class.
    name: str
    provider_type: ProviderType
    execution_mode: ExecMode
    settings: BaseSettings
    # Override in subclass
    settings_cls: type[BaseSettings] = BaseSettings
    capabilities: set[Capability]
    # Dropdown choices per setting key (override in subclass). This default
    # is a single class-level dict, so subclasses should assign their own
    # mapping rather than mutate it.
    field_options: dict[str, list] = {}

    @abstractmethod
    async def start(self) -> None:
        """Abstract coroutine; concrete providers must implement it."""

    @abstractmethod
    async def stop(self) -> None:
        """Abstract coroutine; concrete providers must implement it."""

    @abstractmethod
    async def health_check(self) -> bool:
        """Abstract coroutine returning a bool; concrete providers must implement it."""

    def set_http_client(self, client: Any) -> None:
        """Accept a shared HTTP client ahead of ``start()``.

        Providers built on ``httpx.AsyncClient`` can override this hook to
        reuse one client (with its SSL/transport resources) rather than
        constructing their own. A provider that was handed a shared client
        must **not** close it in ``stop()``.

        This base implementation does nothing, so providers that do not
        use HTTP (WebSocket-only, local inference, etc.) can ignore it.
        """
44
+
45
+
46
class STTProvider(SpeechProvider):
    """Abstract base for speech-to-text providers."""

    @abstractmethod
    async def transcribe(
        self,
        audio: AudioData,
        opts: STTOptions | None = None,
    ) -> Transcription:
        """Abstract coroutine: take ``audio`` (and optional ``opts``), return a ``Transcription``."""

    @abstractmethod
    def transcribe_stream(self, stream: AsyncIterator[bytes]) -> AsyncIterator[Any]:
        """Abstract: take an async iterator of bytes, return an async iterator.

        Declared as a plain ``def`` (not ``async def``) whose return value is
        the async iterator itself.
        """
58
+
59
+
60
class TTSProvider(SpeechProvider):
    """Abstract base for text-to-speech providers."""

    @abstractmethod
    async def synthesize(
        self,
        text: str,
        opts: TTSOptions | None = None,
    ) -> AudioData:
        """Abstract coroutine: take ``text`` (and optional ``opts``), return ``AudioData``."""

    @abstractmethod
    def synthesize_stream(
        self,
        text: str,
        opts: TTSOptions | None = None,
    ) -> AsyncIterator[AudioChunk]:
        """Abstract: take ``text`` (and optional ``opts``), return an async iterator of ``AudioChunk``.

        Declared as a plain ``def`` (not ``async def``) whose return value is
        the async iterator itself.
        """

    async def list_voices(self) -> list[dict]:
        """List the voices this provider offers.

        The base implementation reports none (an empty list); subclasses
        override it to supply their voice list.
        """
        no_voices: list[dict] = []
        return no_voices
@@ -0,0 +1,39 @@
1
+ """Core enumerations for OpenSpeech."""
2
+ from enum import Enum
3
+
4
+
5
class ProviderType(str, Enum):
    """Kind of provider; members are also plain ``str`` values."""

    STT = "stt"
    TTS = "tts"
    BOTH = "both"
9
+
10
+
11
class ExecMode(str, Enum):
    """How a provider is executed; members are also plain ``str`` values."""

    IN_PROCESS = "in_process"  # True in-process model execution (reserved for ultra-low-latency local models).
    LOCAL = "local"  # Local service engine (managed separately; provider talks over HTTP/HTTPS).
    SUBPROCESS = "subprocess"  # Provider worker subprocess + IPC.
    REMOTE = "remote"  # Cloud/remote service over network API.
20
+
21
+
22
class AudioFormat(str, Enum):
    """Audio format identifiers; members are also plain ``str`` values."""

    # PCM variants
    PCM_16K = "pcm_16k"
    PCM_44K = "pcm_44k"

    # Named container / codec formats
    WAV = "wav"
    AIFF = "aiff"
    MP3 = "mp3"
    OGG = "ogg"
    FLAC = "flac"
    OPUS = "opus"
31
+
32
+
33
class Capability(str, Enum):
    """Feature flags a provider may advertise; members are also plain ``str`` values."""

    STREAMING = "streaming"
    BATCH = "batch"
    MULTILINGUAL = "multilingual"
    VOICE_CLONE = "voice_clone"
    EMOTION = "emotion"
    WORD_TIMESTAMPS = "word_timestamps"
@@ -0,0 +1,61 @@
1
+ """Core data models — plain dataclasses for data transfer."""
2
+ from __future__ import annotations
3
+
4
+ from dataclasses import dataclass
5
+
6
+ from openspeech.core.enums import AudioFormat
7
+
8
+
9
@dataclass
class AudioData:
    """A block of audio bytes together with its format description."""

    data: bytes
    sample_rate: int
    channels: int
    format: AudioFormat
    # Optional; defaults to None when not supplied.
    duration_ms: int | None = None
16
+
17
+
18
@dataclass
class Word:
    """One word of a transcription with its start/end offsets in milliseconds."""

    text: str
    start_ms: int
    end_ms: int
    # Optional; defaults to None when not supplied.
    confidence: float | None = None
24
+
25
+
26
@dataclass
class Transcription:
    """Result of a speech-to-text call; only ``text`` is required."""

    text: str
    language: str | None = None
    confidence: float | None = None
    words: list[Word] | None = None
    duration_ms: int | None = None
    # True for intermediate results from streaming STT
    is_partial: bool = False
34
+
35
+
36
@dataclass
class AudioChunk:
    """One numbered piece of audio bytes from a stream."""

    data: bytes
    sequence: int
    # Defaults to False when not supplied.
    is_final: bool = False
41
+
42
+
43
@dataclass
class STTOptions:
    """Optional per-request speech-to-text options; every field defaults to None."""

    language: str | None = None
    prompt: str | None = None
    temperature: float | None = None
    model: str | None = None
    device: str | None = None
    beam_size: int | None = None
    compute_type: str | None = None
    fp16: bool | None = None
53
+
54
+
55
@dataclass
class TTSOptions:
    """Optional per-request text-to-speech options."""

    voice: str | None = None
    speed: float = 1.0
    # Defaults to the PCM_16K member of AudioFormat.
    output_format: AudioFormat = AudioFormat.PCM_16K
    model: str | None = None
    stream_transport: str | None = None
@@ -0,0 +1,37 @@
1
+ """Provider registry — lookup by name, type, or capability."""
2
+ from __future__ import annotations
3
+
4
+ from typing import Any
5
+
6
+ from openspeech.core.enums import Capability, ProviderType
7
+ from openspeech.exceptions import ProviderNotFoundError
8
+
9
+
10
class ProviderRegistry:
    """Name-keyed store of provider classes, searchable by type or capability."""

    def __init__(self) -> None:
        self._providers: dict[str, Any] = {}

    def register(self, name: str, provider_cls: Any) -> None:
        """Store *provider_cls* under *name*, replacing any earlier entry."""
        self._providers[name] = provider_cls

    def get(self, name: str) -> Any:
        """Return the entry registered as *name*.

        Raises:
            ProviderNotFoundError: If nothing is registered under *name*.
        """
        if name in self._providers:
            return self._providers[name]
        raise ProviderNotFoundError(name)

    def find_by_type(self, provider_type: ProviderType) -> list[tuple[str, Any]]:
        """Return ``(name, cls)`` pairs whose ``provider_type`` equals the argument.

        Entries lacking a ``provider_type`` attribute are compared as None.
        """
        matches: list[tuple[str, Any]] = []
        for name, cls in self._providers.items():
            if getattr(cls, "provider_type", None) == provider_type:
                matches.append((name, cls))
        return matches

    def find_by_capability(self, capability: Capability) -> list[tuple[str, Any]]:
        """Return ``(name, cls)`` pairs whose ``capabilities`` contain the argument.

        Entries lacking a ``capabilities`` attribute are treated as having none.
        """
        matches: list[tuple[str, Any]] = []
        for name, cls in self._providers.items():
            if capability in getattr(cls, "capabilities", set()):
                matches.append((name, cls))
        return matches

    def list_all(self) -> list[tuple[str, Any]]:
        """Return every ``(name, cls)`` pair as a new list, in registration order."""
        return [*self._providers.items()]
@@ -0,0 +1,8 @@
1
+ """Base settings for provider configuration."""
2
+ from dataclasses import dataclass
3
+
4
+
5
@dataclass
class BaseSettings:
    """Field-less root for provider-specific settings; subclass it as a dataclass."""