openspeechapi 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. openspeech/__init__.py +75 -0
  2. openspeech/__main__.py +5 -0
  3. openspeech/cli.py +413 -0
  4. openspeech/client/__init__.py +4 -0
  5. openspeech/client/client.py +145 -0
  6. openspeech/config.py +212 -0
  7. openspeech/core/__init__.py +0 -0
  8. openspeech/core/base.py +75 -0
  9. openspeech/core/enums.py +39 -0
  10. openspeech/core/models.py +61 -0
  11. openspeech/core/registry.py +37 -0
  12. openspeech/core/settings.py +8 -0
  13. openspeech/demo.py +675 -0
  14. openspeech/dispatch/__init__.py +0 -0
  15. openspeech/dispatch/context.py +34 -0
  16. openspeech/dispatch/dispatcher.py +661 -0
  17. openspeech/dispatch/executors/__init__.py +0 -0
  18. openspeech/dispatch/executors/base.py +34 -0
  19. openspeech/dispatch/executors/in_process.py +66 -0
  20. openspeech/dispatch/executors/remote.py +64 -0
  21. openspeech/dispatch/executors/subprocess_exec.py +446 -0
  22. openspeech/dispatch/fanout.py +95 -0
  23. openspeech/dispatch/filters.py +73 -0
  24. openspeech/dispatch/lifecycle.py +178 -0
  25. openspeech/dispatch/watcher.py +82 -0
  26. openspeech/engine_catalog.py +236 -0
  27. openspeech/engine_registry.yaml +347 -0
  28. openspeech/exceptions.py +51 -0
  29. openspeech/factory.py +325 -0
  30. openspeech/local_engines/__init__.py +12 -0
  31. openspeech/local_engines/aim_resolver.py +91 -0
  32. openspeech/local_engines/backends/__init__.py +1 -0
  33. openspeech/local_engines/backends/docker_backend.py +490 -0
  34. openspeech/local_engines/backends/native_backend.py +902 -0
  35. openspeech/local_engines/base.py +30 -0
  36. openspeech/local_engines/engines/__init__.py +1 -0
  37. openspeech/local_engines/engines/faster_whisper.py +36 -0
  38. openspeech/local_engines/engines/fish_speech.py +33 -0
  39. openspeech/local_engines/engines/sherpa_onnx.py +56 -0
  40. openspeech/local_engines/engines/whisper.py +41 -0
  41. openspeech/local_engines/engines/whisperlivekit.py +60 -0
  42. openspeech/local_engines/manager.py +208 -0
  43. openspeech/local_engines/models.py +50 -0
  44. openspeech/local_engines/progress.py +69 -0
  45. openspeech/local_engines/registry.py +19 -0
  46. openspeech/local_engines/task_store.py +52 -0
  47. openspeech/local_engines/tasks.py +71 -0
  48. openspeech/logging_config.py +607 -0
  49. openspeech/observe/__init__.py +0 -0
  50. openspeech/observe/base.py +79 -0
  51. openspeech/observe/debug.py +44 -0
  52. openspeech/observe/latency.py +19 -0
  53. openspeech/observe/metrics.py +47 -0
  54. openspeech/observe/tracing.py +44 -0
  55. openspeech/observe/usage.py +27 -0
  56. openspeech/providers/__init__.py +0 -0
  57. openspeech/providers/_template.py +101 -0
  58. openspeech/providers/stt/__init__.py +0 -0
  59. openspeech/providers/stt/alibaba.py +86 -0
  60. openspeech/providers/stt/assemblyai.py +135 -0
  61. openspeech/providers/stt/azure_speech.py +99 -0
  62. openspeech/providers/stt/baidu.py +135 -0
  63. openspeech/providers/stt/deepgram.py +311 -0
  64. openspeech/providers/stt/elevenlabs.py +385 -0
  65. openspeech/providers/stt/faster_whisper.py +211 -0
  66. openspeech/providers/stt/google_cloud.py +106 -0
  67. openspeech/providers/stt/iflytek.py +427 -0
  68. openspeech/providers/stt/macos_speech.py +226 -0
  69. openspeech/providers/stt/openai.py +84 -0
  70. openspeech/providers/stt/sherpa_onnx.py +353 -0
  71. openspeech/providers/stt/tencent.py +212 -0
  72. openspeech/providers/stt/volcengine.py +107 -0
  73. openspeech/providers/stt/whisper.py +153 -0
  74. openspeech/providers/stt/whisperlivekit.py +530 -0
  75. openspeech/providers/stt/windows_speech.py +249 -0
  76. openspeech/providers/tts/__init__.py +0 -0
  77. openspeech/providers/tts/alibaba.py +95 -0
  78. openspeech/providers/tts/azure_speech.py +123 -0
  79. openspeech/providers/tts/baidu.py +143 -0
  80. openspeech/providers/tts/coqui.py +64 -0
  81. openspeech/providers/tts/cosyvoice.py +90 -0
  82. openspeech/providers/tts/deepgram.py +174 -0
  83. openspeech/providers/tts/elevenlabs.py +311 -0
  84. openspeech/providers/tts/fish_speech.py +158 -0
  85. openspeech/providers/tts/google_cloud.py +107 -0
  86. openspeech/providers/tts/iflytek.py +209 -0
  87. openspeech/providers/tts/macos_say.py +251 -0
  88. openspeech/providers/tts/minimax.py +122 -0
  89. openspeech/providers/tts/openai.py +104 -0
  90. openspeech/providers/tts/piper.py +104 -0
  91. openspeech/providers/tts/tencent.py +189 -0
  92. openspeech/providers/tts/volcengine.py +117 -0
  93. openspeech/providers/tts/windows_sapi.py +234 -0
  94. openspeech/server/__init__.py +1 -0
  95. openspeech/server/app.py +72 -0
  96. openspeech/server/auth.py +42 -0
  97. openspeech/server/middleware.py +75 -0
  98. openspeech/server/routes/__init__.py +1 -0
  99. openspeech/server/routes/management.py +848 -0
  100. openspeech/server/routes/stt.py +121 -0
  101. openspeech/server/routes/tts.py +159 -0
  102. openspeech/server/routes/webui.py +29 -0
  103. openspeech/server/webui/app.js +2649 -0
  104. openspeech/server/webui/index.html +216 -0
  105. openspeech/server/webui/styles.css +617 -0
  106. openspeech/server/ws/__init__.py +1 -0
  107. openspeech/server/ws/stt_stream.py +263 -0
  108. openspeech/server/ws/tts_stream.py +207 -0
  109. openspeech/telemetry/__init__.py +21 -0
  110. openspeech/telemetry/perf.py +307 -0
  111. openspeech/utils/__init__.py +5 -0
  112. openspeech/utils/audio_converter.py +406 -0
  113. openspeech/utils/audio_playback.py +156 -0
  114. openspeech/vendor_registry.yaml +74 -0
  115. openspeechapi-0.1.0.dist-info/METADATA +101 -0
  116. openspeechapi-0.1.0.dist-info/RECORD +118 -0
  117. openspeechapi-0.1.0.dist-info/WHEEL +4 -0
  118. openspeechapi-0.1.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,34 @@
1
+ """InvokeContext — request-scoped metadata for invocation lifecycle."""
2
+ from __future__ import annotations
3
+
4
+ import time
5
+ import uuid
6
+ from dataclasses import dataclass, field
7
+ from typing import Any
8
+
9
+ from openspeech.core.enums import ExecMode
10
+
11
+
12
@dataclass
class InvokeContext:
    """Request-scoped metadata and timing for one provider invocation.

    All timestamps are taken from ``time.time_ns()`` (integer nanoseconds).
    ``start_time_ns`` is captured when the context is constructed unless a
    value is passed explicitly.
    """

    # Name under which the invoked provider is addressed.
    provider_name: str
    # Name of the provider method being invoked (e.g. "transcribe").
    method: str
    # Execution mode the invocation runs under.
    exec_mode: ExecMode
    # Short correlation id: the first 12 hex characters of a random UUID4.
    request_id: str = field(default_factory=lambda: uuid.uuid4().hex[:12])
    # Timestamp taken at construction time.
    start_time_ns: int = field(default_factory=time.time_ns)
    # Time-to-first-byte timestamp; stays None until record_ttfb() is called.
    ttfb_ns: int | None = None
    # Completion timestamp; stays None until record_end() is called.
    end_time_ns: int | None = None
    # Free-form, caller-supplied extras attached to the invocation.
    metadata: dict[str, Any] = field(default_factory=dict)
    # Optional id of a parent context.
    parent_id: str | None = None

    def record_ttfb(self) -> None:
        """Stamp the time-to-first-byte once; later calls keep the first value."""
        if self.ttfb_ns is None:
            self.ttfb_ns = time.time_ns()

    def record_end(self) -> None:
        """Stamp the completion time, overwriting any earlier value."""
        self.end_time_ns = time.time_ns()

    @property
    def elapsed_ms(self) -> float:
        """Milliseconds between ``start_time_ns`` and the end of the call.

        While ``end_time_ns`` is still ``None`` the current time is used, so
        the value keeps growing until ``record_end()`` is called.
        """
        # Compare against None explicitly: a recorded timestamp of 0 is a
        # legitimate value and must not be mistaken for "not finished yet".
        end = self.end_time_ns if self.end_time_ns is not None else time.time_ns()
        return (end - self.start_time_ns) / 1_000_000
@@ -0,0 +1,661 @@
1
+ """ServiceDispatcher — config-driven orchestrator for speech provider execution."""
2
+ from __future__ import annotations
3
+
4
+ import asyncio
5
+ from openspeech.logging_config import logger
6
+ from collections.abc import AsyncIterator
7
+ from dataclasses import dataclass, field
8
+ from pathlib import Path
9
+ from typing import Any
10
+ import warnings
11
+
12
+ from openspeech.config import EngineConfig, load_config
13
+ from openspeech.core.base import SpeechProvider
14
+ from openspeech.core.enums import ExecMode
15
+ from openspeech.core.models import AudioData, STTOptions, TTSOptions, Transcription
16
+ from openspeech.core.registry import ProviderRegistry
17
+ from openspeech.core.settings import BaseSettings
18
+ from openspeech.dispatch.context import InvokeContext
19
+ from openspeech.dispatch.executors.base import Executor
20
+ from openspeech.dispatch.executors.in_process import InProcessExecutor
21
+ from openspeech.dispatch.executors.remote import RemoteExecutor
22
+ from openspeech.dispatch.executors.subprocess_exec import SubprocessExecutor
23
+ from openspeech.dispatch.fanout import FirstCompleted, MergeStrategy, fan_out
24
+ from openspeech.dispatch.lifecycle import ProviderLifecycleManager
25
+ from openspeech.exceptions import ProviderNotFoundError
26
+ from openspeech.logging_config import bind_context
27
+ from openspeech.observe.base import DispatchObserver, ObserverManager
28
+ from openspeech.telemetry.perf import Event, PerfTimer, milestone
29
+
30
+ @dataclass
31
+ class _EngineHandle:
32
+ """Internal handle pairing a provider class with its executor and settings dict."""
33
+ name: str
34
+ provider_cls: type[SpeechProvider]
35
+ executor: Executor
36
+ exec_mode: ExecMode = ExecMode.IN_PROCESS
37
+ settings_dict: dict[str, Any] = field(default_factory=dict)
38
+ preload: bool = False
39
+ keepalive: int = 0
40
+
41
+ # Backward compatibility alias
42
+ _ProviderHandle = _EngineHandle
43
+
44
+ def _exec_mode_desc(mode: str) -> str:
45
+ """Human-readable description of an execution mode."""
46
+ m = str(mode).strip().lower()
47
+ if m == "subprocess":
48
+ return "Worker subprocess + IPC"
49
+ if m == "local":
50
+ return "Local engine service + HTTP(S)"
51
+ if m == "remote":
52
+ return "Cloud/remote API over network"
53
+ if m == "in_process":
54
+ return "In-process model execution"
55
+ return "Unknown"
56
+
57
+
58
def _make_executor(mode: ExecMode) -> Executor:
    """Create a fresh executor instance for the given execution mode.

    ``REMOTE`` and ``LOCAL`` both map to ``RemoteExecutor``.

    Raises:
        ValueError: If *mode* is not one of the known execution modes.
    """
    factories = {
        ExecMode.IN_PROCESS: InProcessExecutor,
        ExecMode.SUBPROCESS: SubprocessExecutor,
        ExecMode.REMOTE: RemoteExecutor,
        ExecMode.LOCAL: RemoteExecutor,
    }
    factory = factories.get(mode)
    if factory is None:
        raise ValueError(f"Unknown exec_mode: {mode!r}")
    return factory()
66
+
67
+ # Providers that genuinely run in-process (no network/subprocess).
68
+ _TRUE_IN_PROCESS_PROVIDERS = {
69
+ "macos-say", "macos-stt",
70
+ "windows-tts", "windows-stt",
71
+ "native-tts", "native-stt",
72
+ }
73
+
74
def _resolve_exec_mode(alias: str, provider_name: str, raw_mode: str) -> ExecMode:
    """Translate a configured ``exec_mode`` string into an ``ExecMode``.

    The raw value is stripped and lower-cased first. Any value other than
    ``in_process`` is passed straight to ``ExecMode(...)``. ``in_process`` is
    kept only for providers listed in ``_TRUE_IN_PROCESS_PROVIDERS``; for
    every other provider it is treated as a legacy spelling, remapped
    (``fish-speech`` to ``LOCAL``, everything else to ``REMOTE``) and reported
    with a ``DeprecationWarning``.

    Args:
        alias: Config alias, used only in the warning text.
        provider_name: Provider engine name that decides the remapping.
        raw_mode: The ``exec_mode`` value as written in the config.

    Raises:
        ValueError: If the normalized value is not a valid ``ExecMode``.
    """
    normalized = str(raw_mode).strip().lower()
    if normalized != ExecMode.IN_PROCESS.value:
        return ExecMode(normalized)

    # True in-process providers: keep IN_PROCESS
    if provider_name in _TRUE_IN_PROCESS_PROVIDERS:
        return ExecMode.IN_PROCESS

    # Backward compatibility: old configs used in_process for both
    # local-service and cloud providers.
    if provider_name == "fish-speech":
        target = ExecMode.LOCAL
        message = (
            f"Provider '{alias}' uses legacy exec_mode='in_process'; auto-mapped to 'local'. "
            "Please update config to exec_mode='local'."
        )
    else:
        target = ExecMode.REMOTE
        message = (
            f"Provider '{alias}' uses legacy exec_mode='in_process'; auto-mapped to 'remote'. "
            "Please update config to exec_mode='remote' (or keep 'in_process' only for true in-process models)."
        )
    # stacklevel=2 attributes the warning to this function's caller.
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return target
97
+
98
+ class _STTNamespace:
99
+ """Namespaced STT operations on a ServiceDispatcher."""
100
+
101
+ def __init__(self, dispatcher: "ServiceDispatcher") -> None:
102
+ self._dispatcher = dispatcher
103
+
104
+ async def transcribe(
105
+ self,
106
+ provider: str,
107
+ audio: "AudioData",
108
+ opts: "STTOptions | None" = None,
109
+ ) -> "Transcription":
110
+ """Transcribe audio using the named provider."""
111
+ return await self._dispatcher._transcribe(provider, audio, opts)
112
+
113
+ async def transcribe_stream(
114
+ self,
115
+ provider: str,
116
+ stream: "AsyncIterator[bytes]",
117
+ ) -> "AsyncIterator[Transcription]":
118
+ """Stream audio chunks to the named provider and yield transcriptions."""
119
+ async for item in self._dispatcher._transcribe_stream(provider, stream):
120
+ yield item
121
+
122
+ async def fanout(
123
+ self,
124
+ providers: list[str],
125
+ audio: "AudioData",
126
+ opts: "STTOptions | None" = None,
127
+ strategy: "MergeStrategy | None" = None,
128
+ ) -> Any:
129
+ """Dispatch transcription to multiple providers concurrently."""
130
+ return await self._dispatcher._fanout_transcribe(providers, audio, opts, strategy)
131
+
132
+ class _TTSNamespace:
133
+ """Namespaced TTS operations on a ServiceDispatcher."""
134
+
135
+ def __init__(self, dispatcher: "ServiceDispatcher") -> None:
136
+ self._dispatcher = dispatcher
137
+
138
+ async def synthesize(
139
+ self,
140
+ provider: str,
141
+ text: str,
142
+ opts: "TTSOptions | None" = None,
143
+ ) -> "AudioData":
144
+ """Synthesize speech using the named provider."""
145
+ return await self._dispatcher._synthesize(provider, text, opts)
146
+
147
+ async def synthesize_stream(
148
+ self,
149
+ provider: str,
150
+ text: str,
151
+ opts: "TTSOptions | None" = None,
152
+ ) -> "AsyncIterator[Any]":
153
+ """Stream synthesized speech using the named provider."""
154
+ async for chunk in self._dispatcher._synthesize_stream(provider, text, opts):
155
+ yield chunk
156
+
157
class ServiceDispatcher:
    """Orchestrates multiple speech providers via config-driven executor routing.

    Each configured engine alias maps to an ``_EngineHandle``. Calls are routed
    to the handle's executor after the lifecycle manager has made the engine
    ready. Public STT/TTS entry points live on the ``stt`` and ``tts``
    namespace attributes.
    """

    # Display-friendly settings keys extracted for UI display_info
    _DISPLAY_INFO_KEYS = (
        "model", "model_size", "model_name", "voice", "voice_id", "voice_type",
        "voice_name", "language", "engine_type", "dev_pid", "base_url", "region",
        "api_url", "cluster",
    )

    def __init__(self, handles: dict[str, _EngineHandle]) -> None:
        """Create a dispatcher over *handles* (alias -> engine handle)."""
        self._handles = handles
        self._observer_mgr = ObserverManager()
        self._lifecycle = ProviderLifecycleManager()
        # Created by start() and released by stop().
        self._shared_http_client: Any = None
        self.stt = _STTNamespace(self)
        self.tts = _TTSNamespace(self)

    # ------------------------------------------------------------------
    # Factory
    # ------------------------------------------------------------------

    @classmethod
    def from_config(cls, path: Path, registry: ProviderRegistry) -> "ServiceDispatcher":
        """Load YAML config and construct a ServiceDispatcher.

        Raises ProviderNotFoundError if any configured provider name is not in the registry.
        """
        config = load_config(path)
        handles: dict[str, _EngineHandle] = {
            alias: cls._build_handle(alias, eng_cfg, registry)
            for alias, eng_cfg in config.engines.items()
        }
        return cls(handles)

    @staticmethod
    def _build_handle(
        alias: str,
        eng_cfg: Any,
        registry: ProviderRegistry,
    ) -> _EngineHandle:
        """Construct an _EngineHandle from an EngineConfig entry.

        Raises whatever ``registry.get`` raises for an unknown provider and
        ``ValueError`` for an invalid ``exec_mode``.
        """
        provider_cls = registry.get(eng_cfg.provider)  # raises ProviderNotFoundError
        mode = _resolve_exec_mode(alias, eng_cfg.provider, eng_cfg.exec_mode)
        executor = _make_executor(mode)

        # Store provider info so health_check works before lazy-start
        if hasattr(executor, "set_provider_info"):
            settings_cls = getattr(provider_cls, "settings_cls", None)
            if settings_cls:
                try:
                    settings_obj = settings_cls(**eng_cfg.settings)
                except Exception as exc:
                    # Best effort: fall back to default settings, but leave a
                    # trace. Only the exception type is logged because the
                    # message could echo configured credentials.
                    logger.warning(
                        f"Engine '{alias}': could not build "
                        f"{getattr(settings_cls, '__name__', settings_cls)} from the "
                        f"configured settings ({type(exc).__name__}); "
                        "using default settings for executor provider info."
                    )
                    settings_obj = settings_cls()
                executor.set_provider_info(provider_cls, settings_obj)

        return _EngineHandle(
            name=alias,
            provider_cls=provider_cls,
            executor=executor,
            exec_mode=mode,
            settings_dict=eng_cfg.settings,
            preload=eng_cfg.preload,
            keepalive=eng_cfg.keepalive,
        )

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    async def start(self) -> None:
        """Register all providers in lifecycle manager and start preloaded ones."""
        # Create a shared httpx.AsyncClient up-front so all cloud providers
        # reuse the same SSL/transport resources (avoids N × 5 s cold-start
        # on Windows where SSL certificate chain loading is expensive).
        import httpx

        # Reuse a client left over from an earlier start() instead of
        # replacing (and thereby leaking) it.
        if getattr(self, "_shared_http_client", None) is None:
            self._shared_http_client = httpx.AsyncClient(timeout=60.0)
        self._lifecycle.set_shared_http_client(self._shared_http_client)

        for handle in self._handles.values():
            self._lifecycle.register(handle.name, handle, handle.keepalive)
        self._lifecycle.start_idle_checker()
        for handle in self._handles.values():
            if handle.preload:
                await self._lifecycle.ensure_ready(handle.name)

    async def stop(self) -> None:
        """Stop all configured provider executors via lifecycle manager.

        The shared HTTP client is closed afterwards, even when stopping the
        providers raised; that exception still propagates.
        """
        try:
            await self._lifecycle.stop_all()
        finally:
            # Close the shared HTTP client *after* all providers have stopped
            # so no in-flight requests are interrupted.
            client = getattr(self, "_shared_http_client", None)
            if client is not None:
                self._shared_http_client = None
                await client.aclose()

    async def reload_config(
        self, config_path: Path, registry: ProviderRegistry
    ) -> dict[str, list[str]]:
        """Hot-reload: re-read config, diff with current state, apply changes.

        All new and replacement handles are built before any running engine
        is touched, so an invalid entry (unknown provider, bad ``exec_mode``)
        raises while the dispatcher is still in its previous state.

        An existing engine is rebuilt when its settings, resolved exec mode,
        provider class or keepalive changed. Rebuilt and added engines with
        ``preload`` set are made ready before returning.

        Returns a dict with keys added/removed/updated/unchanged listing provider aliases.
        """
        new_config = load_config(config_path)
        old_names = set(self._handles)
        new_names = set(new_config.engines)

        added = sorted(new_names - old_names)
        removed = sorted(old_names - new_names)

        # Phase 1 - validate and build; nothing running is modified yet.
        replacements: dict[str, _EngineHandle] = {}
        unchanged: list[str] = []
        for name in sorted(old_names & new_names):
            old_handle = self._handles[name]
            new_cfg = new_config.engines[name]
            changed = (
                old_handle.settings_dict != new_cfg.settings
                or old_handle.exec_mode
                != _resolve_exec_mode(name, new_cfg.provider, new_cfg.exec_mode)
                or old_handle.provider_cls != registry.get(new_cfg.provider)
                # keepalive is handed to the lifecycle manager at registration
                # time, so a new value needs a re-registration as well.
                or old_handle.keepalive != new_cfg.keepalive
            )
            if changed:
                replacements[name] = self._build_handle(name, new_cfg, registry)
            else:
                unchanged.append(name)

        additions: dict[str, _EngineHandle] = {
            name: self._build_handle(name, new_config.engines[name], registry)
            for name in added
        }

        # Phase 2 - apply.
        # Remove deleted providers
        for name in removed:
            await self._lifecycle.stop_provider(name)
            self._lifecycle.unregister(name)
            del self._handles[name]

        # Swap changed providers: stop old, register the rebuilt handle
        for name, new_handle in replacements.items():
            await self._lifecycle.stop_provider(name)
            self._lifecycle.unregister(name)
            self._handles[name] = new_handle
            self._lifecycle.register(name, new_handle, new_handle.keepalive)
            if new_handle.preload:
                await self._lifecycle.ensure_ready(name)

        # Add new engines
        for name, new_handle in additions.items():
            self._handles[name] = new_handle
            self._lifecycle.register(name, new_handle, new_handle.keepalive)
            if new_handle.preload:
                await self._lifecycle.ensure_ready(name)

        return {
            "added": added,
            "removed": removed,
            "updated": sorted(replacements),
            "unchanged": unchanged,
        }

    def provider_states(self) -> dict[str, str]:
        """Return lifecycle state for each registered provider."""
        return self._lifecycle.list_states()

    # ------------------------------------------------------------------
    # Provider lookup
    # ------------------------------------------------------------------

    def list_engines(self) -> list[str]:
        """Return the list of configured engine aliases."""
        return list(self._handles)

    # Backward compatibility alias
    def list_providers(self) -> list[str]:
        """Return the list of configured engine aliases (deprecated, use list_engines)."""
        return self.list_engines()

    def list_engines_info(self, *, engine_type: str | None = None) -> list[dict]:
        """Return detailed information for all configured engines.

        Equivalent to the ``GET /v1/engines`` response payload but available
        as a library API — no HTTP server required.

        Args:
            engine_type: Optional filter — ``"stt"`` or ``"tts"``.
                ``None`` returns all engines.

        Returns:
            A list of dicts, each containing::

                {
                    "name": str,            # config alias
                    "display_name": str,    # human-friendly name (from catalog)
                    "provider": str,        # provider engine name
                    "type": str,            # "stt" | "tts"
                    "category": str,        # "cloud" | "local" | "native"
                    "state": str,           # lifecycle state
                    "capabilities": list,   # e.g. ["batch", "streaming"]
                    "exec_mode": str,       # "remote" | "local" | "in_process" | ...
                    "exec_mode_desc": str,  # human-readable exec_mode
                    "display_info": dict,   # key settings (model, voice, ...)
                }
        """
        from openspeech.engine_catalog import get_catalog_entry

        states = self.provider_states()
        result: list[dict] = []

        for name, handle in self._handles.items():
            provider_cls = handle.provider_cls
            provider_type = getattr(provider_cls, "provider_type", None)
            if provider_type and hasattr(provider_type, "value"):
                type_str = provider_type.value
            else:
                type_str = "unknown"

            if engine_type and type_str != engine_type:
                continue

            # NOTE(review): the catalog is queried with the config alias, not
            # the provider name - confirm this is the intended key.
            catalog_entry = get_catalog_entry(name)
            if hasattr(handle.exec_mode, "value"):
                exec_mode_value = handle.exec_mode.value
            else:
                exec_mode_value = str(handle.exec_mode)

            # Capabilities from provider class
            caps = getattr(provider_cls, "capabilities", set())
            capabilities = sorted(
                c.value if hasattr(c, "value") else str(c) for c in caps
            )

            # Pick display-friendly settings (falsy values are left out)
            display_info = {}
            for key in self._DISPLAY_INFO_KEYS:
                val = handle.settings_dict.get(key)
                if val:
                    display_info[key] = val

            if catalog_entry:
                display_name = catalog_entry.display_name or catalog_entry.name
                category = catalog_entry.category
            else:
                display_name = name
                category = "unknown"

            result.append({
                "name": name,
                "display_name": display_name,
                "provider": getattr(provider_cls, "name", ""),
                "type": type_str,
                "category": category,
                "state": states.get(name, "unknown"),
                "capabilities": capabilities,
                "exec_mode": exec_mode_value,
                "exec_mode_desc": _exec_mode_desc(exec_mode_value),
                "display_info": display_info,
            })

        return result

    def _get_handle(self, alias: str) -> _ProviderHandle:
        """Return the handle for *alias*; raise ProviderNotFoundError if unknown."""
        try:
            return self._handles[alias]
        except KeyError:
            raise ProviderNotFoundError(alias) from None

    # ------------------------------------------------------------------
    # STT
    # ------------------------------------------------------------------

    async def _transcribe(
        self,
        provider: str,
        audio: AudioData,
        opts: STTOptions | None = None,
    ) -> Transcription:
        """Run a batch transcription on *provider*, notifying observers.

        Observers get ``invoke_start`` before the call and either
        ``invoke_end`` (success) or ``invoke_error`` (the executor raised).
        Executor exceptions are re-raised unchanged.
        """
        handle = self._get_handle(provider)
        await self._lifecycle.ensure_ready(provider)
        ctx = InvokeContext(
            provider_name=provider,
            method="transcribe",
            exec_mode=handle.exec_mode,
        )
        kwargs: dict[str, Any] = {"audio": audio}
        if opts is not None:
            kwargs["opts"] = opts
        with bind_context(
            provider=provider,
            engine=provider,
            request_id=ctx.request_id,
        ):
            milestone(
                Event.DISPATCH_INVOKE_START,
                level="verbose",
                method="transcribe",
                exec_mode=handle.exec_mode.value,
            )
            await self._observer_mgr.notify_invoke_start(ctx)
            with PerfTimer(
                Event.DISPATCH_TOTAL,
                method="transcribe",
                exec_mode=handle.exec_mode.value,
            ) as t:
                # Only the executor call is guarded: a failure while reporting
                # a *successful* call must not be recorded as an invoke error.
                try:
                    result = await handle.executor.invoke("transcribe", **kwargs)
                except Exception as exc:
                    ctx.record_end()
                    await self._observer_mgr.notify_invoke_error(ctx, exc)
                    milestone(
                        Event.DISPATCH_INVOKE_ERROR,
                        method="transcribe",
                        error_type=type(exc).__name__,
                        error_message=str(exc),
                    )
                    raise
                ctx.record_end()
                t.add(bytes=len(getattr(audio, "data", b"") or b""))
                await self._observer_mgr.notify_invoke_end(ctx, result)
                self._lifecycle.touch(provider)
                milestone(
                    Event.DISPATCH_INVOKE_END,
                    level="verbose",
                    method="transcribe",
                    elapsed_ms=ctx.elapsed_ms,
                )
                return result

    async def _transcribe_stream(
        self,
        provider: str,
        stream: AsyncIterator[bytes],
    ) -> AsyncIterator[Transcription]:
        """Stream audio chunks to the provider and yield Transcription results."""
        handle = self._get_handle(provider)
        await self._lifecycle.ensure_ready(provider)
        ctx = InvokeContext(
            provider_name=provider,
            method="transcribe_stream",
            exec_mode=handle.exec_mode,
        )
        with bind_context(
            provider=provider, engine=provider, request_id=ctx.request_id
        ):
            milestone(Event.DISPATCH_STREAM_START, method="transcribe_stream")
            with PerfTimer(
                Event.DISPATCH_TOTAL,
                method="transcribe_stream",
                exec_mode=handle.exec_mode.value,
            ) as t:
                count = 0
                try:
                    async for item in handle.executor.invoke_stream(
                        "transcribe_stream", stream=stream
                    ):
                        count += 1
                        yield item
                except Exception as exc:
                    # Same error milestone the batch path emits.
                    milestone(
                        Event.DISPATCH_INVOKE_ERROR,
                        method="transcribe_stream",
                        error_type=type(exc).__name__,
                        error_message=str(exc),
                    )
                    raise
                finally:
                    t.add(stream_items=count)
                    milestone(Event.DISPATCH_STREAM_END, method="transcribe_stream", items=count)
                    self._lifecycle.touch(provider)

    # ------------------------------------------------------------------
    # TTS
    # ------------------------------------------------------------------

    async def _synthesize(
        self,
        provider: str,
        text: str,
        opts: TTSOptions | None = None,
    ) -> AudioData:
        """Run a batch synthesis on *provider*, notifying observers.

        Observers get ``invoke_start`` before the call and either
        ``invoke_end`` (success) or ``invoke_error`` (the executor raised).
        Executor exceptions are re-raised unchanged.
        """
        handle = self._get_handle(provider)
        await self._lifecycle.ensure_ready(provider)
        ctx = InvokeContext(
            provider_name=provider,
            method="synthesize",
            exec_mode=handle.exec_mode,
            metadata={"text": text},
        )
        kwargs: dict[str, Any] = {"text": text}
        if opts is not None:
            kwargs["opts"] = opts
        with bind_context(
            provider=provider, engine=provider, request_id=ctx.request_id
        ):
            milestone(
                Event.DISPATCH_INVOKE_START,
                level="verbose",
                method="synthesize",
                exec_mode=handle.exec_mode.value,
                text_len=len(text),
            )
            await self._observer_mgr.notify_invoke_start(ctx)
            with PerfTimer(
                Event.DISPATCH_TOTAL,
                method="synthesize",
                exec_mode=handle.exec_mode.value,
            ) as t:
                # Only the executor call is guarded: a failure while reporting
                # a *successful* call must not be recorded as an invoke error.
                try:
                    result = await handle.executor.invoke("synthesize", **kwargs)
                except Exception as exc:
                    ctx.record_end()
                    await self._observer_mgr.notify_invoke_error(ctx, exc)
                    milestone(
                        Event.DISPATCH_INVOKE_ERROR,
                        method="synthesize",
                        error_type=type(exc).__name__,
                        error_message=str(exc),
                    )
                    raise
                ctx.record_end()
                t.add(audio_bytes=len(getattr(result, "data", b"") or b""))
                await self._observer_mgr.notify_invoke_end(ctx, result)
                self._lifecycle.touch(provider)
                milestone(
                    Event.DISPATCH_INVOKE_END,
                    level="verbose",
                    method="synthesize",
                    elapsed_ms=ctx.elapsed_ms,
                )
                return result

    async def _synthesize_stream(
        self,
        provider: str,
        text: str,
        opts: TTSOptions | None = None,
    ) -> AsyncIterator[Any]:
        """Stream synthesis — yields AudioChunk objects from the provider."""
        handle = self._get_handle(provider)
        await self._lifecycle.ensure_ready(provider)
        ctx = InvokeContext(
            provider_name=provider,
            method="synthesize_stream",
            exec_mode=handle.exec_mode,
            metadata={"text_len": len(text)},
        )
        kwargs: dict[str, Any] = {"text": text}
        if opts is not None:
            kwargs["opts"] = opts
        with bind_context(
            provider=provider, engine=provider, request_id=ctx.request_id
        ):
            milestone(Event.DISPATCH_STREAM_START, method="synthesize_stream", text_len=len(text))
            with PerfTimer(
                Event.DISPATCH_TOTAL,
                method="synthesize_stream",
                exec_mode=handle.exec_mode.value,
            ) as t:
                count = 0
                try:
                    async for chunk in handle.executor.invoke_stream(
                        "synthesize_stream", **kwargs
                    ):
                        count += 1
                        yield chunk
                except Exception as exc:
                    # Same error milestone the batch path emits.
                    milestone(
                        Event.DISPATCH_INVOKE_ERROR,
                        method="synthesize_stream",
                        error_type=type(exc).__name__,
                        error_message=str(exc),
                    )
                    raise
                finally:
                    t.add(stream_items=count)
                    milestone(Event.DISPATCH_STREAM_END, method="synthesize_stream", items=count)
                    self._lifecycle.touch(provider)

    # ------------------------------------------------------------------
    # FanOut
    # ------------------------------------------------------------------

    async def _fanout_transcribe(
        self,
        providers: list[str],
        audio: AudioData,
        opts: STTOptions | None = None,
        strategy: MergeStrategy | None = None,
    ) -> Any:
        """Dispatch transcription to multiple providers concurrently.

        Repeated aliases are dispatched once. ``strategy`` defaults to
        ``FirstCompleted()``.
        """
        if strategy is None:
            strategy = FirstCompleted()

        # De-duplicate before creating coroutines: a duplicate key would
        # overwrite an already-created coroutine that is then never awaited.
        tasks = {
            alias: self._transcribe(alias, audio, opts)
            for alias in dict.fromkeys(providers)
        }
        return await fan_out(tasks, strategy)

    # ------------------------------------------------------------------
    # Health
    # ------------------------------------------------------------------

    async def health(self) -> dict[str, bool]:
        """Return health status for all configured providers.

        A provider is healthy only when its ``health_check()`` returns exactly
        ``True``; any other result, including a raised exception, is ``False``.
        """
        # Snapshot first so aliases and results stay aligned even if the
        # handle table changes while the checks are awaited.
        snapshot = list(self._handles.items())
        results = await asyncio.gather(
            *(handle.executor.health_check() for _, handle in snapshot),
            return_exceptions=True,
        )
        return {
            alias: (result is True)
            for (alias, _), result in zip(snapshot, results)
        }

    # ------------------------------------------------------------------
    # Observer management
    # ------------------------------------------------------------------

    def add_observer(self, observer: DispatchObserver) -> None:
        """Register an observer for invocation notifications."""
        self._observer_mgr.add(observer)

    def remove_observer(self, observer: DispatchObserver) -> None:
        """Unregister an observer. No-op if the observer is not registered."""
        try:
            self._observer_mgr.remove(observer)
        except ValueError:
            # Deliberate: removing an unknown observer is not an error.
            pass
File without changes