openspeechapi 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openspeech/__init__.py +75 -0
- openspeech/__main__.py +5 -0
- openspeech/cli.py +413 -0
- openspeech/client/__init__.py +4 -0
- openspeech/client/client.py +145 -0
- openspeech/config.py +212 -0
- openspeech/core/__init__.py +0 -0
- openspeech/core/base.py +75 -0
- openspeech/core/enums.py +39 -0
- openspeech/core/models.py +61 -0
- openspeech/core/registry.py +37 -0
- openspeech/core/settings.py +8 -0
- openspeech/demo.py +675 -0
- openspeech/dispatch/__init__.py +0 -0
- openspeech/dispatch/context.py +34 -0
- openspeech/dispatch/dispatcher.py +661 -0
- openspeech/dispatch/executors/__init__.py +0 -0
- openspeech/dispatch/executors/base.py +34 -0
- openspeech/dispatch/executors/in_process.py +66 -0
- openspeech/dispatch/executors/remote.py +64 -0
- openspeech/dispatch/executors/subprocess_exec.py +446 -0
- openspeech/dispatch/fanout.py +95 -0
- openspeech/dispatch/filters.py +73 -0
- openspeech/dispatch/lifecycle.py +178 -0
- openspeech/dispatch/watcher.py +82 -0
- openspeech/engine_catalog.py +236 -0
- openspeech/engine_registry.yaml +347 -0
- openspeech/exceptions.py +51 -0
- openspeech/factory.py +325 -0
- openspeech/local_engines/__init__.py +12 -0
- openspeech/local_engines/aim_resolver.py +91 -0
- openspeech/local_engines/backends/__init__.py +1 -0
- openspeech/local_engines/backends/docker_backend.py +490 -0
- openspeech/local_engines/backends/native_backend.py +902 -0
- openspeech/local_engines/base.py +30 -0
- openspeech/local_engines/engines/__init__.py +1 -0
- openspeech/local_engines/engines/faster_whisper.py +36 -0
- openspeech/local_engines/engines/fish_speech.py +33 -0
- openspeech/local_engines/engines/sherpa_onnx.py +56 -0
- openspeech/local_engines/engines/whisper.py +41 -0
- openspeech/local_engines/engines/whisperlivekit.py +60 -0
- openspeech/local_engines/manager.py +208 -0
- openspeech/local_engines/models.py +50 -0
- openspeech/local_engines/progress.py +69 -0
- openspeech/local_engines/registry.py +19 -0
- openspeech/local_engines/task_store.py +52 -0
- openspeech/local_engines/tasks.py +71 -0
- openspeech/logging_config.py +607 -0
- openspeech/observe/__init__.py +0 -0
- openspeech/observe/base.py +79 -0
- openspeech/observe/debug.py +44 -0
- openspeech/observe/latency.py +19 -0
- openspeech/observe/metrics.py +47 -0
- openspeech/observe/tracing.py +44 -0
- openspeech/observe/usage.py +27 -0
- openspeech/providers/__init__.py +0 -0
- openspeech/providers/_template.py +101 -0
- openspeech/providers/stt/__init__.py +0 -0
- openspeech/providers/stt/alibaba.py +86 -0
- openspeech/providers/stt/assemblyai.py +135 -0
- openspeech/providers/stt/azure_speech.py +99 -0
- openspeech/providers/stt/baidu.py +135 -0
- openspeech/providers/stt/deepgram.py +311 -0
- openspeech/providers/stt/elevenlabs.py +385 -0
- openspeech/providers/stt/faster_whisper.py +211 -0
- openspeech/providers/stt/google_cloud.py +106 -0
- openspeech/providers/stt/iflytek.py +427 -0
- openspeech/providers/stt/macos_speech.py +226 -0
- openspeech/providers/stt/openai.py +84 -0
- openspeech/providers/stt/sherpa_onnx.py +353 -0
- openspeech/providers/stt/tencent.py +212 -0
- openspeech/providers/stt/volcengine.py +107 -0
- openspeech/providers/stt/whisper.py +153 -0
- openspeech/providers/stt/whisperlivekit.py +530 -0
- openspeech/providers/stt/windows_speech.py +249 -0
- openspeech/providers/tts/__init__.py +0 -0
- openspeech/providers/tts/alibaba.py +95 -0
- openspeech/providers/tts/azure_speech.py +123 -0
- openspeech/providers/tts/baidu.py +143 -0
- openspeech/providers/tts/coqui.py +64 -0
- openspeech/providers/tts/cosyvoice.py +90 -0
- openspeech/providers/tts/deepgram.py +174 -0
- openspeech/providers/tts/elevenlabs.py +311 -0
- openspeech/providers/tts/fish_speech.py +158 -0
- openspeech/providers/tts/google_cloud.py +107 -0
- openspeech/providers/tts/iflytek.py +209 -0
- openspeech/providers/tts/macos_say.py +251 -0
- openspeech/providers/tts/minimax.py +122 -0
- openspeech/providers/tts/openai.py +104 -0
- openspeech/providers/tts/piper.py +104 -0
- openspeech/providers/tts/tencent.py +189 -0
- openspeech/providers/tts/volcengine.py +117 -0
- openspeech/providers/tts/windows_sapi.py +234 -0
- openspeech/server/__init__.py +1 -0
- openspeech/server/app.py +72 -0
- openspeech/server/auth.py +42 -0
- openspeech/server/middleware.py +75 -0
- openspeech/server/routes/__init__.py +1 -0
- openspeech/server/routes/management.py +848 -0
- openspeech/server/routes/stt.py +121 -0
- openspeech/server/routes/tts.py +159 -0
- openspeech/server/routes/webui.py +29 -0
- openspeech/server/webui/app.js +2649 -0
- openspeech/server/webui/index.html +216 -0
- openspeech/server/webui/styles.css +617 -0
- openspeech/server/ws/__init__.py +1 -0
- openspeech/server/ws/stt_stream.py +263 -0
- openspeech/server/ws/tts_stream.py +207 -0
- openspeech/telemetry/__init__.py +21 -0
- openspeech/telemetry/perf.py +307 -0
- openspeech/utils/__init__.py +5 -0
- openspeech/utils/audio_converter.py +406 -0
- openspeech/utils/audio_playback.py +156 -0
- openspeech/vendor_registry.yaml +74 -0
- openspeechapi-0.1.0.dist-info/METADATA +101 -0
- openspeechapi-0.1.0.dist-info/RECORD +118 -0
- openspeechapi-0.1.0.dist-info/WHEEL +4 -0
- openspeechapi-0.1.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""DispatchObserver ABC and ObserverManager with error isolation."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
from abc import ABC
|
|
4
|
+
from typing import Any
|
|
5
|
+
from openspeech.logging_config import logger
|
|
6
|
+
from openspeech.core.enums import ExecMode
|
|
7
|
+
from openspeech.dispatch.context import InvokeContext
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class DispatchObserver(ABC):
    """Optional hook interface for dispatch lifecycle events.

    Every hook is a harmless no-op, so subclasses override only the
    events they care about and ignore the rest.
    """

    async def on_invoke_start(self, ctx: InvokeContext) -> None:
        """Invoke-start hook; default is a no-op."""

    async def on_invoke_end(self, ctx: InvokeContext, result: Any) -> None:
        """Invoke-end hook (successful completion); default is a no-op."""

    async def on_invoke_error(self, ctx: InvokeContext, error: Exception) -> None:
        """Invoke-error hook; default is a no-op."""

    async def on_stream_chunk(self, ctx: InvokeContext, chunk: Any) -> None:
        """Per-chunk streaming hook; default is a no-op."""

    async def on_provider_start(self, provider: str, exec_mode: ExecMode) -> None:
        """Provider-start hook; default is a no-op."""

    async def on_provider_stop(self, provider: str) -> None:
        """Provider-stop hook; default is a no-op."""

    async def on_health_change(self, provider: str, healthy: bool) -> None:
        """Health-change hook; default is a no-op."""
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ObserverManager:
    """Manages DispatchObserver instances with error isolation.

    Observer callbacks are never allowed to break dispatch: every
    exception is caught and logged, and an observer that fails
    ``max_consecutive_errors`` times in a row is deregistered
    automatically. A successful call resets that observer's counter.
    """

    def __init__(self, max_consecutive_errors: int = 10) -> None:
        self._observers: list[DispatchObserver] = []
        # Consecutive-error counter per observer, keyed by id() so
        # unhashable observer objects are supported.
        self._error_counts: dict[int, int] = {}
        self._max_errors = max_consecutive_errors

    def add(self, observer: DispatchObserver) -> None:
        """Register an observer and reset its error counter."""
        self._observers.append(observer)
        self._error_counts[id(observer)] = 0

    def remove(self, observer: DispatchObserver) -> None:
        """Deregister an observer (raises ValueError if unknown)."""
        self._observers.remove(observer)
        self._error_counts.pop(id(observer), None)

    def list(self) -> list[DispatchObserver]:
        """Return a copy of the registered observers."""
        return list(self._observers)

    async def _notify(self, method: str, *args: Any, **kwargs: Any) -> None:
        """Invoke ``method`` on every observer, isolating failures.

        Iterates over a snapshot of the observer list so that a callback
        may safely call add()/remove() on this manager without corrupting
        the traversal (iterating the live list while it is mutated can
        skip observers).
        """
        to_remove: list[DispatchObserver] = []
        for obs in list(self._observers):
            try:
                fn = getattr(obs, method)
                await fn(*args, **kwargs)
                self._error_counts[id(obs)] = 0
            except Exception as e:
                obs_id = id(obs)
                self._error_counts[obs_id] = self._error_counts.get(obs_id, 0) + 1
                logger.warning(f"Observer {type(obs).__name__} raised {type(e).__name__}: {e}")
                if self._error_counts[obs_id] >= self._max_errors:
                    to_remove.append(obs)
                    logger.warning(
                        f"Auto-deregistering {type(obs).__name__} after {self._max_errors} consecutive errors"
                    )
        for obs in to_remove:
            # The observer may already have removed itself in a callback.
            if obs in self._observers:
                self.remove(obs)

    async def notify_invoke_start(self, ctx: InvokeContext) -> None:
        await self._notify("on_invoke_start", ctx)

    async def notify_invoke_end(self, ctx: InvokeContext, result: Any) -> None:
        await self._notify("on_invoke_end", ctx, result)

    async def notify_invoke_error(self, ctx: InvokeContext, error: Exception) -> None:
        await self._notify("on_invoke_error", ctx, error)

    async def notify_stream_chunk(self, ctx: InvokeContext, chunk: Any) -> None:
        await self._notify("on_stream_chunk", ctx, chunk)

    async def notify_provider_start(self, provider: str, exec_mode: ExecMode) -> None:
        await self._notify("on_provider_start", provider, exec_mode)

    async def notify_provider_stop(self, provider: str) -> None:
        await self._notify("on_provider_stop", provider)

    async def notify_health_change(self, provider: str, healthy: bool) -> None:
        await self._notify("on_health_change", provider, healthy)
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""DebugLogObserver — emits loguru debug output for every lifecycle event."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
from typing import Any
|
|
4
|
+
from openspeech.logging_config import logger
|
|
5
|
+
from openspeech.core.enums import ExecMode
|
|
6
|
+
from openspeech.dispatch.context import InvokeContext
|
|
7
|
+
from openspeech.observe.base import DispatchObserver
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class DebugLogObserver(DispatchObserver):
    """Emits a loguru DEBUG record for every dispatch lifecycle event."""

    async def on_invoke_start(self, ctx: InvokeContext) -> None:
        logger.debug(
            "[openspeech] invoke_start provider={} method={} request_id={}",
            ctx.provider_name,
            ctx.method,
            ctx.request_id,
        )

    async def on_invoke_end(self, ctx: InvokeContext, result: Any) -> None:
        logger.debug(
            "[openspeech] invoke_end provider={} method={} request_id={} elapsed_ms={:.2f}",
            ctx.provider_name,
            ctx.method,
            ctx.request_id,
            ctx.elapsed_ms,
        )

    async def on_invoke_error(self, ctx: InvokeContext, error: Exception) -> None:
        logger.debug(
            "[openspeech] invoke_error provider={} method={} request_id={} error={}",
            ctx.provider_name,
            ctx.method,
            ctx.request_id,
            repr(error),
        )

    async def on_stream_chunk(self, ctx: InvokeContext, chunk: Any) -> None:
        logger.debug(
            "[openspeech] stream_chunk provider={} method={} request_id={}",
            ctx.provider_name,
            ctx.method,
            ctx.request_id,
        )

    async def on_provider_start(self, provider: str, exec_mode: ExecMode) -> None:
        logger.debug(
            "[openspeech] provider_start provider={} exec_mode={}", provider, exec_mode
        )

    async def on_provider_stop(self, provider: str) -> None:
        logger.debug("[openspeech] provider_stop provider={}", provider)

    async def on_health_change(self, provider: str, healthy: bool) -> None:
        logger.debug(
            "[openspeech] health_change provider={} healthy={}", provider, healthy
        )
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""LatencyObserver — records total_ms and ttfb_ms latency samples."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
from typing import Any
|
|
4
|
+
from openspeech.dispatch.context import InvokeContext
|
|
5
|
+
from openspeech.observe.base import DispatchObserver
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class LatencyObserver(DispatchObserver):
    """Collects per-invocation latency samples.

    ``total_ms`` receives one end-to-end duration per completed call;
    ``ttfb_ms`` receives a time-to-first-byte sample only for calls
    where the context recorded a first-byte timestamp (``ttfb_ns``).
    """

    def __init__(self) -> None:
        self.total_ms: list[float] = []
        self.ttfb_ms: list[float] = []

    async def on_invoke_end(self, ctx: InvokeContext, result: Any) -> None:
        self.total_ms.append(ctx.elapsed_ms)
        first_byte_ns = ctx.ttfb_ns
        if first_byte_ns is not None:
            # Nanoseconds since start → milliseconds.
            self.ttfb_ms.append((first_byte_ns - ctx.start_time_ns) / 1_000_000)
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""MetricsObserver — tracks TTFB, duration, and error counts per provider+method."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import Any
|
|
5
|
+
from openspeech.dispatch.context import InvokeContext
|
|
6
|
+
from openspeech.observe.base import DispatchObserver
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class _ProviderMethodMetrics:
    """Metric accumulator for a single ``provider.method`` key."""

    # Invocations that completed without raising.
    total_calls: int = 0
    # Invocations that raised.
    error_count: int = 0
    # End-to-end durations in milliseconds, one per completed call.
    durations_ms: list[float] = field(default_factory=list)
    # Time-to-first-byte samples in milliseconds, when available.
    ttfb_ms: list[float] = field(default_factory=list)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class MetricsObserver(DispatchObserver):
    """Collects invocation metrics keyed by ``provider.method``.

    Tracks call counts, end-to-end durations, time-to-first-byte
    samples, and error counts per provider+method pair.
    """

    def __init__(self) -> None:
        self._data: dict[str, _ProviderMethodMetrics] = {}

    def _key(self, ctx: InvokeContext) -> str:
        """Build the ``provider.method`` bucket key for a context."""
        return f"{ctx.provider_name}.{ctx.method}"

    def _get(self, key: str) -> _ProviderMethodMetrics:
        """Return the accumulator for ``key``, creating it on first use."""
        metrics = self._data.get(key)
        if metrics is None:
            metrics = _ProviderMethodMetrics()
            self._data[key] = metrics
        return metrics

    async def on_invoke_end(self, ctx: InvokeContext, result: Any) -> None:
        bucket = self._get(self._key(ctx))
        bucket.total_calls += 1
        bucket.durations_ms.append(ctx.elapsed_ms)
        if ctx.ttfb_ns is not None:
            # Nanoseconds since start → milliseconds.
            bucket.ttfb_ms.append((ctx.ttfb_ns - ctx.start_time_ns) / 1_000_000)

    async def on_invoke_error(self, ctx: InvokeContext, error: Exception) -> None:
        self._get(self._key(ctx)).error_count += 1

    def get(self, provider: str, method: str) -> _ProviderMethodMetrics | None:
        """Return accumulated metrics for the given provider+method, or None if not seen."""
        return self._data.get(f"{provider}.{method}")
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""TracingObserver — OpenTelemetry tracing stub, graceful if not installed."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
from typing import Any
|
|
4
|
+
from openspeech.dispatch.context import InvokeContext
|
|
5
|
+
from openspeech.observe.base import DispatchObserver
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TracingObserver(DispatchObserver):
    """Wraps each invocation in an OpenTelemetry span.

    When the ``opentelemetry`` package is not installed, the tracer is
    left as ``None`` and every hook degrades to a no-op.
    """

    def __init__(self, service_name: str = "openspeech") -> None:
        try:
            from opentelemetry import trace

            self._tracer = trace.get_tracer(service_name)
        except ImportError:
            self._tracer = None
        # Open spans keyed by request_id; closed in on_invoke_end/error.
        self._spans: dict[str, Any] = {}

    async def on_invoke_start(self, ctx: InvokeContext) -> None:
        if self._tracer is None:
            return
        attrs = {
            "provider": ctx.provider_name,
            "method": ctx.method,
            "exec_mode": ctx.exec_mode.value,
            "request_id": ctx.request_id,
        }
        self._spans[ctx.request_id] = self._tracer.start_span(
            f"{ctx.provider_name}.{ctx.method}", attributes=attrs
        )

    async def on_invoke_end(self, ctx: InvokeContext, result: Any) -> None:
        span = self._spans.pop(ctx.request_id, None)
        if span:
            span.end()

    async def on_invoke_error(self, ctx: InvokeContext, error: Exception) -> None:
        span = self._spans.pop(ctx.request_id, None)
        if not span:
            return
        # Deferred import: this path only runs when a tracer exists, so
        # opentelemetry is importable here.
        from opentelemetry.trace import StatusCode

        span.set_status(StatusCode.ERROR, str(error))
        span.end()
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""UsageObserver — tracks call counts, audio duration, and character counts."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
from typing import Any
|
|
4
|
+
from openspeech.dispatch.context import InvokeContext
|
|
5
|
+
from openspeech.observe.base import DispatchObserver
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class UsageObserver(DispatchObserver):
    """Accumulates usage statistics across completed invocations.

    Tracks the number of calls, total audio duration (taken from a
    ``duration_ms`` attribute on the result object, when present), and
    total text characters (taken from ``ctx.metadata["text"]``, which
    the caller sets for TTS requests).
    """

    def __init__(self) -> None:
        self.call_count: int = 0
        # float: results may report fractional milliseconds.
        self.total_audio_duration_ms: float = 0
        self.total_characters: int = 0

    async def on_invoke_end(self, ctx: InvokeContext, result: Any) -> None:
        self.call_count += 1

        # Accumulate audio duration from AudioData results (STT input or
        # TTS output). Only accept real numbers — an unexpected attribute
        # value must not raise inside the observer pipeline.
        audio_duration = getattr(result, "duration_ms", None)
        if isinstance(audio_duration, (int, float)) and not isinstance(
            audio_duration, bool
        ):
            self.total_audio_duration_ms += audio_duration

        # Accumulate text characters from ctx.metadata["text"] (set by caller for TTS)
        text = ctx.metadata.get("text")
        if isinstance(text, str):
            self.total_characters += len(text)
|
|
File without changes
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Provider adapter template — copy-paste starting point for new adapters.
|
|
3
|
+
|
|
4
|
+
Steps to create a new adapter:
|
|
5
|
+
1. Copy this file to providers/stt/<name>.py or providers/tts/<name>.py
|
|
6
|
+
2. Replace MyProvider* placeholders with the real names
|
|
7
|
+
3. Fill in the correct name, provider_type, execution_mode, capabilities
|
|
8
|
+
4. Implement start() to import the SDK and build the client
|
|
9
|
+
5. Implement transcribe() / synthesize() (and streaming variants if supported)
|
|
10
|
+
6. Add the provider to openspeech/core/registry.py if auto-registration is desired
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from collections.abc import AsyncIterator
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
from openspeech.core.base import STTProvider # swap for TTSProvider as needed
|
|
19
|
+
from openspeech.core.enums import Capability, ExecMode, ProviderType
|
|
20
|
+
from openspeech.core.models import AudioData, STTOptions, Transcription
|
|
21
|
+
from openspeech.core.settings import BaseSettings
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# ---------------------------------------------------------------------------
|
|
25
|
+
# Settings dataclass — one field per provider-specific config value
|
|
26
|
+
# ---------------------------------------------------------------------------
|
|
27
|
+
@dataclass
class MyProviderSettings(BaseSettings):
    """Template settings — one field per provider-specific config value."""

    api_key: str = ""
    model: str = "default-model"
    # Extend with additional provider-specific fields as needed.
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# ---------------------------------------------------------------------------
|
|
35
|
+
# Provider class
|
|
36
|
+
# ---------------------------------------------------------------------------
|
|
37
|
+
class MyProviderSTT(STTProvider):
    """Template STT adapter — replace the MyProvider* placeholders.

    The class-level metadata below is required by the SpeechProvider ABC;
    fill in real values for the copied adapter.
    """

    # --- class-level metadata (required by SpeechProvider ABC) ---
    name = "my-provider"
    provider_type = ProviderType.STT  # or ProviderType.TTS
    execution_mode = ExecMode.IN_PROCESS  # or SUBPROCESS / REMOTE
    settings_cls = MyProviderSettings
    capabilities = {Capability.BATCH}  # choose from enums.Capability

    def __init__(self, settings: MyProviderSettings | None = None) -> None:
        self.settings = settings or MyProviderSettings()
        self._client: Any = None

    # --- lifecycle ---

    async def start(self) -> None:
        """Import SDK and initialise the client.

        Raises:
            ImportError: if the provider SDK is not installed.
        """
        try:
            import my_sdk  # noqa: F401 replace with real import

            self._client = object()  # replace with real client construction
        except ImportError as exc:
            # Chain the original error so the missing-module name stays
            # visible in the traceback.
            raise ImportError(
                "Install the SDK: pip install openspeech[my-provider]"
            ) from exc

    async def stop(self) -> None:
        """Release resources."""
        self._client = None

    async def health_check(self) -> bool:
        return self._client is not None

    # --- STT interface (remove if implementing TTSProvider) ---

    async def transcribe(
        self, audio: AudioData, opts: STTOptions | None = None
    ) -> Transcription:
        if self._client is None:
            raise RuntimeError("Provider not started — call start() first")
        opts = opts or STTOptions()
        raise NotImplementedError("Implement transcribe() for MyProviderSTT")

    async def transcribe_stream(
        self, stream: AsyncIterator[bytes]
    ) -> AsyncIterator[Any]:
        raise NotImplementedError(
            "Streaming transcription is not supported by MyProviderSTT"
        )
        # The unreachable yield makes this an async generator so callers
        # can treat streaming and non-streaming providers uniformly.
        yield  # pragma: no cover

    # --- TTS interface (uncomment if implementing TTSProvider) ---

    # async def synthesize(
    #     self, text: str, opts: TTSOptions | None = None
    # ) -> AudioData:
    #     if self._client is None:
    #         raise RuntimeError("Provider not started — call start() first")
    #     raise NotImplementedError("Implement synthesize() for MyProviderTTS")

    # async def synthesize_stream(
    #     self, text: str, opts: TTSOptions | None = None
    # ) -> AsyncIterator[AudioChunk]:
    #     raise NotImplementedError(
    #         "Streaming synthesis is not supported by MyProviderTTS"
    #     )
    #     yield  # pragma: no cover
|
|
File without changes
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""Alibaba Cloud (Bailian/DashScope) STT provider adapter — OpenAI-compatible."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from openspeech.logging_config import logger
|
|
5
|
+
import time
|
|
6
|
+
from collections.abc import AsyncIterator
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
import httpx
|
|
11
|
+
|
|
12
|
+
from openspeech.core.base import STTProvider
|
|
13
|
+
|
|
14
|
+
from openspeech.core.enums import Capability, ExecMode, ProviderType
|
|
15
|
+
from openspeech.core.models import AudioData, STTOptions, Transcription
|
|
16
|
+
from openspeech.core.settings import BaseSettings
|
|
17
|
+
|
|
18
|
+
@dataclass
class AlibabaSTTSettings(BaseSettings):
    """Configuration for the DashScope OpenAI-compatible STT endpoint."""

    api_key: str = ""
    model: str = "paraformer-v2"
    base_url: str = "https://dashscope.aliyuncs.com/compatible-mode/v1"
|
|
23
|
+
|
|
24
|
+
class AlibabaSTT(STTProvider):
    """Batch STT via DashScope's OpenAI-compatible transcription endpoint."""

    name = "alibaba-stt"
    provider_type = ProviderType.STT
    execution_mode = ExecMode.IN_PROCESS
    settings_cls = AlibabaSTTSettings
    capabilities = {Capability.BATCH, Capability.MULTILINGUAL}
    field_options = {
        "model": ["paraformer-v2", "paraformer-realtime-v2", "paraformer-8k-v1"]
    }

    def __init__(self, settings: AlibabaSTTSettings | None = None) -> None:
        self.settings = settings or AlibabaSTTSettings()
        self._client: httpx.AsyncClient | None = None
        self._owns_client: bool = True

    def set_http_client(self, client) -> None:
        """Inject a shared AsyncClient; this instance will not close it."""
        self._client = client
        self._owns_client = False

    async def start(self) -> None:
        """Create an owned HTTP client unless one was injected."""
        if self._client is None:
            self._client = httpx.AsyncClient(timeout=60.0)
            self._owns_client = True
        logger.info("{} provider started", self.name)

    async def stop(self) -> None:
        """Close the HTTP client if this instance owns it."""
        if self._client and self._owns_client:
            await self._client.aclose()
        self._client = None
        logger.info("{} provider stopped", self.name)

    async def health_check(self) -> bool:
        # Considered healthy whenever an API key is configured.
        return bool(self.settings.api_key)

    async def transcribe(
        self, audio: AudioData, opts: STTOptions | None = None
    ) -> Transcription:
        """POST the audio as a WAV upload and return the recognised text."""
        if self._client is None:
            raise RuntimeError("Provider not started — call start() first")
        logger.info("{}: request received, audio={} bytes", self.name, len(audio.data))
        started = time.perf_counter()

        opts = opts or STTOptions()
        chosen_model = opts.model or self.settings.model
        endpoint = f"{self.settings.base_url}/audio/transcriptions"

        response = await self._client.post(
            endpoint,
            headers={"Authorization": f"Bearer {self.settings.api_key}"},
            files={"file": ("audio.wav", audio.data, "audio/wav")},
            data={"model": chosen_model},
        )
        response.raise_for_status()
        payload = response.json()

        transcription = Transcription(text=payload.get("text", ""))
        logger.info(
            "{}: completed in {:.0f}ms, result={} chars",
            self.name,
            (time.perf_counter() - started) * 1000,
            len(transcription.text),
        )
        return transcription

    async def transcribe_stream(
        self, stream: AsyncIterator[bytes]
    ) -> AsyncIterator[Any]:
        raise NotImplementedError("Alibaba STT streaming not implemented")
        yield  # noqa: unreachable — makes this an async generator
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""AssemblyAI STT provider adapter (batch, httpx)."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import asyncio
|
|
5
|
+
from openspeech.logging_config import logger
|
|
6
|
+
import time
|
|
7
|
+
from collections.abc import AsyncIterator
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
import httpx
|
|
12
|
+
|
|
13
|
+
from openspeech.core.base import STTProvider
|
|
14
|
+
|
|
15
|
+
from openspeech.core.enums import Capability, ExecMode, ProviderType
|
|
16
|
+
from openspeech.core.models import AudioData, STTOptions, Transcription
|
|
17
|
+
from openspeech.core.settings import BaseSettings
|
|
18
|
+
|
|
19
|
+
@dataclass
class AssemblyAISTTSettings(BaseSettings):
    """Configuration for the AssemblyAI batch transcription API."""

    api_key: str = ""
    model: str = "best"
    language: str = "en"
|
|
24
|
+
|
|
25
|
+
class AssemblyAISTT(STTProvider):
    """Batch STT via AssemblyAI's upload → create-transcript → poll flow."""

    name = "assemblyai-stt"
    provider_type = ProviderType.STT
    execution_mode = ExecMode.REMOTE
    settings_cls = AssemblyAISTTSettings
    capabilities = {Capability.BATCH, Capability.MULTILINGUAL}
    field_options = {"model": ["best", "nano"], "language": ["en", "zh", "ja", "ko", "es", "fr", "de", "pt", "it", "nl", "ru", "ar", "hi", "auto"]}

    def __init__(self, settings: AssemblyAISTTSettings | None = None) -> None:
        self.settings = settings or AssemblyAISTTSettings()
        self._client: httpx.AsyncClient | None = None
        self._owns_client: bool = True

    def set_http_client(self, client) -> None:
        """Inject a shared AsyncClient; this instance will not close it."""
        self._client = client
        self._owns_client = False

    async def start(self) -> None:
        """Create an owned HTTP client unless one was injected."""
        if self._client is None:
            self._client = httpx.AsyncClient(timeout=60.0)
            self._owns_client = True
        logger.info("{} provider started", self.name)

    async def stop(self) -> None:
        """Close the HTTP client if this instance owns it."""
        if self._client and self._owns_client:
            await self._client.aclose()
        self._client = None
        logger.info("{} provider stopped", self.name)

    async def health_check(self) -> bool:
        # Considered healthy whenever an API key is configured.
        return bool(self.settings.api_key)

    async def transcribe(
        self, audio: AudioData, opts: STTOptions | None = None
    ) -> Transcription:
        """Upload audio, create a transcript job, and poll until done.

        Raises:
            RuntimeError: on any non-200 API response, a failed
                transcription, or when polling exceeds the 60-second budget.
        """
        if self._client is None:
            raise RuntimeError("Provider not started — call start() first")
        logger.info("{}: request received, audio={} bytes", self.name, len(audio.data))
        _t0 = time.perf_counter()
        opts = opts or STTOptions()
        language = opts.language or self.settings.language
        headers = {"Authorization": self.settings.api_key}

        # Step 1: Upload audio
        upload_resp = await self._client.post(
            "https://api.assemblyai.com/v2/upload",
            headers=headers,
            content=audio.data,
        )
        if upload_resp.status_code != 200:
            raise RuntimeError(
                f"AssemblyAI upload error {upload_resp.status_code}: {upload_resp.text}"
            )
        upload_url = upload_resp.json()["upload_url"]

        # Step 2: Create transcript
        create_resp = await self._client.post(
            "https://api.assemblyai.com/v2/transcript",
            headers=headers,
            json={
                "audio_url": upload_url,
                "language_code": language,
                "speech_model": self.settings.model,
            },
        )
        if create_resp.status_code != 200:
            raise RuntimeError(
                f"AssemblyAI transcript creation error "
                f"{create_resp.status_code}: {create_resp.text}"
            )
        transcript_id = create_resp.json()["id"]

        # Step 3: Poll until completed. Use a wall-clock deadline so the
        # time spent in HTTP round-trips counts against the budget too —
        # counting only the sleeps could wait far longer than max_wait.
        poll_url = f"https://api.assemblyai.com/v2/transcript/{transcript_id}"
        max_wait = 60.0
        deadline = time.monotonic() + max_wait
        while time.monotonic() < deadline:
            poll_resp = await self._client.get(poll_url, headers=headers)
            if poll_resp.status_code != 200:
                raise RuntimeError(
                    f"AssemblyAI poll error {poll_resp.status_code}: {poll_resp.text}"
                )
            data = poll_resp.json()
            status = data.get("status", "")
            if status == "completed":
                result = Transcription(
                    text=data.get("text", ""),
                    language=language,
                    confidence=data.get("confidence"),
                )
                logger.info("{}: completed in {:.0f}ms, result={} chars", self.name, (time.perf_counter() - _t0) * 1000, len(result.text))
                return result
            if status == "error":
                raise RuntimeError(
                    f"AssemblyAI transcription failed: {data.get('error', 'unknown')}"
                )
            await asyncio.sleep(1.0)

        raise RuntimeError(
            f"AssemblyAI transcription timed out after {max_wait}s "
            f"for transcript {transcript_id}"
        )

    async def transcribe_stream(
        self, stream: AsyncIterator[bytes]
    ) -> AsyncIterator[Any]:
        raise NotImplementedError(
            "AssemblyAI batch provider does not support streaming input"
        )
        yield  # pragma: no cover
|