openspeechapi 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openspeech/__init__.py +75 -0
- openspeech/__main__.py +5 -0
- openspeech/cli.py +413 -0
- openspeech/client/__init__.py +4 -0
- openspeech/client/client.py +145 -0
- openspeech/config.py +212 -0
- openspeech/core/__init__.py +0 -0
- openspeech/core/base.py +75 -0
- openspeech/core/enums.py +39 -0
- openspeech/core/models.py +61 -0
- openspeech/core/registry.py +37 -0
- openspeech/core/settings.py +8 -0
- openspeech/demo.py +675 -0
- openspeech/dispatch/__init__.py +0 -0
- openspeech/dispatch/context.py +34 -0
- openspeech/dispatch/dispatcher.py +661 -0
- openspeech/dispatch/executors/__init__.py +0 -0
- openspeech/dispatch/executors/base.py +34 -0
- openspeech/dispatch/executors/in_process.py +66 -0
- openspeech/dispatch/executors/remote.py +64 -0
- openspeech/dispatch/executors/subprocess_exec.py +446 -0
- openspeech/dispatch/fanout.py +95 -0
- openspeech/dispatch/filters.py +73 -0
- openspeech/dispatch/lifecycle.py +178 -0
- openspeech/dispatch/watcher.py +82 -0
- openspeech/engine_catalog.py +236 -0
- openspeech/engine_registry.yaml +347 -0
- openspeech/exceptions.py +51 -0
- openspeech/factory.py +325 -0
- openspeech/local_engines/__init__.py +12 -0
- openspeech/local_engines/aim_resolver.py +91 -0
- openspeech/local_engines/backends/__init__.py +1 -0
- openspeech/local_engines/backends/docker_backend.py +490 -0
- openspeech/local_engines/backends/native_backend.py +902 -0
- openspeech/local_engines/base.py +30 -0
- openspeech/local_engines/engines/__init__.py +1 -0
- openspeech/local_engines/engines/faster_whisper.py +36 -0
- openspeech/local_engines/engines/fish_speech.py +33 -0
- openspeech/local_engines/engines/sherpa_onnx.py +56 -0
- openspeech/local_engines/engines/whisper.py +41 -0
- openspeech/local_engines/engines/whisperlivekit.py +60 -0
- openspeech/local_engines/manager.py +208 -0
- openspeech/local_engines/models.py +50 -0
- openspeech/local_engines/progress.py +69 -0
- openspeech/local_engines/registry.py +19 -0
- openspeech/local_engines/task_store.py +52 -0
- openspeech/local_engines/tasks.py +71 -0
- openspeech/logging_config.py +607 -0
- openspeech/observe/__init__.py +0 -0
- openspeech/observe/base.py +79 -0
- openspeech/observe/debug.py +44 -0
- openspeech/observe/latency.py +19 -0
- openspeech/observe/metrics.py +47 -0
- openspeech/observe/tracing.py +44 -0
- openspeech/observe/usage.py +27 -0
- openspeech/providers/__init__.py +0 -0
- openspeech/providers/_template.py +101 -0
- openspeech/providers/stt/__init__.py +0 -0
- openspeech/providers/stt/alibaba.py +86 -0
- openspeech/providers/stt/assemblyai.py +135 -0
- openspeech/providers/stt/azure_speech.py +99 -0
- openspeech/providers/stt/baidu.py +135 -0
- openspeech/providers/stt/deepgram.py +311 -0
- openspeech/providers/stt/elevenlabs.py +385 -0
- openspeech/providers/stt/faster_whisper.py +211 -0
- openspeech/providers/stt/google_cloud.py +106 -0
- openspeech/providers/stt/iflytek.py +427 -0
- openspeech/providers/stt/macos_speech.py +226 -0
- openspeech/providers/stt/openai.py +84 -0
- openspeech/providers/stt/sherpa_onnx.py +353 -0
- openspeech/providers/stt/tencent.py +212 -0
- openspeech/providers/stt/volcengine.py +107 -0
- openspeech/providers/stt/whisper.py +153 -0
- openspeech/providers/stt/whisperlivekit.py +530 -0
- openspeech/providers/stt/windows_speech.py +249 -0
- openspeech/providers/tts/__init__.py +0 -0
- openspeech/providers/tts/alibaba.py +95 -0
- openspeech/providers/tts/azure_speech.py +123 -0
- openspeech/providers/tts/baidu.py +143 -0
- openspeech/providers/tts/coqui.py +64 -0
- openspeech/providers/tts/cosyvoice.py +90 -0
- openspeech/providers/tts/deepgram.py +174 -0
- openspeech/providers/tts/elevenlabs.py +311 -0
- openspeech/providers/tts/fish_speech.py +158 -0
- openspeech/providers/tts/google_cloud.py +107 -0
- openspeech/providers/tts/iflytek.py +209 -0
- openspeech/providers/tts/macos_say.py +251 -0
- openspeech/providers/tts/minimax.py +122 -0
- openspeech/providers/tts/openai.py +104 -0
- openspeech/providers/tts/piper.py +104 -0
- openspeech/providers/tts/tencent.py +189 -0
- openspeech/providers/tts/volcengine.py +117 -0
- openspeech/providers/tts/windows_sapi.py +234 -0
- openspeech/server/__init__.py +1 -0
- openspeech/server/app.py +72 -0
- openspeech/server/auth.py +42 -0
- openspeech/server/middleware.py +75 -0
- openspeech/server/routes/__init__.py +1 -0
- openspeech/server/routes/management.py +848 -0
- openspeech/server/routes/stt.py +121 -0
- openspeech/server/routes/tts.py +159 -0
- openspeech/server/routes/webui.py +29 -0
- openspeech/server/webui/app.js +2649 -0
- openspeech/server/webui/index.html +216 -0
- openspeech/server/webui/styles.css +617 -0
- openspeech/server/ws/__init__.py +1 -0
- openspeech/server/ws/stt_stream.py +263 -0
- openspeech/server/ws/tts_stream.py +207 -0
- openspeech/telemetry/__init__.py +21 -0
- openspeech/telemetry/perf.py +307 -0
- openspeech/utils/__init__.py +5 -0
- openspeech/utils/audio_converter.py +406 -0
- openspeech/utils/audio_playback.py +156 -0
- openspeech/vendor_registry.yaml +74 -0
- openspeechapi-0.1.0.dist-info/METADATA +101 -0
- openspeechapi-0.1.0.dist-info/RECORD +118 -0
- openspeechapi-0.1.0.dist-info/WHEEL +4 -0
- openspeechapi-0.1.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
"""STT REST endpoints."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
from fastapi import APIRouter, File, Form, Request, UploadFile, HTTPException
|
|
4
|
+
from openspeech.core.models import AudioData, STTOptions
|
|
5
|
+
from openspeech.dispatch.fanout import FirstCompleted, HighestConfidence, CollectAll
|
|
6
|
+
from openspeech.exceptions import ProviderNotFoundError, FanOutAllFailedError
|
|
7
|
+
from openspeech.utils.audio_converter import AudioConverter
|
|
8
|
+
|
|
9
|
+
# Router that collects the STT REST endpoints declared in this module.
router = APIRouter()

# Maps the `strategy` form value accepted by POST /transcribe/fanout to the
# fan-out strategy class instantiated for the request. The key order is
# visible to clients: the "Unknown strategy" error lists the keys in this order.
STRATEGY_MAP = {
    "first_completed": FirstCompleted,
    "highest_confidence": HighestConfidence,
    "collect_all": CollectAll,
}
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Fields copied from every entry of ``result.words`` into the JSON response,
# in the order they appear in the response object.
_WORD_FIELDS = ("text", "start_ms", "end_ms", "confidence")


def _word_to_dict(word):
    """Serialise one word entry, given as a dict or as an object, to a plain dict."""
    # Word entries are accepted in two shapes: mappings (missing keys become
    # None) and objects exposing the same fields as attributes.
    if isinstance(word, dict):
        return {key: word.get(key) for key in _WORD_FIELDS}
    return {key: getattr(word, key) for key in _WORD_FIELDS}


@router.post("/transcribe")
async def transcribe(
    request: Request,
    audio: UploadFile = File(...),
    provider: str = Form(...),
    language: str | None = Form(None),
    prompt: str | None = Form(None),
    temperature: float | None = Form(None),
    model: str | None = Form(None),
    device: str | None = Form(None),
    beam_size: int | None = Form(None),
    compute_type: str | None = Form(None),
    fp16: bool | None = Form(None),
):
    """Transcribe an uploaded audio file with a single named provider."""
    # Responses:
    #   200 {"text": "", "filtered": True} when the dispatcher returns None
    #   200 text/language/confidence/words/duration_ms/provider otherwise
    #   404 when the dispatcher raises ProviderNotFoundError
    #   502 when the dispatcher raises anything else
    dispatcher = request.app.state.dispatcher

    # The whole upload is read into memory before dispatching.
    audio_bytes = await audio.read()
    detected_fmt = AudioConverter.detect_format(audio_bytes)
    # NOTE(review): sample_rate/channels are fixed at 16000/1 regardless of the
    # uploaded file; presumably providers derive the real values from the
    # container - confirm against the provider implementations.
    audio_data = AudioData(data=audio_bytes, sample_rate=16000, channels=1, format=detected_fmt)
    opts = STTOptions(
        language=language,
        prompt=prompt,
        temperature=temperature,
        model=model,
        device=device,
        beam_size=beam_size,
        compute_type=compute_type,
        fp16=fp16,
    )

    try:
        result = await dispatcher.stt.transcribe(provider, audio_data, opts)
    except ProviderNotFoundError as e:
        # Chain the cause so server-side tracebacks keep the original error.
        raise HTTPException(status_code=404, detail=str(e)) from e
    except Exception as e:  # noqa: BLE001
        raise HTTPException(status_code=502, detail=f"Provider '{provider}' failed: {e}") from e

    if result is None:
        return {"text": "", "filtered": True}

    return {
        "text": result.text,
        "language": result.language,
        "confidence": result.confidence,
        "words": [_word_to_dict(w) for w in (result.words or [])],
        "duration_ms": result.duration_ms,
        "provider": provider,
    }
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@router.post("/transcribe/fanout")
async def transcribe_fanout(
    request: Request,
    audio: UploadFile = File(...),
    providers: str = Form(...),  # comma-separated
    strategy: str = Form("first_completed"),
    language: str | None = Form(None),
):
    """Transcribe an uploaded audio file with several providers at once."""
    # Responses:
    #   200 strategy/successes/errors when the result has a `successes` attribute
    #   200 text/language/confidence/provider="fanout" otherwise
    #   404 when the dispatcher raises ProviderNotFoundError
    #   422 for an unknown strategy or an empty provider list
    #   502 when every provider fails or the dispatcher raises anything else

    # Validate the cheap form fields first so a bad request is rejected before
    # the upload body is read into memory.
    strategy_cls = STRATEGY_MAP.get(strategy)
    if strategy_cls is None:
        raise HTTPException(
            status_code=422,
            detail=f"Unknown strategy: {strategy}. Use: {list(STRATEGY_MAP)}",
        )

    # Drop blank entries ("a,,b", trailing commas, stray whitespace) so they are
    # not forwarded to the dispatcher as a provider named "".
    provider_list = [name for name in (p.strip() for p in providers.split(",")) if name]
    if not provider_list:
        raise HTTPException(status_code=422, detail="No providers specified")

    dispatcher = request.app.state.dispatcher
    audio_bytes = await audio.read()
    detected_fmt = AudioConverter.detect_format(audio_bytes)
    audio_data = AudioData(data=audio_bytes, sample_rate=16000, channels=1, format=detected_fmt)
    opts = STTOptions(language=language)

    try:
        result = await dispatcher.stt.fanout(provider_list, audio_data, opts=opts, strategy=strategy_cls())
    except ProviderNotFoundError as e:
        raise HTTPException(status_code=404, detail=str(e)) from e
    except FanOutAllFailedError as e:
        raise HTTPException(status_code=502, detail=f"All providers failed: {list(e.errors.keys())}") from e
    except Exception as e:  # noqa: BLE001
        raise HTTPException(status_code=502, detail=f"Fanout request failed: {e}") from e

    # CollectAll returns FanOutResult, others return Transcription
    if hasattr(result, "successes"):
        return {
            "strategy": strategy,
            "successes": {
                name: {"text": t.text, "confidence": t.confidence, "language": t.language}
                for name, t in result.successes.items()
            },
            "errors": {name: str(err) for name, err in result.errors.items()},
        }
    return {
        "text": result.text,
        "language": result.language,
        "confidence": result.confidence,
        "provider": "fanout",
    }
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
"""TTS REST endpoints."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
import time
|
|
4
|
+
from fastapi import APIRouter, Request, HTTPException, UploadFile, File, Form
|
|
5
|
+
from fastapi.responses import Response
|
|
6
|
+
from pydantic import BaseModel
|
|
7
|
+
from openspeech.core.enums import AudioFormat
|
|
8
|
+
from openspeech.core.models import AudioData, TTSOptions
|
|
9
|
+
from openspeech.exceptions import ProviderNotFoundError
|
|
10
|
+
from openspeech.utils.audio_converter import AudioConverter
|
|
11
|
+
|
|
12
|
+
# Router that collects the TTS REST endpoints declared in this module.
router = APIRouter()

# Audio format identifier -> HTTP media type for the response body.
# Read by /synthesize (fallback "audio/wav") and by /convert
# (fallback "application/octet-stream"). Both PCM variants share one type.
_MIME_MAP = {
    "wav": "audio/wav",
    "mp3": "audio/mpeg",
    "ogg": "audio/ogg",
    "flac": "audio/flac",
    "opus": "audio/opus",
    "pcm_16k": "audio/pcm",
    "pcm_44k": "audio/pcm",
}
|
|
23
|
+
|
|
24
|
+
# Audio format identifier -> file extension (leading dot included) used by
# /convert to build the download filename; unknown formats fall back to ".bin"
# at the call site. Both PCM variants share the ".pcm" extension.
_FORMAT_EXT = {
    "wav": ".wav",
    "mp3": ".mp3",
    "ogg": ".ogg",
    "flac": ".flac",
    "opus": ".opus",
    "pcm_16k": ".pcm",
    "pcm_44k": ".pcm",
}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# Request body model for POST /synthesize.
# `text` and `provider` are required; the remaining fields are optional and
# are copied unchanged into TTSOptions by the endpoint.
class SynthesizeRequest(BaseModel):
    text: str  # text handed to dispatcher.tts.synthesize
    provider: str  # provider name used for dispatch; echoed in the X-Provider header
    voice: str | None = None
    speed: float = 1.0
    model: str | None = None
    stream_transport: str | None = None
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# GET /{provider}/voices - report the voices a provider exposes.
# Responds 404 when the dispatcher does not know the provider. Otherwise the
# provider is brought to the ready state and asked for its voices; a provider
# without a `list_voices` method (or with no live instance) yields [].
@router.get("/{provider}/voices")
async def list_voices(request: Request, provider: str):
    dispatcher = request.app.state.dispatcher

    # The lookup is done only for its side effect of raising on unknown names.
    try:
        dispatcher._get_handle(provider)
    except ProviderNotFoundError as e:
        raise HTTPException(status_code=404, detail=str(e))

    lifecycle = dispatcher._lifecycle
    await lifecycle.ensure_ready(provider)
    provider_instance = lifecycle.get_instance(provider)

    if provider_instance is not None and hasattr(provider_instance, "list_voices"):
        return {"voices": await provider_instance.list_voices()}
    return {"voices": []}
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@router.post("/synthesize")
async def synthesize(request: Request, body: SynthesizeRequest):
    """Synthesize speech for the given text and return the raw audio bytes."""
    # Responses:
    #   200 audio body plus X-Provider / X-Audio-Format / X-Sample-Rate /
    #       X-Duration-Ms / X-Elapsed-Ms headers
    #   404 when the dispatcher raises ProviderNotFoundError
    #   500 when the dispatcher returns None
    # NOTE(review): other dispatcher exceptions are not caught here, unlike the
    # STT endpoints, which map them to 502 - confirm whether that is intended.
    dispatcher = request.app.state.dispatcher
    opts = TTSOptions(
        voice=body.voice,
        speed=body.speed,
        model=body.model,
        stream_transport=body.stream_transport,
    )
    started = time.perf_counter()

    try:
        result = await dispatcher.tts.synthesize(body.provider, body.text, opts)
    except ProviderNotFoundError as e:
        # Chain the cause so server-side tracebacks keep the original error.
        raise HTTPException(status_code=404, detail=str(e)) from e

    if result is None:
        raise HTTPException(status_code=500, detail="Synthesis returned no result")

    # `result.format` may be a plain string or an enum member (presumably
    # AudioFormat - confirm). str() on an Enum member yields "ClassName.MEMBER",
    # which would miss every _MIME_MAP key and leak into X-Audio-Format, so
    # unwrap `.value` when it is present. Plain strings pass through unchanged.
    raw_format = getattr(result, "format", "wav") or "wav"
    fmt = str(getattr(raw_format, "value", raw_format))
    media_type = _MIME_MAP.get(fmt, "audio/wav")

    return Response(
        content=result.data,
        media_type=media_type,
        headers={
            "X-Provider": body.provider,
            "X-Audio-Format": fmt,
            "X-Sample-Rate": str(result.sample_rate),
            "X-Duration-Ms": str(result.duration_ms or 0),
            # Wall-clock time spent in this handler, in whole milliseconds.
            "X-Elapsed-Ms": str(int((time.perf_counter() - started) * 1000)),
        },
    )
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@router.post("/convert")
async def convert_audio(
    audio: UploadFile = File(...),
    source_format: str = Form("mp3"),
    target_format: str = Form("wav"),
    sample_rate: int = Form(16000),
    channels: int = Form(1),
):
    """Convert audio between formats. Requires ffmpeg for MP3/OGG/FLAC/OPUS."""
    # Responses:
    #   200 converted audio as an attachment named "audio<ext>"
    #   400 unsupported target format, non-positive sample_rate/channels,
    #       or an empty upload
    #   422 when AudioConverter.convert raises RuntimeError

    # Validate target format
    try:
        target = AudioFormat(target_format)
    except ValueError as exc:
        raise HTTPException(
            status_code=400, detail=f"Unsupported target format: {target_format}"
        ) from exc

    # Reject meaningless audio parameters up front instead of letting them
    # reach the converter.
    if sample_rate <= 0 or channels <= 0:
        raise HTTPException(status_code=400, detail="sample_rate and channels must be positive")

    data = await audio.read()
    if not data:
        raise HTTPException(status_code=400, detail="Empty audio data")

    # Use the declared source format when it is a known AudioFormat value;
    # content-based detection is only the fallback for unknown declarations.
    try:
        src_fmt = AudioFormat(source_format)
    except ValueError:
        src_fmt = AudioConverter.detect_format(data)

    src_audio = AudioData(
        data=data,
        sample_rate=sample_rate,
        channels=channels,
        format=src_fmt,
    )

    # NOTE(review): convert() is called synchronously inside an async handler;
    # if it shells out to ffmpeg it will block the event loop for the duration
    # of the conversion - consider offloading to a worker thread.
    try:
        result = AudioConverter.convert(src_audio, target)
    except RuntimeError as e:
        raise HTTPException(status_code=422, detail=str(e)) from e

    ext = _FORMAT_EXT.get(target_format, ".bin")
    media_type = _MIME_MAP.get(target_format, "application/octet-stream")

    return Response(
        content=result.data,
        media_type=media_type,
        headers={
            "Content-Disposition": f'attachment; filename="audio{ext}"',
            "X-Audio-Format": target_format,
            "X-Sample-Rate": str(result.sample_rate),
        },
    )
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
@router.get("/formats")
async def supported_formats():
    """Return supported download formats and whether ffmpeg is available."""
    ffmpeg_ok = AudioConverter.ffmpeg_available()

    # (format id, display label, needs ffmpeg). The extension of every listed
    # format is simply "." + id.
    catalogue = (
        ("wav", "WAV", False),
        ("mp3", "MP3", True),
        ("ogg", "OGG Vorbis", True),
        ("flac", "FLAC", True),
        ("opus", "Opus", True),
    )
    formats = [
        {
            "id": fmt_id,
            "label": label,
            "ext": f".{fmt_id}",
            "available": ffmpeg_ok if needs_ffmpeg else True,
        }
        for fmt_id, label, needs_ffmpeg in catalogue
    ]
    return {"formats": formats, "ffmpeg": ffmpeg_ok}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""Web UI static routes."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from fastapi import APIRouter, HTTPException
|
|
7
|
+
from fastapi.responses import FileResponse
|
|
8
|
+
|
|
9
|
+
# Router that collects the static Web UI routes declared in this module.
router = APIRouter()

# Directory holding the Web UI assets: the `webui` folder that sits next to
# this module's parent package directory. The path is resolved once at import
# time, so the containment check in webui_asset compares resolved paths.
_WEBUI_DIR = Path(__file__).resolve().parent.parent / "webui"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# GET /ui - serve the Web UI entry page, or 404 when index.html is not present
# in the Web UI directory.
@router.get("/ui")
async def webui_index():
    index_page = _WEBUI_DIR.joinpath("index.html")
    if index_page.exists():
        return FileResponse(index_page)
    raise HTTPException(status_code=404, detail="WebUI is not available")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# GET /ui/{asset_name} - serve one static asset from the Web UI directory.
# Responds 400 when the resolved path escapes that directory and 404 when the
# path is missing or is not a regular file.
@router.get("/ui/{asset_name}")
async def webui_asset(asset_name: str):
    candidate = _WEBUI_DIR.joinpath(asset_name).resolve()

    # Resolve first, then check containment, so "..", symlinks and similar
    # tricks cannot point outside the Web UI directory.
    if not candidate.is_relative_to(_WEBUI_DIR):
        raise HTTPException(status_code=400, detail="Invalid path")

    if not (candidate.exists() and candidate.is_file()):
        raise HTTPException(status_code=404, detail="Asset not found")

    return FileResponse(candidate)
|