openspeechapi 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. openspeech/__init__.py +75 -0
  2. openspeech/__main__.py +5 -0
  3. openspeech/cli.py +413 -0
  4. openspeech/client/__init__.py +4 -0
  5. openspeech/client/client.py +145 -0
  6. openspeech/config.py +212 -0
  7. openspeech/core/__init__.py +0 -0
  8. openspeech/core/base.py +75 -0
  9. openspeech/core/enums.py +39 -0
  10. openspeech/core/models.py +61 -0
  11. openspeech/core/registry.py +37 -0
  12. openspeech/core/settings.py +8 -0
  13. openspeech/demo.py +675 -0
  14. openspeech/dispatch/__init__.py +0 -0
  15. openspeech/dispatch/context.py +34 -0
  16. openspeech/dispatch/dispatcher.py +661 -0
  17. openspeech/dispatch/executors/__init__.py +0 -0
  18. openspeech/dispatch/executors/base.py +34 -0
  19. openspeech/dispatch/executors/in_process.py +66 -0
  20. openspeech/dispatch/executors/remote.py +64 -0
  21. openspeech/dispatch/executors/subprocess_exec.py +446 -0
  22. openspeech/dispatch/fanout.py +95 -0
  23. openspeech/dispatch/filters.py +73 -0
  24. openspeech/dispatch/lifecycle.py +178 -0
  25. openspeech/dispatch/watcher.py +82 -0
  26. openspeech/engine_catalog.py +236 -0
  27. openspeech/engine_registry.yaml +347 -0
  28. openspeech/exceptions.py +51 -0
  29. openspeech/factory.py +325 -0
  30. openspeech/local_engines/__init__.py +12 -0
  31. openspeech/local_engines/aim_resolver.py +91 -0
  32. openspeech/local_engines/backends/__init__.py +1 -0
  33. openspeech/local_engines/backends/docker_backend.py +490 -0
  34. openspeech/local_engines/backends/native_backend.py +902 -0
  35. openspeech/local_engines/base.py +30 -0
  36. openspeech/local_engines/engines/__init__.py +1 -0
  37. openspeech/local_engines/engines/faster_whisper.py +36 -0
  38. openspeech/local_engines/engines/fish_speech.py +33 -0
  39. openspeech/local_engines/engines/sherpa_onnx.py +56 -0
  40. openspeech/local_engines/engines/whisper.py +41 -0
  41. openspeech/local_engines/engines/whisperlivekit.py +60 -0
  42. openspeech/local_engines/manager.py +208 -0
  43. openspeech/local_engines/models.py +50 -0
  44. openspeech/local_engines/progress.py +69 -0
  45. openspeech/local_engines/registry.py +19 -0
  46. openspeech/local_engines/task_store.py +52 -0
  47. openspeech/local_engines/tasks.py +71 -0
  48. openspeech/logging_config.py +607 -0
  49. openspeech/observe/__init__.py +0 -0
  50. openspeech/observe/base.py +79 -0
  51. openspeech/observe/debug.py +44 -0
  52. openspeech/observe/latency.py +19 -0
  53. openspeech/observe/metrics.py +47 -0
  54. openspeech/observe/tracing.py +44 -0
  55. openspeech/observe/usage.py +27 -0
  56. openspeech/providers/__init__.py +0 -0
  57. openspeech/providers/_template.py +101 -0
  58. openspeech/providers/stt/__init__.py +0 -0
  59. openspeech/providers/stt/alibaba.py +86 -0
  60. openspeech/providers/stt/assemblyai.py +135 -0
  61. openspeech/providers/stt/azure_speech.py +99 -0
  62. openspeech/providers/stt/baidu.py +135 -0
  63. openspeech/providers/stt/deepgram.py +311 -0
  64. openspeech/providers/stt/elevenlabs.py +385 -0
  65. openspeech/providers/stt/faster_whisper.py +211 -0
  66. openspeech/providers/stt/google_cloud.py +106 -0
  67. openspeech/providers/stt/iflytek.py +427 -0
  68. openspeech/providers/stt/macos_speech.py +226 -0
  69. openspeech/providers/stt/openai.py +84 -0
  70. openspeech/providers/stt/sherpa_onnx.py +353 -0
  71. openspeech/providers/stt/tencent.py +212 -0
  72. openspeech/providers/stt/volcengine.py +107 -0
  73. openspeech/providers/stt/whisper.py +153 -0
  74. openspeech/providers/stt/whisperlivekit.py +530 -0
  75. openspeech/providers/stt/windows_speech.py +249 -0
  76. openspeech/providers/tts/__init__.py +0 -0
  77. openspeech/providers/tts/alibaba.py +95 -0
  78. openspeech/providers/tts/azure_speech.py +123 -0
  79. openspeech/providers/tts/baidu.py +143 -0
  80. openspeech/providers/tts/coqui.py +64 -0
  81. openspeech/providers/tts/cosyvoice.py +90 -0
  82. openspeech/providers/tts/deepgram.py +174 -0
  83. openspeech/providers/tts/elevenlabs.py +311 -0
  84. openspeech/providers/tts/fish_speech.py +158 -0
  85. openspeech/providers/tts/google_cloud.py +107 -0
  86. openspeech/providers/tts/iflytek.py +209 -0
  87. openspeech/providers/tts/macos_say.py +251 -0
  88. openspeech/providers/tts/minimax.py +122 -0
  89. openspeech/providers/tts/openai.py +104 -0
  90. openspeech/providers/tts/piper.py +104 -0
  91. openspeech/providers/tts/tencent.py +189 -0
  92. openspeech/providers/tts/volcengine.py +117 -0
  93. openspeech/providers/tts/windows_sapi.py +234 -0
  94. openspeech/server/__init__.py +1 -0
  95. openspeech/server/app.py +72 -0
  96. openspeech/server/auth.py +42 -0
  97. openspeech/server/middleware.py +75 -0
  98. openspeech/server/routes/__init__.py +1 -0
  99. openspeech/server/routes/management.py +848 -0
  100. openspeech/server/routes/stt.py +121 -0
  101. openspeech/server/routes/tts.py +159 -0
  102. openspeech/server/routes/webui.py +29 -0
  103. openspeech/server/webui/app.js +2649 -0
  104. openspeech/server/webui/index.html +216 -0
  105. openspeech/server/webui/styles.css +617 -0
  106. openspeech/server/ws/__init__.py +1 -0
  107. openspeech/server/ws/stt_stream.py +263 -0
  108. openspeech/server/ws/tts_stream.py +207 -0
  109. openspeech/telemetry/__init__.py +21 -0
  110. openspeech/telemetry/perf.py +307 -0
  111. openspeech/utils/__init__.py +5 -0
  112. openspeech/utils/audio_converter.py +406 -0
  113. openspeech/utils/audio_playback.py +156 -0
  114. openspeech/vendor_registry.yaml +74 -0
  115. openspeechapi-0.1.0.dist-info/METADATA +101 -0
  116. openspeechapi-0.1.0.dist-info/RECORD +118 -0
  117. openspeechapi-0.1.0.dist-info/WHEEL +4 -0
  118. openspeechapi-0.1.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,121 @@
1
+ """STT REST endpoints."""
2
+ from __future__ import annotations
3
+ from fastapi import APIRouter, File, Form, Request, UploadFile, HTTPException
4
+ from openspeech.core.models import AudioData, STTOptions
5
+ from openspeech.dispatch.fanout import FirstCompleted, HighestConfidence, CollectAll
6
+ from openspeech.exceptions import ProviderNotFoundError, FanOutAllFailedError
7
+ from openspeech.utils.audio_converter import AudioConverter
8
+
9
+ router = APIRouter()
10
+
11
+ STRATEGY_MAP = {  # public strategy name -> fan-out strategy class; looked up by transcribe_fanout, which instantiates the class per request
12
+ "first_completed": FirstCompleted,  # also the default value of the `strategy` form field
13
+ "highest_confidence": HighestConfidence,
14
+ "collect_all": CollectAll,  # per the endpoint's own comment, this one yields a FanOutResult instead of a single transcription
15
+ }  # key order is what list(STRATEGY_MAP) prints in the "Unknown strategy" 422 detail
16
+
17
+
18
# Keys copied from each word entry into the JSON response, in output order.
_WORD_FIELDS = ("text", "start_ms", "end_ms", "confidence")


def _serialize_word(word):
    """Convert one word entry into a plain dict.

    Word entries may be dicts (missing keys become ``None``) or objects
    exposing the same names as attributes (a missing attribute raises
    ``AttributeError``, as plain attribute access would).
    """
    if isinstance(word, dict):
        return {key: word.get(key) for key in _WORD_FIELDS}
    return {key: getattr(word, key) for key in _WORD_FIELDS}


@router.post("/transcribe")
async def transcribe(
    request: Request,
    audio: UploadFile = File(...),
    provider: str = Form(...),
    language: str | None = Form(None),
    prompt: str | None = Form(None),
    temperature: float | None = Form(None),
    model: str | None = Form(None),
    device: str | None = Form(None),
    beam_size: int | None = Form(None),
    compute_type: str | None = Form(None),
    fp16: bool | None = Form(None),
):
    """Transcribe an uploaded audio file with a single STT provider.

    The optional form fields are forwarded unchanged into ``STTOptions``.

    Returns:
        ``{"text": "", "filtered": True}`` when the dispatcher returns
        ``None``; otherwise a dict with ``text``, ``language``,
        ``confidence``, ``words``, ``duration_ms`` and ``provider``.

    Raises:
        HTTPException: 404 when the provider is unknown, 502 when the
            provider call fails for any other reason.
    """
    dispatcher = request.app.state.dispatcher
    audio_bytes = await audio.read()
    detected_fmt = AudioConverter.detect_format(audio_bytes)
    # NOTE(review): sample_rate/channels are fixed here regardless of the
    # upload; presumably providers decode the container themselves - confirm.
    audio_data = AudioData(data=audio_bytes, sample_rate=16000, channels=1, format=detected_fmt)
    opts = STTOptions(
        language=language,
        prompt=prompt,
        temperature=temperature,
        model=model,
        device=device,
        beam_size=beam_size,
        compute_type=compute_type,
        fp16=fp16,
    )

    try:
        result = await dispatcher.stt.transcribe(provider, audio_data, opts)
    except ProviderNotFoundError as e:
        # Chain the cause so the original traceback stays attached.
        raise HTTPException(status_code=404, detail=str(e)) from e
    except Exception as e:  # noqa: BLE001
        # API boundary: any provider failure is reported as a bad gateway.
        raise HTTPException(status_code=502, detail=f"Provider '{provider}' failed: {e}") from e

    if result is None:
        return {"text": "", "filtered": True}

    return {
        "text": result.text,
        "language": result.language,
        "confidence": result.confidence,
        "words": [_serialize_word(w) for w in (result.words or [])],
        "duration_ms": result.duration_ms,
        "provider": provider,
    }
73
+
74
+
75
@router.post("/transcribe/fanout")
async def transcribe_fanout(
    request: Request,
    audio: UploadFile = File(...),
    providers: str = Form(...),  # comma-separated
    strategy: str = Form("first_completed"),
    language: str | None = Form(None),
):
    """Transcribe one upload with several providers and merge per *strategy*.

    ``providers`` is a comma-separated list; surrounding whitespace and blank
    entries (for example from a trailing comma) are ignored.

    Returns:
        When the result has a ``successes`` attribute: a dict with
        ``strategy``, per-provider ``successes`` and stringified ``errors``.
        Otherwise a single-transcription dict with ``text``, ``language``,
        ``confidence`` and ``provider`` set to ``"fanout"``.

    Raises:
        HTTPException: 422 for an unknown strategy or an empty provider list,
            404 for an unknown provider, 502 when the fan-out fails.
    """
    # Validate the cheap inputs before reading the upload.
    strategy_cls = STRATEGY_MAP.get(strategy)
    if strategy_cls is None:
        raise HTTPException(
            status_code=422,
            detail=f"Unknown strategy: {strategy}. Use: {list(STRATEGY_MAP)}",
        )

    # Drop blank entries so "a,b," or "a, ,b" does not send an empty provider
    # name to the dispatcher.
    provider_list = [name for name in (p.strip() for p in providers.split(",")) if name]
    if not provider_list:
        raise HTTPException(
            status_code=422,
            detail="No providers given. Pass a comma-separated list of provider names.",
        )

    dispatcher = request.app.state.dispatcher
    audio_bytes = await audio.read()
    detected_fmt = AudioConverter.detect_format(audio_bytes)
    audio_data = AudioData(data=audio_bytes, sample_rate=16000, channels=1, format=detected_fmt)
    opts = STTOptions(language=language)

    try:
        result = await dispatcher.stt.fanout(provider_list, audio_data, opts=opts, strategy=strategy_cls())
    except ProviderNotFoundError as e:
        raise HTTPException(status_code=404, detail=str(e)) from e
    except FanOutAllFailedError as e:
        # Only the provider names are exposed, not the underlying errors.
        raise HTTPException(status_code=502, detail=f"All providers failed: {list(e.errors.keys())}") from e
    except Exception as e:  # noqa: BLE001
        raise HTTPException(status_code=502, detail=f"Fanout request failed: {e}") from e

    # CollectAll returns FanOutResult, others return Transcription
    if hasattr(result, "successes"):
        return {
            "strategy": strategy,
            "successes": {
                name: {"text": t.text, "confidence": t.confidence, "language": t.language}
                for name, t in result.successes.items()
            },
            "errors": {name: str(err) for name, err in result.errors.items()},
        }
    return {
        "text": result.text,
        "language": result.language,
        "confidence": result.confidence,
        "provider": "fanout",
    }
@@ -0,0 +1,159 @@
1
+ """TTS REST endpoints."""
2
+ from __future__ import annotations
3
+ import time
4
+ from fastapi import APIRouter, Request, HTTPException, UploadFile, File, Form
5
+ from fastapi.responses import Response
6
+ from pydantic import BaseModel
7
+ from openspeech.core.enums import AudioFormat
8
+ from openspeech.core.models import AudioData, TTSOptions
9
+ from openspeech.exceptions import ProviderNotFoundError
10
+ from openspeech.utils.audio_converter import AudioConverter
11
+
12
+ router = APIRouter()
13
+
14
# One row per audio format id: (id, MIME type, file extension).
# Both lookup tables below are derived from these rows so they always cover
# the same set of ids in the same order.
_AUDIO_FORMAT_ROWS = (
    ("wav", "audio/wav", ".wav"),
    ("mp3", "audio/mpeg", ".mp3"),
    ("ogg", "audio/ogg", ".ogg"),
    ("flac", "audio/flac", ".flac"),
    ("opus", "audio/opus", ".opus"),
    ("pcm_16k", "audio/pcm", ".pcm"),
    ("pcm_44k", "audio/pcm", ".pcm"),
)

# Format id -> MIME type used as the response media type.
_MIME_MAP = {fmt_id: mime for fmt_id, mime, _ext in _AUDIO_FORMAT_ROWS}

# Format id -> file extension used in the download filename.
_FORMAT_EXT = {fmt_id: ext for fmt_id, _mime, ext in _AUDIO_FORMAT_ROWS}
33
+
34
+
35
+ class SynthesizeRequest(BaseModel):  # JSON request body of POST /synthesize
36
+ text: str  # required; passed to dispatcher.tts.synthesize as the text to synthesize
37
+ provider: str  # required; provider name handed to the dispatcher (unknown names yield a 404) and echoed in X-Provider
38
+ voice: str | None = None  # optional; forwarded unchanged into TTSOptions.voice
39
+ speed: float = 1.0  # forwarded unchanged into TTSOptions.speed; not range-checked here
40
+ model: str | None = None  # optional; forwarded unchanged into TTSOptions.model
41
+ stream_transport: str | None = None  # optional; forwarded unchanged into TTSOptions.stream_transport
42
+
43
+
44
@router.get("/{provider}/voices")
async def list_voices(request: Request, provider: str):
    """Return the voices offered by a provider as ``{"voices": [...]}``.

    Responds 404 when the dispatcher does not know *provider*. If the
    provider has no live instance, or the instance has no ``list_voices``
    attribute, the list is empty rather than an error.
    """
    dsp = request.app.state.dispatcher

    # The handle itself is unused; the lookup only serves to reject
    # unknown provider names.
    try:
        dsp._get_handle(provider)
    except ProviderNotFoundError as e:
        raise HTTPException(status_code=404, detail=str(e))

    lifecycle = dsp._lifecycle
    await lifecycle.ensure_ready(provider)
    backend = lifecycle.get_instance(provider)

    if backend is not None and hasattr(backend, "list_voices"):
        return {"voices": await backend.list_voices()}
    return {"voices": []}
60
+
61
+
62
def _format_id(fmt) -> str:
    """Return the plain format id (for example ``"wav"``) for *fmt*.

    Accepts either a string or an enum-like object with a ``value``
    attribute. Going through ``value`` avoids ``str()`` of a plain ``Enum``
    member, which renders as ``"ClassName.MEMBER"`` and would miss the MIME
    table.
    """
    return str(getattr(fmt, "value", fmt))


@router.post("/synthesize")
async def synthesize(request: Request, body: SynthesizeRequest):
    """Synthesize speech for ``body.text`` and return the raw audio bytes.

    The response body is the provider's audio. Metadata travels in headers:
    ``X-Provider``, ``X-Audio-Format``, ``X-Sample-Rate``, ``X-Duration-Ms``
    (``0`` when the provider reports none) and ``X-Elapsed-Ms`` (wall-clock
    time spent in this handler).

    Raises:
        HTTPException: 404 when the provider is unknown, 500 when the
            dispatcher returns no result.
    """
    dispatcher = request.app.state.dispatcher
    opts = TTSOptions(
        voice=body.voice,
        speed=body.speed,
        model=body.model,
        stream_transport=body.stream_transport,
    )
    started = time.perf_counter()

    try:
        result = await dispatcher.tts.synthesize(body.provider, body.text, opts)
    except ProviderNotFoundError as e:
        # Chain the cause so the original traceback stays attached.
        raise HTTPException(status_code=404, detail=str(e)) from e

    if result is None:
        raise HTTPException(status_code=500, detail="Synthesis returned no result")

    # NOTE(review): result.format may be an AudioFormat member rather than a
    # str (its definition is not visible here), so normalise to the bare id
    # before the lookup and before echoing it in a header.
    fmt = _format_id(getattr(result, "format", None) or "wav")
    media_type = _MIME_MAP.get(fmt, "audio/wav")

    return Response(
        content=result.data,
        media_type=media_type,
        headers={
            "X-Provider": body.provider,
            "X-Audio-Format": fmt,
            "X-Sample-Rate": str(result.sample_rate),
            "X-Duration-Ms": str(result.duration_ms or 0),
            "X-Elapsed-Ms": str(int((time.perf_counter() - started) * 1000)),
        },
    )
95
+
96
+
97
@router.post("/convert")
async def convert_audio(
    audio: UploadFile = File(...),
    source_format: str = Form("mp3"),
    target_format: str = Form("wav"),
    sample_rate: int = Form(16000),
    channels: int = Form(1),
):
    """Convert audio between formats. Requires ffmpeg for MP3/OGG/FLAC/OPUS.

    ``source_format`` is a hint: when it is not a valid ``AudioFormat`` value
    the format is detected from the uploaded bytes instead.

    Returns:
        The converted audio as an attachment named ``audio<ext>``, with
        ``X-Audio-Format`` and ``X-Sample-Rate`` headers.

    Raises:
        HTTPException: 400 for an unsupported target format, a non-positive
            ``sample_rate``/``channels``, or an empty upload; 422 when the
            converter raises ``RuntimeError``.
    """
    # Validate target format
    try:
        target = AudioFormat(target_format)
    except ValueError:
        # The enum's own ValueError adds nothing for the client.
        raise HTTPException(
            status_code=400, detail=f"Unsupported target format: {target_format}"
        ) from None

    # Reject meaningless audio parameters before they reach the converter.
    if sample_rate <= 0:
        raise HTTPException(status_code=400, detail=f"sample_rate must be positive, got {sample_rate}")
    if channels <= 0:
        raise HTTPException(status_code=400, detail=f"channels must be positive, got {channels}")

    data = await audio.read()
    if not data:
        raise HTTPException(status_code=400, detail="Empty audio data")

    # Detect or use declared source format
    try:
        src_fmt = AudioFormat(source_format)
    except ValueError:
        src_fmt = AudioConverter.detect_format(data)

    src_audio = AudioData(
        data=data,
        sample_rate=sample_rate,
        channels=channels,
        format=src_fmt,
    )

    try:
        result = AudioConverter.convert(src_audio, target)
    except RuntimeError as e:
        raise HTTPException(status_code=422, detail=str(e)) from e

    ext = _FORMAT_EXT.get(target_format, ".bin")
    media_type = _MIME_MAP.get(target_format, "application/octet-stream")

    return Response(
        content=result.data,
        media_type=media_type,
        headers={
            "Content-Disposition": f'attachment; filename="audio{ext}"',
            "X-Audio-Format": target_format,
            "X-Sample-Rate": str(result.sample_rate),
        },
    )
146
+
147
+
148
@router.get("/formats")
async def supported_formats():
    """List the download formats and report whether ffmpeg was found.

    WAV is always marked available; every other format mirrors the ffmpeg
    availability flag.
    """
    ffmpeg_ok = AudioConverter.ffmpeg_available()

    # (id, label, extension, needs ffmpeg)
    catalog = (
        ("wav", "WAV", ".wav", False),
        ("mp3", "MP3", ".mp3", True),
        ("ogg", "OGG Vorbis", ".ogg", True),
        ("flac", "FLAC", ".flac", True),
        ("opus", "Opus", ".opus", True),
    )

    entries = []
    for fmt_id, label, ext, needs_ffmpeg in catalog:
        entries.append(
            {
                "id": fmt_id,
                "label": label,
                "ext": ext,
                "available": ffmpeg_ok if needs_ffmpeg else True,
            }
        )

    return {"formats": entries, "ffmpeg": ffmpeg_ok}
@@ -0,0 +1,29 @@
1
+ """Web UI static routes."""
2
+ from __future__ import annotations
3
+
4
+ from pathlib import Path
5
+
6
+ from fastapi import APIRouter, HTTPException
7
+ from fastapi.responses import FileResponse
8
+
9
+ router = APIRouter()
10
+
11
# Directory holding the static WebUI files: the "webui" folder that sits next
# to this module's parent package directory.
_WEBUI_DIR = Path(__file__).resolve().parents[1] / "webui"


@router.get("/ui")
async def webui_index():
    """Serve the WebUI entry page, or respond 404 when index.html is missing."""
    page = _WEBUI_DIR.joinpath("index.html")
    if page.exists():
        return FileResponse(page)
    raise HTTPException(status_code=404, detail="WebUI is not available")
20
+
21
+
22
@router.get("/ui/{asset_name}")
async def webui_asset(asset_name: str):
    """Serve a single static WebUI asset by name.

    The requested name is resolved against the WebUI directory. A path that
    resolves outside that directory is rejected with 400, and anything that
    is not an existing regular file yields 404.
    """
    candidate = (_WEBUI_DIR / asset_name).resolve()

    # Containment check on the resolved path, so ".." segments and symlinks
    # cannot reach files outside the WebUI directory.
    if not candidate.is_relative_to(_WEBUI_DIR):
        raise HTTPException(status_code=400, detail="Invalid path")

    # is_file() is already False for a path that does not exist.
    if candidate.is_file():
        return FileResponse(candidate)
    raise HTTPException(status_code=404, detail="Asset not found")