openspeechapi 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openspeech/__init__.py +75 -0
- openspeech/__main__.py +5 -0
- openspeech/cli.py +413 -0
- openspeech/client/__init__.py +4 -0
- openspeech/client/client.py +145 -0
- openspeech/config.py +212 -0
- openspeech/core/__init__.py +0 -0
- openspeech/core/base.py +75 -0
- openspeech/core/enums.py +39 -0
- openspeech/core/models.py +61 -0
- openspeech/core/registry.py +37 -0
- openspeech/core/settings.py +8 -0
- openspeech/demo.py +675 -0
- openspeech/dispatch/__init__.py +0 -0
- openspeech/dispatch/context.py +34 -0
- openspeech/dispatch/dispatcher.py +661 -0
- openspeech/dispatch/executors/__init__.py +0 -0
- openspeech/dispatch/executors/base.py +34 -0
- openspeech/dispatch/executors/in_process.py +66 -0
- openspeech/dispatch/executors/remote.py +64 -0
- openspeech/dispatch/executors/subprocess_exec.py +446 -0
- openspeech/dispatch/fanout.py +95 -0
- openspeech/dispatch/filters.py +73 -0
- openspeech/dispatch/lifecycle.py +178 -0
- openspeech/dispatch/watcher.py +82 -0
- openspeech/engine_catalog.py +236 -0
- openspeech/engine_registry.yaml +347 -0
- openspeech/exceptions.py +51 -0
- openspeech/factory.py +325 -0
- openspeech/local_engines/__init__.py +12 -0
- openspeech/local_engines/aim_resolver.py +91 -0
- openspeech/local_engines/backends/__init__.py +1 -0
- openspeech/local_engines/backends/docker_backend.py +490 -0
- openspeech/local_engines/backends/native_backend.py +902 -0
- openspeech/local_engines/base.py +30 -0
- openspeech/local_engines/engines/__init__.py +1 -0
- openspeech/local_engines/engines/faster_whisper.py +36 -0
- openspeech/local_engines/engines/fish_speech.py +33 -0
- openspeech/local_engines/engines/sherpa_onnx.py +56 -0
- openspeech/local_engines/engines/whisper.py +41 -0
- openspeech/local_engines/engines/whisperlivekit.py +60 -0
- openspeech/local_engines/manager.py +208 -0
- openspeech/local_engines/models.py +50 -0
- openspeech/local_engines/progress.py +69 -0
- openspeech/local_engines/registry.py +19 -0
- openspeech/local_engines/task_store.py +52 -0
- openspeech/local_engines/tasks.py +71 -0
- openspeech/logging_config.py +607 -0
- openspeech/observe/__init__.py +0 -0
- openspeech/observe/base.py +79 -0
- openspeech/observe/debug.py +44 -0
- openspeech/observe/latency.py +19 -0
- openspeech/observe/metrics.py +47 -0
- openspeech/observe/tracing.py +44 -0
- openspeech/observe/usage.py +27 -0
- openspeech/providers/__init__.py +0 -0
- openspeech/providers/_template.py +101 -0
- openspeech/providers/stt/__init__.py +0 -0
- openspeech/providers/stt/alibaba.py +86 -0
- openspeech/providers/stt/assemblyai.py +135 -0
- openspeech/providers/stt/azure_speech.py +99 -0
- openspeech/providers/stt/baidu.py +135 -0
- openspeech/providers/stt/deepgram.py +311 -0
- openspeech/providers/stt/elevenlabs.py +385 -0
- openspeech/providers/stt/faster_whisper.py +211 -0
- openspeech/providers/stt/google_cloud.py +106 -0
- openspeech/providers/stt/iflytek.py +427 -0
- openspeech/providers/stt/macos_speech.py +226 -0
- openspeech/providers/stt/openai.py +84 -0
- openspeech/providers/stt/sherpa_onnx.py +353 -0
- openspeech/providers/stt/tencent.py +212 -0
- openspeech/providers/stt/volcengine.py +107 -0
- openspeech/providers/stt/whisper.py +153 -0
- openspeech/providers/stt/whisperlivekit.py +530 -0
- openspeech/providers/stt/windows_speech.py +249 -0
- openspeech/providers/tts/__init__.py +0 -0
- openspeech/providers/tts/alibaba.py +95 -0
- openspeech/providers/tts/azure_speech.py +123 -0
- openspeech/providers/tts/baidu.py +143 -0
- openspeech/providers/tts/coqui.py +64 -0
- openspeech/providers/tts/cosyvoice.py +90 -0
- openspeech/providers/tts/deepgram.py +174 -0
- openspeech/providers/tts/elevenlabs.py +311 -0
- openspeech/providers/tts/fish_speech.py +158 -0
- openspeech/providers/tts/google_cloud.py +107 -0
- openspeech/providers/tts/iflytek.py +209 -0
- openspeech/providers/tts/macos_say.py +251 -0
- openspeech/providers/tts/minimax.py +122 -0
- openspeech/providers/tts/openai.py +104 -0
- openspeech/providers/tts/piper.py +104 -0
- openspeech/providers/tts/tencent.py +189 -0
- openspeech/providers/tts/volcengine.py +117 -0
- openspeech/providers/tts/windows_sapi.py +234 -0
- openspeech/server/__init__.py +1 -0
- openspeech/server/app.py +72 -0
- openspeech/server/auth.py +42 -0
- openspeech/server/middleware.py +75 -0
- openspeech/server/routes/__init__.py +1 -0
- openspeech/server/routes/management.py +848 -0
- openspeech/server/routes/stt.py +121 -0
- openspeech/server/routes/tts.py +159 -0
- openspeech/server/routes/webui.py +29 -0
- openspeech/server/webui/app.js +2649 -0
- openspeech/server/webui/index.html +216 -0
- openspeech/server/webui/styles.css +617 -0
- openspeech/server/ws/__init__.py +1 -0
- openspeech/server/ws/stt_stream.py +263 -0
- openspeech/server/ws/tts_stream.py +207 -0
- openspeech/telemetry/__init__.py +21 -0
- openspeech/telemetry/perf.py +307 -0
- openspeech/utils/__init__.py +5 -0
- openspeech/utils/audio_converter.py +406 -0
- openspeech/utils/audio_playback.py +156 -0
- openspeech/vendor_registry.yaml +74 -0
- openspeechapi-0.1.0.dist-info/METADATA +101 -0
- openspeechapi-0.1.0.dist-info/RECORD +118 -0
- openspeechapi-0.1.0.dist-info/WHEEL +4 -0
- openspeechapi-0.1.0.dist-info/entry_points.txt +3 -0
openspeech/__init__.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"""OpenSpeechAPI — Unified speech interface for STT/TTS providers."""
|
|
2
|
+
|
|
3
|
+
__version__ = "0.1.0"
|
|
4
|
+
|
|
5
|
+
from openspeech.config import load_config
|
|
6
|
+
from openspeech.core.base import SpeechProvider, STTProvider, TTSProvider
|
|
7
|
+
from openspeech.core.enums import AudioFormat, Capability, ExecMode, ProviderType
|
|
8
|
+
from openspeech.core.models import AudioChunk, AudioData, STTOptions, Transcription, TTSOptions, Word
|
|
9
|
+
from openspeech.core.registry import ProviderRegistry
|
|
10
|
+
from openspeech.core.settings import BaseSettings
|
|
11
|
+
from openspeech.dispatch.context import InvokeContext
|
|
12
|
+
from openspeech.dispatch.dispatcher import ServiceDispatcher
|
|
13
|
+
from openspeech.client.client import Client
|
|
14
|
+
from openspeech.factory import create_default_registry, create_provider, list_providers
|
|
15
|
+
from openspeech.logging_config import (
|
|
16
|
+
bind_context,
|
|
17
|
+
configure_logging,
|
|
18
|
+
get_integration_tag,
|
|
19
|
+
get_log_settings,
|
|
20
|
+
integrate_with_host,
|
|
21
|
+
is_host_managed,
|
|
22
|
+
openspeech_filter,
|
|
23
|
+
openspeech_level_filter,
|
|
24
|
+
)
|
|
25
|
+
from openspeech.telemetry.perf import Event, PerfTimer, milestone
|
|
26
|
+
from openspeech.utils import list_output_devices, play_audio
|
|
27
|
+
from openspeech.exceptions import (
|
|
28
|
+
ConfigError,
|
|
29
|
+
FanOutAllFailedError,
|
|
30
|
+
OpenSpeechError,
|
|
31
|
+
ProviderCrashedError,
|
|
32
|
+
ProviderError,
|
|
33
|
+
ProviderNotFoundError,
|
|
34
|
+
ProviderUnavailableError,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
def list_configured_engines(
    config_path: "str | Path",
    engine_type: str | None = None,
) -> list[dict]:
    """List configured engines from a config file (without starting providers).

    Lightweight query: parses YAML + resolves catalog metadata only.
    Suitable for populating UI dropdowns without running a full server.

    Args:
        config_path: Path to ``providers.yaml`` (or equivalent config file).
        engine_type: Optional filter — ``"stt"``, ``"tts"``, or ``None`` for all.

    Returns:
        List of engine info dicts (same schema as
        :py:meth:`ServiceDispatcher.list_engines_info`).
    """
    # Local import keeps package import time free of a pathlib dependency here
    # and avoids shadowing the forward-referenced ``Path`` in the annotation.
    import pathlib

    provider_registry = create_default_registry()
    svc = ServiceDispatcher.from_config(pathlib.Path(config_path), provider_registry)
    return svc.list_engines_info(engine_type=engine_type)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# Public API surface of the package; every name here is imported above so
# ``from openspeech import X`` works for each entry.
__all__ = [
    "AudioChunk", "AudioData", "AudioFormat", "BaseSettings", "Capability",
    "Client", "ConfigError", "Event", "ExecMode", "FanOutAllFailedError", "InvokeContext",
    "PerfTimer",
    "bind_context", "configure_logging", "get_integration_tag", "get_log_settings",
    "integrate_with_host", "is_host_managed", "milestone",
    "openspeech_filter", "openspeech_level_filter",
    "create_default_registry", "create_provider", "list_configured_engines", "list_providers",
    "OpenSpeechError", "ProviderCrashedError", "ProviderError",
    "ProviderNotFoundError", "ProviderRegistry", "ProviderType",
    "ProviderUnavailableError", "STTOptions", "STTProvider", "ServiceDispatcher",
    "SpeechProvider", "TTSOptions", "TTSProvider", "Transcription", "Word",
    "list_output_devices", "play_audio",
    "load_config",
]
|
openspeech/__main__.py
ADDED
openspeech/cli.py
ADDED
|
@@ -0,0 +1,413 @@
|
|
|
1
|
+
"""CLI entry point for OpenSpeech."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
import argparse
|
|
4
|
+
import queue
|
|
5
|
+
import sys
|
|
6
|
+
import time
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from openspeech.config import load_config
|
|
9
|
+
from openspeech.exceptions import ConfigError
|
|
10
|
+
from openspeech.local_engines import EngineAction, EngineManager, RuntimeConfig
|
|
11
|
+
from openspeech.local_engines.models import TaskStatus
|
|
12
|
+
from openspeech.logging_config import configure_logging
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _cmd_list(args: argparse.Namespace) -> None:
    """Print a fixed-width table of the engines declared in the config file."""
    try:
        cfg = load_config(Path(args.config))
    except ConfigError as exc:
        print(f"Error: {exc}", file=sys.stderr)
        sys.exit(1)
    engines = cfg.engines
    if not engines:
        print("No engines configured.")
        return
    print(f"{'Name':<25} {'Provider':<20} {'ExecMode':<15}")
    print("-" * 60)
    for engine_name, spec in engines.items():
        print(f"{engine_name:<25} {spec.provider:<20} {spec.exec_mode:<15}")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _cmd_serve(args: argparse.Namespace) -> None:
    """Start the OpenSpeech HTTP/WebSocket server."""
    # Heavy imports stay local so the CLI stays fast for non-serve commands.
    import uvicorn

    from openspeech.factory import create_default_registry
    from openspeech.server.app import create_app

    cfg_path = Path(args.config)
    # Registry of all known providers (provider modules are imported lazily
    # by the factory).
    registry = create_default_registry()
    app = create_app(cfg_path, registry)

    cfg = load_config(cfg_path)
    # CLI flags win; otherwise fall back to the config file's server section.
    bind_host = getattr(args, "host", None) or cfg.server.host
    bind_port = getattr(args, "port", None) or cfg.server.port
    print(f"Starting OpenSpeech server on {bind_host}:{bind_port}")
    uvicorn.run(app, host=bind_host, port=bind_port)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _cmd_check(args: argparse.Namespace) -> None:
    """Validate the configuration file; exit with status 1 on error."""
    try:
        cfg = load_config(Path(args.config))
    except ConfigError as exc:
        print(f"Config error: {exc}", file=sys.stderr)
        sys.exit(1)
    else:
        print(f"Config OK: {len(cfg.engines)} engine(s) configured.")
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _runtime_cfg_from_args(args: argparse.Namespace) -> RuntimeConfig:
    """Translate engine CLI flags into a :class:`RuntimeConfig`.

    Empty-string/zero flag values are treated as "not set" and omitted from
    ``options``; insertion order of the option keys matches the flag order.
    """
    opts: dict = {}
    docker_image = getattr(args, "image", None)
    if docker_image:
        opts["docker_image"] = docker_image
    container = getattr(args, "container_name", None)
    if container:
        opts["container_name"] = container
    port_override = getattr(args, "host_port", None)
    if port_override:
        opts["host_port"] = int(port_override)
    health = getattr(args, "health_url", None)
    if health:
        opts["health_url"] = health
    repo = getattr(args, "model_repo", None)
    if repo:
        opts["native_model_repo"] = repo
    simulate = getattr(args, "simulate_download", None)
    if simulate is not None:
        opts["native_simulate_download"] = bool(simulate)
    return RuntimeConfig(
        runtime=getattr(args, "runtime", "docker"),
        api_url=getattr(args, "api_url", "http://127.0.0.1:8080"),
        install_dir=getattr(args, "install_dir", "~/AI/services") or "~/AI/services",
        work_dir=getattr(args, "work_dir", ".openspeech/engines"),
        timeout_s=float(getattr(args, "timeout", 120.0)),
        retries=int(getattr(args, "retries", 0)),
        options=opts,
    )
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _print_progress_event(event) -> None:
|
|
84
|
+
progress = "--"
|
|
85
|
+
if event.progress is not None:
|
|
86
|
+
progress = f"{event.progress:>5.1f}%"
|
|
87
|
+
eta = "--:--"
|
|
88
|
+
if event.eta_seconds is not None:
|
|
89
|
+
s = max(0, int(event.eta_seconds))
|
|
90
|
+
h = s // 3600
|
|
91
|
+
m = (s % 3600) // 60
|
|
92
|
+
ss = s % 60
|
|
93
|
+
eta = f"{h:02d}:{m:02d}:{ss:02d}" if h > 0 else f"{m:02d}:{ss:02d}"
|
|
94
|
+
print(
|
|
95
|
+
f"[{event.task_id[:8]}] "
|
|
96
|
+
f"{event.action:<7} {event.phase:<14} {progress} ETA {eta} {event.message}"
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _run_engine_action(
    manager: EngineManager,
    name: str,
    action: EngineAction,
    cfg: RuntimeConfig,
    follow: bool = True,
) -> None:
    """Run one engine lifecycle action and block until it reaches a terminal state.

    Subscribes to the task's progress events, optionally echoing them to the
    console, then re-reads the final task record and raises if it did not
    succeed.

    Args:
        manager: Engine manager that executes the action asynchronously.
        name: Engine name (e.g. ``"fish-speech"``).
        action: Lifecycle action to run (install/update/start/stop).
        cfg: Runtime configuration forwarded to the manager.
        follow: When True, print each progress event as it arrives.

    Raises:
        RuntimeError: If the task finished in a non-SUCCEEDED state.
    """
    task = manager.run_action_async(name, action, cfg)
    # Subscribe right after starting so progress events are captured.
    # NOTE(review): assumes the emitter delivers events from subscribe time
    # onward — confirm whether events fired before this line can be lost.
    event_queue = manager.emitter.subscribe(task.task_id)
    while True:
        try:
            evt = event_queue.get(timeout=0.2)
        except queue.Empty:
            # No event within the poll window: fall back to the stored task
            # record so a silent terminal transition still ends the loop.
            latest = manager.get_task(task.task_id)
            if latest and latest.status.value in {"succeeded", "failed", "cancelled"}:
                break
            continue
        if follow:
            _print_progress_event(evt)
        if evt.status.value in {"succeeded", "failed", "cancelled"}:
            break
    manager.emitter.unsubscribe(task.task_id, event_queue)

    # Re-read the task for its final status/error rather than trusting the
    # last observed event, which may predate the terminal update.
    task = manager.get_task(task.task_id)
    if task is not None and task.status == TaskStatus.SUCCEEDED:
        print(f"Task completed: {task.task_id}")
    elif task is not None:
        raise RuntimeError(task.error or f"Task failed: {task.task_id}")
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _cmd_engine_task(args: argparse.Namespace, manager: EngineManager) -> None:
    """Handle the ``engine task`` sub-commands: status, list, follow, cancel."""
    cmd = args.engine_task_cmd

    if cmd == "status":
        task = manager.get_task(args.task_id)
        if task is None:
            print(f"Task not found: {args.task_id}", file=sys.stderr)
            sys.exit(1)
        snap = task.snapshot()
        # Print a stable subset of the snapshot as key=value lines.
        for field in (
            "task_id", "engine", "action", "runtime", "status", "phase",
            "progress", "message", "error", "started_at", "updated_at",
            "finished_at",
        ):
            print(f"{field}={snap.get(field)}")
        return

    if cmd == "list":
        tasks = manager.list_tasks(engine=args.name, limit=args.limit)
        if not tasks:
            print("No tasks.")
            return
        print(f"{'Task ID':<12} {'Engine':<12} {'Action':<8} {'Status':<10} {'Phase':<14} Message")
        print("-" * 90)
        for item in tasks:
            summary = (item.message or "").replace("\n", " ")
            print(
                f"{item.task_id[:12]:<12} {item.engine:<12} {item.action.value:<8} "
                f"{item.status.value:<10} {item.phase:<14} {summary}"
            )
        return

    if cmd == "follow":
        deadline = time.monotonic() + max(1.0, args.timeout)
        previous_stamp = ""
        while time.monotonic() < deadline:
            task = manager.get_task(args.task_id)
            if task is None:
                time.sleep(args.interval)
                continue
            stamp = task.updated_at.isoformat()
            # Only print when the task record actually changed.
            if stamp != previous_stamp:
                progress = "--" if task.progress is None else f"{task.progress:>5.1f}%"
                print(
                    f"[{task.task_id[:8]}] {task.action.value:<7} {task.phase:<14} "
                    f"{progress} {task.message}"
                )
                previous_stamp = stamp
            if task.status.value in {"succeeded", "failed", "cancelled"}:
                return
            time.sleep(args.interval)
        print("Follow timeout reached.")
        return

    if cmd == "cancel":
        task = manager.cancel_task(args.task_id)
        if task is None:
            print(f"Task not found: {args.task_id}", file=sys.stderr)
            sys.exit(1)
        print(f"Cancellation requested: {task.task_id}")
        return
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _cmd_engine(args: argparse.Namespace) -> None:
    """Dispatch ``engine`` subcommands to an :class:`EngineManager`.

    Any failure is reported on stderr and exits with status 1.
    """
    manager = EngineManager()

    try:
        if args.engine_cmd == "task":
            _cmd_engine_task(args, manager)
            return

        cfg = _runtime_cfg_from_args(args)
        engine_name = args.name

        if args.engine_cmd == "status":
            status = manager.status(engine_name, cfg)
            for label, value in (
                ("engine", status.engine),
                ("runtime", status.runtime),
                ("running", status.running),
                ("healthy", status.healthy),
                ("detail", status.detail),
            ):
                print(f"{label}={value}")
            return

        if args.engine_cmd == "logs":
            log_text = manager.logs(engine_name, cfg, lines=args.lines)
            print(log_text or "(no logs)")
            return

        actions = {
            "install": EngineAction.INSTALL,
            "update": EngineAction.UPDATE,
            "start": EngineAction.START,
            "stop": EngineAction.STOP,
        }
        if args.engine_cmd not in actions:
            raise ValueError(f"Unsupported engine command: {args.engine_cmd}")
        _run_engine_action(
            manager=manager,
            name=engine_name,
            action=actions[args.engine_cmd],
            cfg=cfg,
            follow=bool(args.follow),
        )
    except Exception as e:
        print(f"Engine error: {e}", file=sys.stderr)
        sys.exit(1)
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def _load_dotenv() -> None:
|
|
245
|
+
"""Load .env file if python-dotenv is available."""
|
|
246
|
+
try:
|
|
247
|
+
from dotenv import load_dotenv
|
|
248
|
+
load_dotenv()
|
|
249
|
+
except ImportError:
|
|
250
|
+
pass
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def _add_logging_args(parser: argparse.ArgumentParser) -> None:
|
|
254
|
+
"""Register common logging flags on ``parser``.
|
|
255
|
+
|
|
256
|
+
Values left as ``None`` fall back to ``OPENSPEECH_LOG_*`` env vars,
|
|
257
|
+
then to built-in defaults in ``openspeech.logging_config``.
|
|
258
|
+
"""
|
|
259
|
+
parser.add_argument(
|
|
260
|
+
"--log-level",
|
|
261
|
+
default=None,
|
|
262
|
+
choices=["TRACE", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
|
|
263
|
+
help="Log verbosity (env: OPENSPEECH_LOG_LEVEL). Default INFO.",
|
|
264
|
+
)
|
|
265
|
+
parser.add_argument(
|
|
266
|
+
"--log-format",
|
|
267
|
+
default=None,
|
|
268
|
+
choices=["text", "json"],
|
|
269
|
+
help="Console log format (env: OPENSPEECH_LOG_FORMAT). Default text.",
|
|
270
|
+
)
|
|
271
|
+
parser.add_argument(
|
|
272
|
+
"--log-dir",
|
|
273
|
+
default=None,
|
|
274
|
+
help=(
|
|
275
|
+
"Directory for rotating JSONL logs (env: OPENSPEECH_LOG_DIR). "
|
|
276
|
+
"Default 'logs'. Pass empty string to disable file logging."
|
|
277
|
+
),
|
|
278
|
+
)
|
|
279
|
+
parser.add_argument(
|
|
280
|
+
"--log-perf",
|
|
281
|
+
default=None,
|
|
282
|
+
choices=["off", "basic", "verbose"],
|
|
283
|
+
help="Performance milestone verbosity (env: OPENSPEECH_LOG_PERF). Default basic.",
|
|
284
|
+
)
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def _apply_logging_args(args: argparse.Namespace) -> None:
    """Apply parsed logging flags via ``configure_logging``.

    Flags left unset (``None``) defer to env vars / built-in defaults; an
    empty-string ``--log-dir`` disables file logging.
    """
    configure_logging(
        level=getattr(args, "log_level", None),
        format=getattr(args, "log_format", None),
        log_dir=getattr(args, "log_dir", None),  # None → env/default; "" → disabled
        perf=getattr(args, "log_perf", None),
    )
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def main(argv: list[str] | None = None) -> None:
    """CLI entry point: build the argument parser and dispatch to a command.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    _load_dotenv()
    parser = argparse.ArgumentParser(prog="openspeech", description="OpenSpeech CLI")
    parser.add_argument("--config", default="providers.yaml", help="Config file path")
    _add_logging_args(parser)
    sub = parser.add_subparsers(dest="command")
    sub.add_parser("list", help="List configured providers")
    sub.add_parser("check", help="Validate configuration")

    serve_p = sub.add_parser("serve", help="Start HTTP/WebSocket server")
    # BUGFIX: these defaults were "0.0.0.0" / 8600, which are always truthy,
    # so _cmd_serve's `args.host or config.server.host` (and the port
    # equivalent) could never fall back to the config file. None restores
    # the intended CLI-flag-over-config precedence.
    serve_p.add_argument("--host", default=None, help="Bind host (default: config server.host)")
    serve_p.add_argument("--port", type=int, default=None, help="Bind port (default: config server.port)")

    engine_p = sub.add_parser("engine", help="Manage local engine runtime")
    engine_sub = engine_p.add_subparsers(dest="engine_cmd", required=True)

    def _add_engine_common(p: argparse.ArgumentParser) -> None:
        """Register the flags shared by every ``engine`` subcommand."""
        p.add_argument("--name", default="fish-speech", help="Engine name")
        p.add_argument("--runtime", default="docker", choices=["docker", "native"])
        p.add_argument("--api-url", default="http://127.0.0.1:8080")
        p.add_argument(
            "--install-dir",
            default="~/AI/services",
            help="Native services root directory",
        )
        p.add_argument("--work-dir", default=".openspeech/engines", help="Engine cache/work dir")
        p.add_argument("--timeout", type=float, default=120.0, help="Operation timeout in seconds")
        p.add_argument("--retries", type=int, default=0, help="Retry count")
        p.add_argument("--image", default="", help="Docker image override")
        p.add_argument("--container-name", default="", help="Docker container name override")
        p.add_argument("--host-port", type=int, default=0, help="Host port override")
        p.add_argument("--health-url", default="", help="Health URL override")
        p.add_argument(
            "--model-repo",
            default="",
            help="Native model repo id (e.g. Hugging Face model repo for sherpa-onnx)",
        )
        p.add_argument(
            "--simulate-download",
            action=argparse.BooleanOptionalAction,
            default=None,
            help="Whether to simulate model download if local/downloaded model is not found",
        )

    # install/update/start/stop all take the common flags plus --follow;
    # registered in a loop instead of four copy-pasted blocks.
    for cmd_name, cmd_help in (
        ("install", "Install local engine runtime"),
        ("update", "Update local engine runtime"),
        ("start", "Start local engine runtime"),
        ("stop", "Stop local engine runtime"),
    ):
        lifecycle_p = engine_sub.add_parser(cmd_name, help=cmd_help)
        _add_engine_common(lifecycle_p)
        lifecycle_p.add_argument(
            "--follow",
            action=argparse.BooleanOptionalAction,
            default=True,
            help="Show live progress (default: enabled)",
        )

    status_p = engine_sub.add_parser("status", help="Get runtime status")
    _add_engine_common(status_p)

    logs_p = engine_sub.add_parser("logs", help="Show engine logs")
    _add_engine_common(logs_p)
    logs_p.add_argument("--lines", type=int, default=100, help="Tail line count")

    task_p = engine_sub.add_parser("task", help="Inspect engine tasks")
    task_sub = task_p.add_subparsers(dest="engine_task_cmd", required=True)
    task_status_p = task_sub.add_parser("status", help="Get task details")
    task_status_p.add_argument("--task-id", required=True, help="Task ID")
    task_list_p = task_sub.add_parser("list", help="List recent tasks")
    task_list_p.add_argument("--name", default=None, help="Filter by engine name")
    task_list_p.add_argument("--limit", type=int, default=20, help="Max rows")
    task_follow_p = task_sub.add_parser("follow", help="Poll task updates")
    task_follow_p.add_argument("--task-id", required=True, help="Task ID")
    task_follow_p.add_argument("--interval", type=float, default=1.0, help="Poll interval (seconds)")
    task_follow_p.add_argument("--timeout", type=float, default=600.0, help="Max follow time (seconds)")
    task_cancel_p = task_sub.add_parser("cancel", help="Request cancellation for a task")
    task_cancel_p.add_argument("--task-id", required=True, help="Task ID")

    args = parser.parse_args(argv)
    _apply_logging_args(args)
    if args.command is None:
        parser.print_help()
        return
    commands = {
        "list": _cmd_list,
        "check": _cmd_check,
        "serve": _cmd_serve,
        "engine": _cmd_engine,
    }
    commands[args.command](args)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
"""Python thin client for OpenSpeech HTTP/WebSocket server."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
import httpx
|
|
7
|
+
|
|
8
|
+
from openspeech.core.enums import AudioFormat
|
|
9
|
+
from openspeech.core.models import AudioData, AudioChunk, STTOptions, TTSOptions, Transcription, Word
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class _ClientSTT:
    """Speech-to-text calls, exposed as ``client.stt``."""

    def __init__(self, client: "Client") -> None:
        self._client = client

    async def transcribe(
        self, provider: str, audio: AudioData, opts: STTOptions | None = None
    ) -> Transcription:
        """Upload ``audio`` for transcription by ``provider`` and parse the reply."""
        options = opts or STTOptions()
        # Only set fields become form values; numeric/bool options are
        # stringified for the multipart form.
        form: dict = {"provider": provider}
        if options.language:
            form["language"] = options.language
        if options.prompt:
            form["prompt"] = options.prompt
        if options.temperature is not None:
            form["temperature"] = str(options.temperature)
        if options.model:
            form["model"] = options.model
        if options.device:
            form["device"] = options.device
        if options.beam_size is not None:
            form["beam_size"] = str(options.beam_size)
        if options.compute_type:
            form["compute_type"] = options.compute_type
        if options.fp16 is not None:
            form["fp16"] = str(bool(options.fp16)).lower()

        upload = {"audio": ("audio.wav", audio.data, "audio/wav")}
        response = await self._client._http.post(
            f"{self._client._base_url}/v1/stt/transcribe",
            data=form,
            files=upload,
        )
        response.raise_for_status()
        payload = response.json()

        word_list = None
        raw_words = payload.get("words")
        if raw_words:
            word_list = [
                Word(
                    text=w["text"],
                    start_ms=w["start_ms"],
                    end_ms=w["end_ms"],
                    confidence=w.get("confidence"),
                )
                for w in raw_words
            ]

        return Transcription(
            text=payload["text"],
            language=payload.get("language"),
            confidence=payload.get("confidence"),
            words=word_list,
            duration_ms=payload.get("duration_ms"),
        )

    async def fanout(
        self, providers: list[str], audio: AudioData,
        opts: STTOptions | None = None, strategy: str = "first_completed",
    ) -> Any:
        """Upload ``audio`` once and fan it out to several providers."""
        options = opts or STTOptions()
        form = {
            "providers": ",".join(providers),
            "strategy": strategy,
        }
        if options.language:
            form["language"] = options.language
        if options.model:
            form["model"] = options.model
        if options.device:
            form["device"] = options.device
        if options.beam_size is not None:
            form["beam_size"] = str(options.beam_size)

        upload = {"audio": ("audio.wav", audio.data, "audio/wav")}
        response = await self._client._http.post(
            f"{self._client._base_url}/v1/stt/transcribe/fanout",
            data=form,
            files=upload,
        )
        response.raise_for_status()
        return response.json()
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class _ClientTTS:
    """Text-to-speech calls, exposed as ``client.tts``."""

    def __init__(self, client: "Client") -> None:
        self._client = client

    async def synthesize(
        self, provider: str, text: str, opts: TTSOptions | None = None
    ) -> AudioData:
        """Synthesize ``text`` via ``provider`` and wrap the raw bytes in AudioData."""
        options = opts or TTSOptions()
        payload: dict = {"text": text, "provider": provider}
        if options.voice:
            payload["voice"] = options.voice
        # Speed is only sent when it deviates from the server default of 1.0.
        if options.speed != 1.0:
            payload["speed"] = options.speed

        response = await self._client._http.post(
            f"{self._client._base_url}/v1/tts/synthesize",
            json=payload,
        )
        response.raise_for_status()

        # Sample rate is conveyed via a response header; 16 kHz when absent.
        rate = int(response.headers.get("X-Sample-Rate", "16000"))
        return AudioData(
            data=response.content,
            sample_rate=rate,
            channels=1,
            format=AudioFormat.WAV,
        )
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class Client:
    """OpenSpeech HTTP client — same interface as ServiceDispatcher.

    Usable as an async context manager; the underlying connection pool is
    closed on exit.
    """

    def __init__(self, base_url: str, timeout: float = 30.0) -> None:
        self._base_url = base_url.rstrip("/")
        self._http = httpx.AsyncClient(timeout=timeout)
        self.stt = _ClientSTT(self)
        self.tts = _ClientTTS(self)

    async def close(self) -> None:
        """Release the underlying HTTP connection pool."""
        await self._http.aclose()

    async def __aenter__(self) -> "Client":
        return self

    async def __aexit__(self, *args: Any) -> None:
        await self.close()

    async def list_engines(self) -> list[dict]:
        """Fetch the server's engine descriptors from ``/v1/engines``."""
        response = await self._http.get(f"{self._base_url}/v1/engines")
        response.raise_for_status()
        return response.json()["engines"]

    # Backward compatibility alias
    async def list_providers(self) -> list[dict]:
        return await self.list_engines()

    async def health(self) -> dict:
        """Fetch the server's health payload from ``/v1/health``."""
        response = await self._http.get(f"{self._base_url}/v1/health")
        response.raise_for_status()
        return response.json()