openspeechapi 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. openspeech/__init__.py +75 -0
  2. openspeech/__main__.py +5 -0
  3. openspeech/cli.py +413 -0
  4. openspeech/client/__init__.py +4 -0
  5. openspeech/client/client.py +145 -0
  6. openspeech/config.py +212 -0
  7. openspeech/core/__init__.py +0 -0
  8. openspeech/core/base.py +75 -0
  9. openspeech/core/enums.py +39 -0
  10. openspeech/core/models.py +61 -0
  11. openspeech/core/registry.py +37 -0
  12. openspeech/core/settings.py +8 -0
  13. openspeech/demo.py +675 -0
  14. openspeech/dispatch/__init__.py +0 -0
  15. openspeech/dispatch/context.py +34 -0
  16. openspeech/dispatch/dispatcher.py +661 -0
  17. openspeech/dispatch/executors/__init__.py +0 -0
  18. openspeech/dispatch/executors/base.py +34 -0
  19. openspeech/dispatch/executors/in_process.py +66 -0
  20. openspeech/dispatch/executors/remote.py +64 -0
  21. openspeech/dispatch/executors/subprocess_exec.py +446 -0
  22. openspeech/dispatch/fanout.py +95 -0
  23. openspeech/dispatch/filters.py +73 -0
  24. openspeech/dispatch/lifecycle.py +178 -0
  25. openspeech/dispatch/watcher.py +82 -0
  26. openspeech/engine_catalog.py +236 -0
  27. openspeech/engine_registry.yaml +347 -0
  28. openspeech/exceptions.py +51 -0
  29. openspeech/factory.py +325 -0
  30. openspeech/local_engines/__init__.py +12 -0
  31. openspeech/local_engines/aim_resolver.py +91 -0
  32. openspeech/local_engines/backends/__init__.py +1 -0
  33. openspeech/local_engines/backends/docker_backend.py +490 -0
  34. openspeech/local_engines/backends/native_backend.py +902 -0
  35. openspeech/local_engines/base.py +30 -0
  36. openspeech/local_engines/engines/__init__.py +1 -0
  37. openspeech/local_engines/engines/faster_whisper.py +36 -0
  38. openspeech/local_engines/engines/fish_speech.py +33 -0
  39. openspeech/local_engines/engines/sherpa_onnx.py +56 -0
  40. openspeech/local_engines/engines/whisper.py +41 -0
  41. openspeech/local_engines/engines/whisperlivekit.py +60 -0
  42. openspeech/local_engines/manager.py +208 -0
  43. openspeech/local_engines/models.py +50 -0
  44. openspeech/local_engines/progress.py +69 -0
  45. openspeech/local_engines/registry.py +19 -0
  46. openspeech/local_engines/task_store.py +52 -0
  47. openspeech/local_engines/tasks.py +71 -0
  48. openspeech/logging_config.py +607 -0
  49. openspeech/observe/__init__.py +0 -0
  50. openspeech/observe/base.py +79 -0
  51. openspeech/observe/debug.py +44 -0
  52. openspeech/observe/latency.py +19 -0
  53. openspeech/observe/metrics.py +47 -0
  54. openspeech/observe/tracing.py +44 -0
  55. openspeech/observe/usage.py +27 -0
  56. openspeech/providers/__init__.py +0 -0
  57. openspeech/providers/_template.py +101 -0
  58. openspeech/providers/stt/__init__.py +0 -0
  59. openspeech/providers/stt/alibaba.py +86 -0
  60. openspeech/providers/stt/assemblyai.py +135 -0
  61. openspeech/providers/stt/azure_speech.py +99 -0
  62. openspeech/providers/stt/baidu.py +135 -0
  63. openspeech/providers/stt/deepgram.py +311 -0
  64. openspeech/providers/stt/elevenlabs.py +385 -0
  65. openspeech/providers/stt/faster_whisper.py +211 -0
  66. openspeech/providers/stt/google_cloud.py +106 -0
  67. openspeech/providers/stt/iflytek.py +427 -0
  68. openspeech/providers/stt/macos_speech.py +226 -0
  69. openspeech/providers/stt/openai.py +84 -0
  70. openspeech/providers/stt/sherpa_onnx.py +353 -0
  71. openspeech/providers/stt/tencent.py +212 -0
  72. openspeech/providers/stt/volcengine.py +107 -0
  73. openspeech/providers/stt/whisper.py +153 -0
  74. openspeech/providers/stt/whisperlivekit.py +530 -0
  75. openspeech/providers/stt/windows_speech.py +249 -0
  76. openspeech/providers/tts/__init__.py +0 -0
  77. openspeech/providers/tts/alibaba.py +95 -0
  78. openspeech/providers/tts/azure_speech.py +123 -0
  79. openspeech/providers/tts/baidu.py +143 -0
  80. openspeech/providers/tts/coqui.py +64 -0
  81. openspeech/providers/tts/cosyvoice.py +90 -0
  82. openspeech/providers/tts/deepgram.py +174 -0
  83. openspeech/providers/tts/elevenlabs.py +311 -0
  84. openspeech/providers/tts/fish_speech.py +158 -0
  85. openspeech/providers/tts/google_cloud.py +107 -0
  86. openspeech/providers/tts/iflytek.py +209 -0
  87. openspeech/providers/tts/macos_say.py +251 -0
  88. openspeech/providers/tts/minimax.py +122 -0
  89. openspeech/providers/tts/openai.py +104 -0
  90. openspeech/providers/tts/piper.py +104 -0
  91. openspeech/providers/tts/tencent.py +189 -0
  92. openspeech/providers/tts/volcengine.py +117 -0
  93. openspeech/providers/tts/windows_sapi.py +234 -0
  94. openspeech/server/__init__.py +1 -0
  95. openspeech/server/app.py +72 -0
  96. openspeech/server/auth.py +42 -0
  97. openspeech/server/middleware.py +75 -0
  98. openspeech/server/routes/__init__.py +1 -0
  99. openspeech/server/routes/management.py +848 -0
  100. openspeech/server/routes/stt.py +121 -0
  101. openspeech/server/routes/tts.py +159 -0
  102. openspeech/server/routes/webui.py +29 -0
  103. openspeech/server/webui/app.js +2649 -0
  104. openspeech/server/webui/index.html +216 -0
  105. openspeech/server/webui/styles.css +617 -0
  106. openspeech/server/ws/__init__.py +1 -0
  107. openspeech/server/ws/stt_stream.py +263 -0
  108. openspeech/server/ws/tts_stream.py +207 -0
  109. openspeech/telemetry/__init__.py +21 -0
  110. openspeech/telemetry/perf.py +307 -0
  111. openspeech/utils/__init__.py +5 -0
  112. openspeech/utils/audio_converter.py +406 -0
  113. openspeech/utils/audio_playback.py +156 -0
  114. openspeech/vendor_registry.yaml +74 -0
  115. openspeechapi-0.1.0.dist-info/METADATA +101 -0
  116. openspeechapi-0.1.0.dist-info/RECORD +118 -0
  117. openspeechapi-0.1.0.dist-info/WHEEL +4 -0
  118. openspeechapi-0.1.0.dist-info/entry_points.txt +3 -0
openspeech/__init__.py ADDED
@@ -0,0 +1,75 @@
1
+ """OpenSpeechAPI — Unified speech interface for STT/TTS providers."""
2
+
3
+ __version__ = "0.1.0"
4
+
5
+ from openspeech.config import load_config
6
+ from openspeech.core.base import SpeechProvider, STTProvider, TTSProvider
7
+ from openspeech.core.enums import AudioFormat, Capability, ExecMode, ProviderType
8
+ from openspeech.core.models import AudioChunk, AudioData, STTOptions, Transcription, TTSOptions, Word
9
+ from openspeech.core.registry import ProviderRegistry
10
+ from openspeech.core.settings import BaseSettings
11
+ from openspeech.dispatch.context import InvokeContext
12
+ from openspeech.dispatch.dispatcher import ServiceDispatcher
13
+ from openspeech.client.client import Client
14
+ from openspeech.factory import create_default_registry, create_provider, list_providers
15
+ from openspeech.logging_config import (
16
+ bind_context,
17
+ configure_logging,
18
+ get_integration_tag,
19
+ get_log_settings,
20
+ integrate_with_host,
21
+ is_host_managed,
22
+ openspeech_filter,
23
+ openspeech_level_filter,
24
+ )
25
+ from openspeech.telemetry.perf import Event, PerfTimer, milestone
26
+ from openspeech.utils import list_output_devices, play_audio
27
+ from openspeech.exceptions import (
28
+ ConfigError,
29
+ FanOutAllFailedError,
30
+ OpenSpeechError,
31
+ ProviderCrashedError,
32
+ ProviderError,
33
+ ProviderNotFoundError,
34
+ ProviderUnavailableError,
35
+ )
36
+
37
def list_configured_engines(
    config_path: "str | Path",
    engine_type: str | None = None,
) -> list[dict]:
    """Return engine metadata from a config file without starting providers.

    A lightweight query: only the YAML config is parsed and catalog metadata
    resolved — no provider processes are launched. Useful for populating UI
    dropdowns without running a full server.

    Args:
        config_path: Path to ``providers.yaml`` (or equivalent config file).
        engine_type: Optional filter — ``"stt"``, ``"tts"``, or ``None`` for all.

    Returns:
        List of engine info dicts (same schema as
        :py:meth:`ServiceDispatcher.list_engines_info`).
    """
    # Local import avoids adding Path to the package's public namespace.
    from pathlib import Path as _Path

    dispatcher = ServiceDispatcher.from_config(
        _Path(config_path), create_default_registry()
    )
    return dispatcher.list_engines_info(engine_type=engine_type)
59
+
60
+
61
# Public API of the top-level ``openspeech`` package, consumed by
# ``from openspeech import *`` and by documentation tooling.
__all__ = [
    "AudioChunk", "AudioData", "AudioFormat", "BaseSettings", "Capability",
    "Client", "ConfigError", "Event", "ExecMode", "FanOutAllFailedError", "InvokeContext",
    "PerfTimer",
    "bind_context", "configure_logging", "get_integration_tag", "get_log_settings",
    "integrate_with_host", "is_host_managed", "milestone",
    "openspeech_filter", "openspeech_level_filter",
    "create_default_registry", "create_provider", "list_configured_engines", "list_providers",
    "OpenSpeechError", "ProviderCrashedError", "ProviderError",
    "ProviderNotFoundError", "ProviderRegistry", "ProviderType",
    "ProviderUnavailableError", "STTOptions", "STTProvider", "ServiceDispatcher",
    "SpeechProvider", "TTSOptions", "TTSProvider", "Transcription", "Word",
    "list_output_devices", "play_audio",
    "load_config",
]
openspeech/__main__.py ADDED
@@ -0,0 +1,5 @@
1
+ """Allow running as python -m openspeech."""
2
+ from openspeech.cli import main
3
+
4
+ if __name__ == "__main__":
5
+ main()
openspeech/cli.py ADDED
@@ -0,0 +1,413 @@
1
+ """CLI entry point for OpenSpeech."""
2
+ from __future__ import annotations
3
+ import argparse
4
+ import queue
5
+ import sys
6
+ import time
7
+ from pathlib import Path
8
+ from openspeech.config import load_config
9
+ from openspeech.exceptions import ConfigError
10
+ from openspeech.local_engines import EngineAction, EngineManager, RuntimeConfig
11
+ from openspeech.local_engines.models import TaskStatus
12
+ from openspeech.logging_config import configure_logging
13
+
14
+
15
def _cmd_list(args: argparse.Namespace) -> None:
    """Print a table of all engines defined in the config file."""
    try:
        config = load_config(Path(args.config))
    except ConfigError as exc:
        print(f"Error: {exc}", file=sys.stderr)
        sys.exit(1)
    engines = config.engines
    if not engines:
        print("No engines configured.")
        return
    # Fixed-width columns so the table lines up in a terminal.
    print(f"{'Name':<25} {'Provider':<20} {'ExecMode':<15}")
    print("-" * 60)
    for engine_name, engine_cfg in engines.items():
        row = f"{engine_name:<25} {engine_cfg.provider:<20} {engine_cfg.exec_mode:<15}"
        print(row)
28
+
29
+
30
def _cmd_serve(args: argparse.Namespace) -> None:
    """Start the OpenSpeech HTTP/WebSocket server via uvicorn."""
    import uvicorn
    from openspeech.server.app import create_app
    from openspeech.factory import create_default_registry

    config_path = Path(args.config)

    # Registry of all known providers (imports happen lazily in the factory).
    registry = create_default_registry()
    app = create_app(config_path, registry)

    # CLI flags win over values from the config's server section.
    config = load_config(config_path)
    host = getattr(args, "host", None) or config.server.host
    port = getattr(args, "port", None) or config.server.port
    print(f"Starting OpenSpeech server on {host}:{port}")
    uvicorn.run(app, host=host, port=port)
47
+
48
+
49
def _cmd_check(args: argparse.Namespace) -> None:
    """Validate the config file and report how many engines it defines."""
    path = Path(args.config)
    try:
        config = load_config(path)
    except ConfigError as exc:
        print(f"Config error: {exc}", file=sys.stderr)
        sys.exit(1)
    else:
        print(f"Config OK: {len(config.engines)} engine(s) configured.")
56
+
57
+
58
def _runtime_cfg_from_args(args: argparse.Namespace) -> RuntimeConfig:
    """Translate engine-related CLI flags into a ``RuntimeConfig``.

    Empty/zero flag values are treated as "not provided" and left out of the
    options mapping so backend defaults apply.
    """
    options: dict = {}

    docker_image = getattr(args, "image", None)
    if docker_image:
        options["docker_image"] = docker_image
    container = getattr(args, "container_name", None)
    if container:
        options["container_name"] = container
    host_port = getattr(args, "host_port", None)
    if host_port:
        options["host_port"] = int(host_port)
    health_url = getattr(args, "health_url", None)
    if health_url:
        options["health_url"] = health_url
    model_repo = getattr(args, "model_repo", None)
    if model_repo:
        options["native_model_repo"] = model_repo
    # Tri-state flag: None means "backend decides", True/False are explicit.
    simulate = getattr(args, "simulate_download", None)
    if simulate is not None:
        options["native_simulate_download"] = bool(simulate)

    return RuntimeConfig(
        runtime=getattr(args, "runtime", "docker"),
        api_url=getattr(args, "api_url", "http://127.0.0.1:8080"),
        install_dir=getattr(args, "install_dir", "~/AI/services") or "~/AI/services",
        work_dir=getattr(args, "work_dir", ".openspeech/engines"),
        timeout_s=float(getattr(args, "timeout", 120.0)),
        retries=int(getattr(args, "retries", 0)),
        options=options,
    )
81
+
82
+
83
+ def _print_progress_event(event) -> None:
84
+ progress = "--"
85
+ if event.progress is not None:
86
+ progress = f"{event.progress:>5.1f}%"
87
+ eta = "--:--"
88
+ if event.eta_seconds is not None:
89
+ s = max(0, int(event.eta_seconds))
90
+ h = s // 3600
91
+ m = (s % 3600) // 60
92
+ ss = s % 60
93
+ eta = f"{h:02d}:{m:02d}:{ss:02d}" if h > 0 else f"{m:02d}:{ss:02d}"
94
+ print(
95
+ f"[{event.task_id[:8]}] "
96
+ f"{event.action:<7} {event.phase:<14} {progress} ETA {eta} {event.message}"
97
+ )
98
+
99
+
100
def _run_engine_action(
    manager: EngineManager,
    name: str,
    action: EngineAction,
    cfg: RuntimeConfig,
    follow: bool = True,
) -> None:
    """Run an engine lifecycle action and block until it reaches a terminal state.

    Subscribes to the task's progress events and optionally prints them.

    Args:
        manager: Engine manager that executes the action.
        name: Engine name.
        action: Lifecycle action (install/update/start/stop).
        cfg: Runtime configuration for the backend.
        follow: When True, print each progress event as it arrives.

    Raises:
        RuntimeError: When the task ends in a non-success state.
    """
    task = manager.run_action_async(name, action, cfg)
    event_queue = manager.emitter.subscribe(task.task_id)
    terminal = {"succeeded", "failed", "cancelled"}
    # Fix: unsubscribe in a finally block so the emitter does not accumulate
    # dead subscriber queues when printing or polling raises mid-loop.
    try:
        while True:
            try:
                evt = event_queue.get(timeout=0.2)
            except queue.Empty:
                # No event yet — poll the task store in case the terminal
                # event fired before we subscribed.
                latest = manager.get_task(task.task_id)
                if latest and latest.status.value in terminal:
                    break
                continue
            if follow:
                _print_progress_event(evt)
            if evt.status.value in terminal:
                break
    finally:
        manager.emitter.unsubscribe(task.task_id, event_queue)

    task = manager.get_task(task.task_id)
    if task is not None and task.status == TaskStatus.SUCCEEDED:
        print(f"Task completed: {task.task_id}")
    elif task is not None:
        raise RuntimeError(task.error or f"Task failed: {task.task_id}")
128
+
129
+
130
def _cmd_engine_task(args: argparse.Namespace, manager: EngineManager) -> None:
    """Handle the ``engine task`` subcommands: status, list, follow, cancel.

    Each branch prints to stdout; ``status`` and ``cancel`` exit with code 1
    when the task id is unknown.
    """
    if args.engine_task_cmd == "status":
        # Dump a fixed set of snapshot fields as key=value lines.
        task = manager.get_task(args.task_id)
        if task is None:
            print(f"Task not found: {args.task_id}", file=sys.stderr)
            sys.exit(1)
        snap = task.snapshot()
        keys = [
            "task_id",
            "engine",
            "action",
            "runtime",
            "status",
            "phase",
            "progress",
            "message",
            "error",
            "started_at",
            "updated_at",
            "finished_at",
        ]
        for k in keys:
            print(f"{k}={snap.get(k)}")
        return

    if args.engine_task_cmd == "list":
        # Tabular view of recent tasks, optionally filtered by engine name.
        tasks = manager.list_tasks(engine=args.name, limit=args.limit)
        if not tasks:
            print("No tasks.")
            return
        print(f"{'Task ID':<12} {'Engine':<12} {'Action':<8} {'Status':<10} {'Phase':<14} Message")
        print("-" * 90)
        for t in tasks:
            # Flatten multi-line messages so each task stays on one row.
            msg = (t.message or "").replace("\n", " ")
            print(
                f"{t.task_id[:12]:<12} {t.engine:<12} {t.action.value:<8} "
                f"{t.status.value:<10} {t.phase:<14} {msg}"
            )
        return

    if args.engine_task_cmd == "follow":
        # Poll the task store until it reaches a terminal state or the
        # deadline passes; only print when the update timestamp changes.
        deadline = time.monotonic() + max(1.0, args.timeout)
        last_updated = ""
        while time.monotonic() < deadline:
            task = manager.get_task(args.task_id)
            if task is None:
                # Task may not be registered yet — keep polling.
                time.sleep(args.interval)
                continue
            stamp = task.updated_at.isoformat()
            if stamp != last_updated:
                progress = "--" if task.progress is None else f"{task.progress:>5.1f}%"
                print(
                    f"[{task.task_id[:8]}] {task.action.value:<7} {task.phase:<14} "
                    f"{progress} {task.message}"
                )
                last_updated = stamp
            if task.status.value in {"succeeded", "failed", "cancelled"}:
                return
            time.sleep(args.interval)
        print("Follow timeout reached.")
        return

    if args.engine_task_cmd == "cancel":
        # Cancellation is a request; the task may still finish its phase.
        task = manager.cancel_task(args.task_id)
        if task is None:
            print(f"Task not found: {args.task_id}", file=sys.stderr)
            sys.exit(1)
        print(f"Cancellation requested: {task.task_id}")
        return
199
+
200
+
201
def _cmd_engine(args: argparse.Namespace) -> None:
    """Dispatch ``openspeech engine <subcommand>`` to the engine manager."""
    manager = EngineManager()

    try:
        # Task inspection has its own sub-dispatcher and needs no runtime cfg.
        if args.engine_cmd == "task":
            _cmd_engine_task(args, manager)
            return

        cfg = _runtime_cfg_from_args(args)
        engine_name = args.name

        if args.engine_cmd == "status":
            status = manager.status(engine_name, cfg)
            for label, value in (
                ("engine", status.engine),
                ("runtime", status.runtime),
                ("running", status.running),
                ("healthy", status.healthy),
                ("detail", status.detail),
            ):
                print(f"{label}={value}")
            return

        if args.engine_cmd == "logs":
            output = manager.logs(engine_name, cfg, lines=args.lines)
            print(output or "(no logs)")
            return

        # Remaining subcommands are lifecycle actions executed as tasks.
        lifecycle_actions = {
            "install": EngineAction.INSTALL,
            "update": EngineAction.UPDATE,
            "start": EngineAction.START,
            "stop": EngineAction.STOP,
        }
        if args.engine_cmd not in lifecycle_actions:
            raise ValueError(f"Unsupported engine command: {args.engine_cmd}")
        _run_engine_action(
            manager=manager,
            name=engine_name,
            action=lifecycle_actions[args.engine_cmd],
            cfg=cfg,
            follow=bool(args.follow),
        )
    except Exception as e:
        # CLI boundary: surface any failure as a one-line error and exit 1.
        print(f"Engine error: {e}", file=sys.stderr)
        sys.exit(1)
242
+
243
+
244
+ def _load_dotenv() -> None:
245
+ """Load .env file if python-dotenv is available."""
246
+ try:
247
+ from dotenv import load_dotenv
248
+ load_dotenv()
249
+ except ImportError:
250
+ pass
251
+
252
+
253
+ def _add_logging_args(parser: argparse.ArgumentParser) -> None:
254
+ """Register common logging flags on ``parser``.
255
+
256
+ Values left as ``None`` fall back to ``OPENSPEECH_LOG_*`` env vars,
257
+ then to built-in defaults in ``openspeech.logging_config``.
258
+ """
259
+ parser.add_argument(
260
+ "--log-level",
261
+ default=None,
262
+ choices=["TRACE", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
263
+ help="Log verbosity (env: OPENSPEECH_LOG_LEVEL). Default INFO.",
264
+ )
265
+ parser.add_argument(
266
+ "--log-format",
267
+ default=None,
268
+ choices=["text", "json"],
269
+ help="Console log format (env: OPENSPEECH_LOG_FORMAT). Default text.",
270
+ )
271
+ parser.add_argument(
272
+ "--log-dir",
273
+ default=None,
274
+ help=(
275
+ "Directory for rotating JSONL logs (env: OPENSPEECH_LOG_DIR). "
276
+ "Default 'logs'. Pass empty string to disable file logging."
277
+ ),
278
+ )
279
+ parser.add_argument(
280
+ "--log-perf",
281
+ default=None,
282
+ choices=["off", "basic", "verbose"],
283
+ help="Performance milestone verbosity (env: OPENSPEECH_LOG_PERF). Default basic.",
284
+ )
285
+
286
+
287
def _apply_logging_args(args: argparse.Namespace) -> None:
    """Forward parsed ``--log-*`` flags to ``configure_logging``."""
    configure_logging(
        level=getattr(args, "log_level", None),
        format=getattr(args, "log_format", None),
        # None lets env/default decide; an empty string disables file logs.
        log_dir=getattr(args, "log_dir", None),
        perf=getattr(args, "log_perf", None),
    )
296
+
297
+
298
def main(argv: list[str] | None = None) -> None:
    """CLI entry point: parse arguments and dispatch to the selected subcommand.

    Args:
        argv: Argument vector for testing; ``None`` uses ``sys.argv[1:]``.

    Fix: ``serve --host/--port`` previously defaulted to ``"0.0.0.0"`` and
    ``8600``, which made the config file's ``server.host``/``server.port``
    unreachable (``_cmd_serve`` uses ``getattr(args, "host", None) or
    config.server.host``). They now default to ``None`` so config values
    apply when the flags are omitted.
    """
    _load_dotenv()
    parser = argparse.ArgumentParser(prog="openspeech", description="OpenSpeech CLI")
    parser.add_argument("--config", default="providers.yaml", help="Config file path")
    _add_logging_args(parser)
    sub = parser.add_subparsers(dest="command")
    sub.add_parser("list", help="List configured providers")
    sub.add_parser("check", help="Validate configuration")
    serve_p = sub.add_parser("serve", help="Start HTTP/WebSocket server")
    # None (not a literal) lets config.server.* apply when the flag is omitted.
    serve_p.add_argument("--host", default=None, help="Bind host (default: config server.host)")
    serve_p.add_argument("--port", type=int, default=None, help="Bind port (default: config server.port)")

    engine_p = sub.add_parser("engine", help="Manage local engine runtime")
    engine_sub = engine_p.add_subparsers(dest="engine_cmd", required=True)

    def _add_engine_common(p: argparse.ArgumentParser) -> None:
        """Flags shared by every ``engine`` subcommand."""
        p.add_argument("--name", default="fish-speech", help="Engine name")
        p.add_argument("--runtime", default="docker", choices=["docker", "native"])
        p.add_argument("--api-url", default="http://127.0.0.1:8080")
        p.add_argument(
            "--install-dir",
            default="~/AI/services",
            help="Native services root directory",
        )
        p.add_argument("--work-dir", default=".openspeech/engines", help="Engine cache/work dir")
        p.add_argument("--timeout", type=float, default=120.0, help="Operation timeout in seconds")
        p.add_argument("--retries", type=int, default=0, help="Retry count")
        p.add_argument("--image", default="", help="Docker image override")
        p.add_argument("--container-name", default="", help="Docker container name override")
        p.add_argument("--host-port", type=int, default=0, help="Host port override")
        p.add_argument("--health-url", default="", help="Health URL override")
        p.add_argument(
            "--model-repo",
            default="",
            help="Native model repo id (e.g. Hugging Face model repo for sherpa-onnx)",
        )
        p.add_argument(
            "--simulate-download",
            action=argparse.BooleanOptionalAction,
            default=None,
            help="Whether to simulate model download if local/downloaded model is not found",
        )

    def _add_follow_flag(p: argparse.ArgumentParser) -> None:
        """``--follow/--no-follow`` flag shared by the lifecycle subcommands."""
        p.add_argument(
            "--follow",
            action=argparse.BooleanOptionalAction,
            default=True,
            help="Show live progress (default: enabled)",
        )

    # Lifecycle subcommands share identical flags — register them in a loop
    # instead of four copy-pasted blocks.
    for cmd, help_text in (
        ("install", "Install local engine runtime"),
        ("update", "Update local engine runtime"),
        ("start", "Start local engine runtime"),
        ("stop", "Stop local engine runtime"),
    ):
        lifecycle_p = engine_sub.add_parser(cmd, help=help_text)
        _add_engine_common(lifecycle_p)
        _add_follow_flag(lifecycle_p)

    status_p = engine_sub.add_parser("status", help="Get runtime status")
    _add_engine_common(status_p)

    logs_p = engine_sub.add_parser("logs", help="Show engine logs")
    _add_engine_common(logs_p)
    logs_p.add_argument("--lines", type=int, default=100, help="Tail line count")

    task_p = engine_sub.add_parser("task", help="Inspect engine tasks")
    task_sub = task_p.add_subparsers(dest="engine_task_cmd", required=True)
    task_status_p = task_sub.add_parser("status", help="Get task details")
    task_status_p.add_argument("--task-id", required=True, help="Task ID")
    task_list_p = task_sub.add_parser("list", help="List recent tasks")
    task_list_p.add_argument("--name", default=None, help="Filter by engine name")
    task_list_p.add_argument("--limit", type=int, default=20, help="Max rows")
    task_follow_p = task_sub.add_parser("follow", help="Poll task updates")
    task_follow_p.add_argument("--task-id", required=True, help="Task ID")
    task_follow_p.add_argument("--interval", type=float, default=1.0, help="Poll interval (seconds)")
    task_follow_p.add_argument("--timeout", type=float, default=600.0, help="Max follow time (seconds)")
    task_cancel_p = task_sub.add_parser("cancel", help="Request cancellation for a task")
    task_cancel_p.add_argument("--task-id", required=True, help="Task ID")

    args = parser.parse_args(argv)
    _apply_logging_args(args)
    if args.command is None:
        parser.print_help()
        return
    commands = {
        "list": _cmd_list,
        "check": _cmd_check,
        "serve": _cmd_serve,
        "engine": _cmd_engine,
    }
    commands[args.command](args)


if __name__ == "__main__":
    main()
openspeech/client/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ """OpenSpeech Python thin client."""
2
+ from openspeech.client.client import Client
3
+
4
+ __all__ = ["Client"]
openspeech/client/client.py ADDED
@@ -0,0 +1,145 @@
1
+ """Python thin client for OpenSpeech HTTP/WebSocket server."""
2
+ from __future__ import annotations
3
+
4
+ from typing import Any
5
+
6
+ import httpx
7
+
8
+ from openspeech.core.enums import AudioFormat
9
+ from openspeech.core.models import AudioData, AudioChunk, STTOptions, TTSOptions, Transcription, Word
10
+
11
+
12
class _ClientSTT:
    """Speech-to-text sub-client bound to a parent :class:`Client`."""

    def __init__(self, client: "Client") -> None:
        self._client = client

    async def transcribe(
        self, provider: str, audio: AudioData, opts: STTOptions | None = None
    ) -> Transcription:
        """POST audio to ``/v1/stt/transcribe`` and parse the JSON response.

        Only options the caller actually set are forwarded as form fields;
        numeric/boolean options are stringified for the multipart form.
        """
        opts = opts or STTOptions()
        form: dict = {"provider": provider}
        if opts.language:
            form["language"] = opts.language
        if opts.prompt:
            form["prompt"] = opts.prompt
        if opts.temperature is not None:
            form["temperature"] = str(opts.temperature)
        if opts.model:
            form["model"] = opts.model
        if opts.device:
            form["device"] = opts.device
        if opts.beam_size is not None:
            form["beam_size"] = str(opts.beam_size)
        if opts.compute_type:
            form["compute_type"] = opts.compute_type
        if opts.fp16 is not None:
            form["fp16"] = str(bool(opts.fp16)).lower()

        upload = {"audio": ("audio.wav", audio.data, "audio/wav")}
        resp = await self._client._http.post(
            f"{self._client._base_url}/v1/stt/transcribe",
            data=form, files=upload,
        )
        resp.raise_for_status()
        payload = resp.json()

        words = None
        raw_words = payload.get("words")
        if raw_words:
            words = [
                Word(
                    text=w["text"],
                    start_ms=w["start_ms"],
                    end_ms=w["end_ms"],
                    confidence=w.get("confidence"),
                )
                for w in raw_words
            ]

        return Transcription(
            text=payload["text"],
            language=payload.get("language"),
            confidence=payload.get("confidence"),
            words=words,
            duration_ms=payload.get("duration_ms"),
        )

    async def fanout(
        self, providers: list[str], audio: AudioData,
        opts: STTOptions | None = None, strategy: str = "first_completed",
    ) -> Any:
        """POST audio to the fan-out endpoint across several providers.

        Returns the raw JSON payload (shape depends on *strategy*).
        """
        opts = opts or STTOptions()
        form: dict = {
            "providers": ",".join(providers),
            "strategy": strategy,
        }
        if opts.language:
            form["language"] = opts.language
        if opts.model:
            form["model"] = opts.model
        if opts.device:
            form["device"] = opts.device
        if opts.beam_size is not None:
            form["beam_size"] = str(opts.beam_size)

        upload = {"audio": ("audio.wav", audio.data, "audio/wav")}
        resp = await self._client._http.post(
            f"{self._client._base_url}/v1/stt/transcribe/fanout",
            data=form, files=upload,
        )
        resp.raise_for_status()
        return resp.json()
84
+
85
+
86
class _ClientTTS:
    """Text-to-speech sub-client bound to a parent :class:`Client`."""

    def __init__(self, client: "Client") -> None:
        self._client = client

    async def synthesize(
        self, provider: str, text: str, opts: TTSOptions | None = None
    ) -> AudioData:
        """POST text to ``/v1/tts/synthesize`` and wrap the audio response."""
        opts = opts or TTSOptions()
        payload: dict = {"text": text, "provider": provider}
        if opts.voice:
            payload["voice"] = opts.voice
        # Only send speed when it differs from the server default of 1.0.
        if opts.speed != 1.0:
            payload["speed"] = opts.speed

        resp = await self._client._http.post(
            f"{self._client._base_url}/v1/tts/synthesize",
            json=payload,
        )
        resp.raise_for_status()

        # The server reports the audio sample rate in a response header;
        # fall back to 16 kHz when absent.
        rate_header = resp.headers.get("X-Sample-Rate", "16000")
        return AudioData(
            data=resp.content,
            sample_rate=int(rate_header),
            channels=1,
            format=AudioFormat.WAV,
        )
113
+
114
+
115
class Client:
    """OpenSpeech HTTP client — same interface as ServiceDispatcher.

    Usable as an async context manager; the underlying HTTP pool is closed
    on exit.
    """

    def __init__(self, base_url: str, timeout: float = 30.0) -> None:
        self._base_url = base_url.rstrip("/")
        self._http = httpx.AsyncClient(timeout=timeout)
        # Namespaced sub-clients mirror the dispatcher's .stt/.tts layout.
        self.stt = _ClientSTT(self)
        self.tts = _ClientTTS(self)

    async def close(self) -> None:
        """Dispose of the underlying HTTP connection pool."""
        await self._http.aclose()

    async def __aenter__(self) -> "Client":
        return self

    async def __aexit__(self, *args: Any) -> None:
        await self.close()

    async def list_engines(self) -> list[dict]:
        """GET ``/v1/engines`` and return the engine descriptor list."""
        resp = await self._http.get(f"{self._base_url}/v1/engines")
        resp.raise_for_status()
        body = resp.json()
        return body["engines"]

    async def list_providers(self) -> list[dict]:
        """Backward-compatibility alias for :meth:`list_engines`."""
        return await self.list_engines()

    async def health(self) -> dict:
        """GET ``/v1/health`` and return the raw status payload."""
        resp = await self._http.get(f"{self._base_url}/v1/health")
        resp.raise_for_status()
        return resp.json()
+ return resp.json()