openspeechapi 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openspeech/__init__.py +75 -0
- openspeech/__main__.py +5 -0
- openspeech/cli.py +413 -0
- openspeech/client/__init__.py +4 -0
- openspeech/client/client.py +145 -0
- openspeech/config.py +212 -0
- openspeech/core/__init__.py +0 -0
- openspeech/core/base.py +75 -0
- openspeech/core/enums.py +39 -0
- openspeech/core/models.py +61 -0
- openspeech/core/registry.py +37 -0
- openspeech/core/settings.py +8 -0
- openspeech/demo.py +675 -0
- openspeech/dispatch/__init__.py +0 -0
- openspeech/dispatch/context.py +34 -0
- openspeech/dispatch/dispatcher.py +661 -0
- openspeech/dispatch/executors/__init__.py +0 -0
- openspeech/dispatch/executors/base.py +34 -0
- openspeech/dispatch/executors/in_process.py +66 -0
- openspeech/dispatch/executors/remote.py +64 -0
- openspeech/dispatch/executors/subprocess_exec.py +446 -0
- openspeech/dispatch/fanout.py +95 -0
- openspeech/dispatch/filters.py +73 -0
- openspeech/dispatch/lifecycle.py +178 -0
- openspeech/dispatch/watcher.py +82 -0
- openspeech/engine_catalog.py +236 -0
- openspeech/engine_registry.yaml +347 -0
- openspeech/exceptions.py +51 -0
- openspeech/factory.py +325 -0
- openspeech/local_engines/__init__.py +12 -0
- openspeech/local_engines/aim_resolver.py +91 -0
- openspeech/local_engines/backends/__init__.py +1 -0
- openspeech/local_engines/backends/docker_backend.py +490 -0
- openspeech/local_engines/backends/native_backend.py +902 -0
- openspeech/local_engines/base.py +30 -0
- openspeech/local_engines/engines/__init__.py +1 -0
- openspeech/local_engines/engines/faster_whisper.py +36 -0
- openspeech/local_engines/engines/fish_speech.py +33 -0
- openspeech/local_engines/engines/sherpa_onnx.py +56 -0
- openspeech/local_engines/engines/whisper.py +41 -0
- openspeech/local_engines/engines/whisperlivekit.py +60 -0
- openspeech/local_engines/manager.py +208 -0
- openspeech/local_engines/models.py +50 -0
- openspeech/local_engines/progress.py +69 -0
- openspeech/local_engines/registry.py +19 -0
- openspeech/local_engines/task_store.py +52 -0
- openspeech/local_engines/tasks.py +71 -0
- openspeech/logging_config.py +607 -0
- openspeech/observe/__init__.py +0 -0
- openspeech/observe/base.py +79 -0
- openspeech/observe/debug.py +44 -0
- openspeech/observe/latency.py +19 -0
- openspeech/observe/metrics.py +47 -0
- openspeech/observe/tracing.py +44 -0
- openspeech/observe/usage.py +27 -0
- openspeech/providers/__init__.py +0 -0
- openspeech/providers/_template.py +101 -0
- openspeech/providers/stt/__init__.py +0 -0
- openspeech/providers/stt/alibaba.py +86 -0
- openspeech/providers/stt/assemblyai.py +135 -0
- openspeech/providers/stt/azure_speech.py +99 -0
- openspeech/providers/stt/baidu.py +135 -0
- openspeech/providers/stt/deepgram.py +311 -0
- openspeech/providers/stt/elevenlabs.py +385 -0
- openspeech/providers/stt/faster_whisper.py +211 -0
- openspeech/providers/stt/google_cloud.py +106 -0
- openspeech/providers/stt/iflytek.py +427 -0
- openspeech/providers/stt/macos_speech.py +226 -0
- openspeech/providers/stt/openai.py +84 -0
- openspeech/providers/stt/sherpa_onnx.py +353 -0
- openspeech/providers/stt/tencent.py +212 -0
- openspeech/providers/stt/volcengine.py +107 -0
- openspeech/providers/stt/whisper.py +153 -0
- openspeech/providers/stt/whisperlivekit.py +530 -0
- openspeech/providers/stt/windows_speech.py +249 -0
- openspeech/providers/tts/__init__.py +0 -0
- openspeech/providers/tts/alibaba.py +95 -0
- openspeech/providers/tts/azure_speech.py +123 -0
- openspeech/providers/tts/baidu.py +143 -0
- openspeech/providers/tts/coqui.py +64 -0
- openspeech/providers/tts/cosyvoice.py +90 -0
- openspeech/providers/tts/deepgram.py +174 -0
- openspeech/providers/tts/elevenlabs.py +311 -0
- openspeech/providers/tts/fish_speech.py +158 -0
- openspeech/providers/tts/google_cloud.py +107 -0
- openspeech/providers/tts/iflytek.py +209 -0
- openspeech/providers/tts/macos_say.py +251 -0
- openspeech/providers/tts/minimax.py +122 -0
- openspeech/providers/tts/openai.py +104 -0
- openspeech/providers/tts/piper.py +104 -0
- openspeech/providers/tts/tencent.py +189 -0
- openspeech/providers/tts/volcengine.py +117 -0
- openspeech/providers/tts/windows_sapi.py +234 -0
- openspeech/server/__init__.py +1 -0
- openspeech/server/app.py +72 -0
- openspeech/server/auth.py +42 -0
- openspeech/server/middleware.py +75 -0
- openspeech/server/routes/__init__.py +1 -0
- openspeech/server/routes/management.py +848 -0
- openspeech/server/routes/stt.py +121 -0
- openspeech/server/routes/tts.py +159 -0
- openspeech/server/routes/webui.py +29 -0
- openspeech/server/webui/app.js +2649 -0
- openspeech/server/webui/index.html +216 -0
- openspeech/server/webui/styles.css +617 -0
- openspeech/server/ws/__init__.py +1 -0
- openspeech/server/ws/stt_stream.py +263 -0
- openspeech/server/ws/tts_stream.py +207 -0
- openspeech/telemetry/__init__.py +21 -0
- openspeech/telemetry/perf.py +307 -0
- openspeech/utils/__init__.py +5 -0
- openspeech/utils/audio_converter.py +406 -0
- openspeech/utils/audio_playback.py +156 -0
- openspeech/vendor_registry.yaml +74 -0
- openspeechapi-0.1.0.dist-info/METADATA +101 -0
- openspeechapi-0.1.0.dist-info/RECORD +118 -0
- openspeechapi-0.1.0.dist-info/WHEEL +4 -0
- openspeechapi-0.1.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""Executor abstract base class."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from abc import ABC, abstractmethod
|
|
5
|
+
from collections.abc import AsyncIterator
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from openspeech.core.base import SpeechProvider
|
|
9
|
+
from openspeech.core.settings import BaseSettings
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Executor(ABC):
    """Abstract executor — runs provider methods in a specific execution mode.

    An executor owns the lifecycle of one provider and forwards method calls
    to it by name. Subclasses decide where the provider actually lives.
    """

    @abstractmethod
    async def start(
        self,
        provider_cls: type[SpeechProvider],
        settings: BaseSettings,
        *,
        http_client: Any = None,
    ) -> None:
        """Prepare the executor to serve calls for ``provider_cls``.

        Args:
            provider_cls: Provider class the executor should run.
            settings: Settings object handed to the provider.
            http_client: Optional client object an implementation may pass on
                to the provider; implementations are free to ignore it.
        """

    @abstractmethod
    async def stop(self) -> None:
        """Shut the executor down and release whatever ``start`` acquired."""

    @abstractmethod
    async def invoke(self, method: str, **kwargs: Any) -> Any:
        """Call the provider method named ``method`` and return its result."""

    @abstractmethod
    def invoke_stream(self, method: str, **kwargs: Any) -> AsyncIterator[Any]:
        """Call a streaming provider method and iterate over its items.

        Declared as a plain ``def`` returning ``AsyncIterator`` on purpose:
        implementations are async generators, consumed as
        ``async for item in executor.invoke_stream(...)``. An ``async def``
        without ``yield`` would instead describe a coroutine that *returns*
        an iterator, which does not match how implementations behave.
        """

    @abstractmethod
    async def health_check(self) -> bool:
        """Return ``True`` when the executor considers the provider healthy."""
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""InProcessExecutor — runs provider directly in the main process."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
from collections.abc import AsyncIterator
|
|
4
|
+
from typing import Any
|
|
5
|
+
from openspeech.core.base import SpeechProvider
|
|
6
|
+
from openspeech.core.settings import BaseSettings
|
|
7
|
+
from openspeech.dispatch.executors.base import Executor
|
|
8
|
+
from openspeech.telemetry.perf import Event, PerfTimer
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class InProcessExecutor(Executor):
    """Executor that keeps the provider instance in the calling process."""

    def __init__(self) -> None:
        self._provider: SpeechProvider | None = None
        self._provider_cls: type[SpeechProvider] | None = None
        self._settings: BaseSettings | None = None

    async def start(
        self,
        provider_cls: type[SpeechProvider],
        settings: BaseSettings,
        *,
        http_client: Any = None,
    ) -> None:
        """Instantiate ``provider_cls`` with ``settings`` and start it.

        When ``http_client`` is given it is handed to the provider through
        ``set_http_client`` before the provider is started.
        """
        self._provider_cls, self._settings = provider_cls, settings
        instance = provider_cls(settings=settings)
        self._provider = instance
        if http_client is not None:
            instance.set_http_client(http_client)
        await instance.start()

    def set_provider_info(self, provider_cls: type[SpeechProvider], settings: BaseSettings) -> None:
        """Remember the provider class and settings so health can be probed before start."""
        self._provider_cls, self._settings = provider_cls, settings

    async def stop(self) -> None:
        """Stop the running provider, if any, and forget it."""
        running = self._provider
        if not running:
            return
        await running.stop()
        self._provider = None

    def _require_provider(self) -> SpeechProvider:
        """Return the started provider or raise ``RuntimeError`` if there is none."""
        provider = self._provider
        if provider is None:
            raise RuntimeError("Executor not started")
        return provider

    async def invoke(self, method: str, **kwargs: Any) -> Any:
        """Await the provider method named ``method`` under a perf timer."""
        bound = getattr(self._require_provider(), method)
        with PerfTimer(Event.PROVIDER_TOTAL, method=method, exec_mode="in_process"):
            return await bound(**kwargs)

    async def invoke_stream(self, method: str, **kwargs: Any) -> AsyncIterator[Any]:
        """Yield every item of the provider's async iterator ``method``.

        The number of yielded items is recorded on the perf timer once the
        stream has been fully consumed.
        """
        bound = getattr(self._require_provider(), method)
        with PerfTimer(Event.PROVIDER_TOTAL, method=method, exec_mode="in_process") as timer:
            yielded = 0
            async for chunk in bound(**kwargs):
                yielded += 1
                yield chunk
            timer.add(items=yielded)

    async def health_check(self) -> bool:
        """Report provider health, using a throwaway instance before start.

        Returns ``False`` when neither a running provider nor stored
        provider class/settings are available.
        """
        running = self._provider
        if running is not None:
            return await running.health_check()

        # Not started yet: probe with a temporary, never-started instance.
        factory, config = self._provider_cls, self._settings
        if factory is None or config is None:
            return False
        probe = factory(settings=config)
        return await probe.health_check()
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""RemoteExecutor — delegates to provider adapter that handles its own remote protocol."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
from collections.abc import AsyncIterator
|
|
4
|
+
from typing import Any
|
|
5
|
+
from openspeech.core.base import SpeechProvider
|
|
6
|
+
from openspeech.core.settings import BaseSettings
|
|
7
|
+
from openspeech.dispatch.executors.base import Executor
|
|
8
|
+
from openspeech.telemetry.perf import Event, PerfTimer
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class RemoteExecutor(Executor):
    """Executor for providers that talk to a remote service themselves.

    The provider adapter lives in this process; the executor only forwards
    calls to it and times them with ``exec_mode="remote"``.
    """

    def __init__(self) -> None:
        self._provider: SpeechProvider | None = None
        self._provider_cls: type[SpeechProvider] | None = None
        self._settings: BaseSettings | None = None

    async def start(
        self,
        provider_cls: type[SpeechProvider],
        settings: BaseSettings,
        *,
        http_client: Any = None,
    ) -> None:
        """Create the provider adapter, optionally inject ``http_client``, and start it."""
        self._provider_cls, self._settings = provider_cls, settings
        adapter = provider_cls(settings=settings)
        self._provider = adapter
        if http_client is not None:
            adapter.set_http_client(http_client)
        await adapter.start()

    def set_provider_info(self, provider_cls: type[SpeechProvider], settings: BaseSettings) -> None:
        """Remember the provider class and settings so health can be probed before start."""
        self._provider_cls, self._settings = provider_cls, settings

    async def stop(self) -> None:
        """Stop the running adapter, if any, and forget it."""
        adapter = self._provider
        if not adapter:
            return
        await adapter.stop()
        self._provider = None

    async def invoke(self, method: str, **kwargs: Any) -> Any:
        """Await the adapter method named ``method`` under a perf timer."""
        adapter = self._provider
        if adapter is None:
            raise RuntimeError("Executor not started")
        with PerfTimer(Event.PROVIDER_TOTAL, method=method, exec_mode="remote"):
            # Attribute lookup happens inside the timed region, as the call does.
            call = getattr(adapter, method)
            return await call(**kwargs)

    async def invoke_stream(self, method: str, **kwargs: Any) -> AsyncIterator[Any]:
        """Yield every item of the adapter's async iterator ``method``.

        The number of yielded items is recorded on the perf timer once the
        stream has been fully consumed.
        """
        adapter = self._provider
        if adapter is None:
            raise RuntimeError("Executor not started")
        with PerfTimer(Event.PROVIDER_TOTAL, method=method, exec_mode="remote") as timer:
            yielded = 0
            async for chunk in getattr(adapter, method)(**kwargs):
                yielded += 1
                yield chunk
            timer.add(items=yielded)

    async def health_check(self) -> bool:
        """Report adapter health, using a throwaway instance before start.

        Returns ``False`` when neither a running adapter nor stored provider
        class/settings are available.
        """
        adapter = self._provider
        if adapter is not None:
            return await adapter.health_check()

        # Not started yet: probe with a temporary, never-started instance.
        factory, config = self._provider_cls, self._settings
        if factory is None or config is None:
            return False
        probe = factory(settings=config)
        return await probe.health_check()
|
|
@@ -0,0 +1,446 @@
|
|
|
1
|
+
"""SubprocessExecutor — runs provider in a worker subprocess via UDS + msgpack IPC."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import asyncio
|
|
5
|
+
import dataclasses
|
|
6
|
+
import enum
|
|
7
|
+
import importlib
|
|
8
|
+
import os
|
|
9
|
+
import pickle
|
|
10
|
+
import struct
|
|
11
|
+
import sys
|
|
12
|
+
import tempfile
|
|
13
|
+
import time
|
|
14
|
+
from collections.abc import AsyncIterator
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
import msgpack
|
|
19
|
+
|
|
20
|
+
from openspeech.core.base import SpeechProvider
|
|
21
|
+
from openspeech.core.settings import BaseSettings
|
|
22
|
+
from openspeech.dispatch.executors.base import Executor
|
|
23
|
+
from openspeech.telemetry.perf import Event, PerfTimer
|
|
24
|
+
|
|
25
|
+
# ---------------------------------------------------------------------------
|
|
26
|
+
# Wire protocol helpers
|
|
27
|
+
# ---------------------------------------------------------------------------
|
|
28
|
+
# Messages are length-prefixed msgpack: 4-byte big-endian uint32 + payload
|
|
29
|
+
|
|
30
|
+
# Frame header: payload length as a 4-byte big-endian unsigned integer.
_HEADER = struct.Struct(">I")

# Proxy-related environment variables, upper-case spellings first and then
# their lower-case twins.
_PROXY_ENV_KEYS = tuple(
    spell(name)
    for spell in (str.upper, str.lower)
    for name in ("http_proxy", "https_proxy", "all_proxy")
)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _encode(obj: Any) -> bytes:
    """Pack ``obj`` with msgpack and prepend the length header."""
    body = msgpack.packb(obj, use_bin_type=True)
    prefix = _HEADER.pack(len(body))
    return b"".join((prefix, body))
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _decode_obj(data: bytes) -> Any:
    """Unpack one msgpack payload (header already stripped)."""
    decoded = msgpack.unpackb(data, raw=False)
    return decoded
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
async def _read_msg(reader: asyncio.StreamReader) -> Any:
    """Read one length-prefixed message from ``reader`` and decode it.

    ``readexactly`` raises ``asyncio.IncompleteReadError`` if the stream ends
    before the header or the announced payload is complete.
    """
    prefix = await reader.readexactly(_HEADER.size)
    body_len = _HEADER.unpack(prefix)[0]
    body = await reader.readexactly(body_len)
    return _decode_obj(body)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
async def _write_msg(writer: asyncio.StreamWriter, obj: Any) -> None:
    """Encode ``obj`` as one framed message, write it, and wait for the drain."""
    frame = _encode(obj)
    writer.write(frame)
    await writer.drain()
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
# ---------------------------------------------------------------------------
|
|
63
|
+
# Dataclass (de)serialization helpers
|
|
64
|
+
# ---------------------------------------------------------------------------
|
|
65
|
+
|
|
66
|
+
def _serialize_arg(value: Any) -> Any:
    """Recursively serialize a value so it can be packed with msgpack.

    Dataclass instances become ``{"__dataclass__": <dotted path>, "fields": {...}}``
    and Enum members become ``{"__enum__": <dotted path>, "value": ...}``.
    Dicts, lists and tuples are walked recursively (tuples come out as lists);
    everything else is returned unchanged.

    Fields are serialized one by one instead of through ``dataclasses.asdict``:
    ``asdict`` would flatten nested dataclasses into plain dicts (losing the
    type marker needed to rebuild them) and would leave Enum members as raw
    Enum objects.
    """
    if dataclasses.is_dataclass(value) and not isinstance(value, type):
        cls = type(value)
        return {
            "__dataclass__": cls.__module__ + "." + cls.__qualname__,
            "fields": {
                f.name: _serialize_arg(getattr(value, f.name))
                for f in dataclasses.fields(value)
            },
        }
    if isinstance(value, enum.Enum):
        cls = type(value)
        return {"__enum__": cls.__module__ + "." + cls.__qualname__, "value": value.value}
    if isinstance(value, bytes):
        return value  # msgpack handles bytes natively with use_bin_type=True
    if isinstance(value, dict):
        return {k: _serialize_arg(v) for k, v in value.items()}
    if isinstance(value, (list, tuple)):
        return [_serialize_arg(i) for i in value]
    return value
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _deserialize_arg(value: Any) -> Any:
    """Rebuild Python objects from a msgpack-decoded value.

    Dicts carrying a ``"__dataclass__"`` or ``"__enum__"`` marker are turned
    back into the named dataclass / Enum member; other dicts and lists are
    walked recursively; any other value is returned as is.
    """
    if isinstance(value, list):
        return [_deserialize_arg(element) for element in value]
    if not isinstance(value, dict):
        return value

    if "__dataclass__" in value:
        dc_type = _import_dotted(value["__dataclass__"])
        decoded = {name: _deserialize_arg(raw) for name, raw in value["fields"].items()}
        # Plain strings may stand in for Enum members; coerce them by type hint.
        decoded = _fix_enum_fields(dc_type, decoded)
        return dc_type(**decoded)

    if "__enum__" in value:
        enum_type = _import_dotted(value["__enum__"])
        return enum_type(value["value"])

    return {key: _deserialize_arg(item) for key, item in value.items()}
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _import_dotted(dotted: str) -> Any:
    """Import a dotted-path object, e.g. 'openspeech.core.enums.AudioFormat'.

    The path is ``<module path>.<qualified name>``. Because the qualified
    name may itself contain dots (a class nested in another class), the
    longest importable module prefix is located first and the remaining
    components are resolved with ``getattr``.

    Args:
        dotted: Dotted path with at least one dot.

    Returns:
        The object the path refers to.

    Raises:
        ValueError: If ``dotted`` has no module part or no attribute part.
        ModuleNotFoundError: If no prefix of the path is an importable module,
            or a module on the path fails to import one of its own dependencies.
        AttributeError: If an attribute on the path does not exist.
    """
    module_path, _, attr = dotted.rpartition(".")
    if not module_path or not attr:
        raise ValueError(f"Expected a dotted 'module.Name' path, got {dotted!r}")

    attr_chain = [attr]
    while True:
        try:
            module = importlib.import_module(module_path)
            break
        except ModuleNotFoundError as exc:
            missing = exc.name or ""
            # Only walk back when the missing module is part of *our* path;
            # a missing dependency inside a real module must surface as is.
            on_our_path = module_path == missing or module_path.startswith(missing + ".")
            parent, _, tail = module_path.rpartition(".")
            if not missing or not on_our_path or not parent:
                raise
            attr_chain.insert(0, tail)
            module_path = parent

    target: Any = module
    for name in attr_chain:
        target = getattr(target, name)
    return target
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _fix_enum_fields(cls: type, fields: dict[str, Any]) -> dict[str, Any]:
    """For a dataclass cls, coerce any str values back to their Enum type when needed.

    Each ``str`` value whose field is annotated with an Enum class — directly,
    or as a type argument such as ``Optional[E]``, ``E | None`` or ``list[E]``
    — is passed to that Enum's constructor. Values the Enum rejects, and all
    non-``str`` values, are kept unchanged.

    Args:
        cls: Class whose type hints describe the fields.
        fields: Field name → already-deserialized value.

    Returns:
        A new dict with the same keys and, where applicable, Enum members in
        place of their string values.
    """
    import typing

    try:
        hints = typing.get_type_hints(cls)
    except Exception:
        # Unresolvable annotations: leave every value untouched (best effort).
        hints = {}

    result: dict[str, Any] = {}
    for field_name, field_value in fields.items():
        hint = hints.get(field_name)
        if hint is not None and isinstance(field_value, str):
            # get_args covers typing.Optional/Union *and* PEP 604 unions
            # (``E | None``), which expose no ``__origin__`` attribute.
            candidates = typing.get_args(hint) or (hint,)
            for candidate in candidates:
                if isinstance(candidate, type) and issubclass(candidate, enum.Enum):
                    try:
                        field_value = candidate(field_value)
                    except ValueError:
                        pass  # not a valid member value; keep the raw string
                    break  # only the first Enum candidate is considered
        result[field_name] = field_value
    return result
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
# ---------------------------------------------------------------------------
|
|
145
|
+
# Worker process entry point
|
|
146
|
+
# ---------------------------------------------------------------------------
|
|
147
|
+
|
|
148
|
+
def _worker_main(provider_pickle: bytes, socket_path: str) -> None:
    """Synchronous entry point of the worker: run the IPC server on a fresh event loop."""
    serve = _worker_serve(provider_pickle, socket_path)
    asyncio.run(serve)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
async def _worker_serve(provider_pickle: bytes, socket_path: str) -> None:
    """Unpickle the provider and serve IPC requests on a Unix socket forever.

    Every request is a dict with a ``"cmd"`` key (``start``, ``stop``,
    ``health``, ``invoke``, ``invoke_stream``). Every reply is a dict with an
    ``"ok"`` flag; failures carry ``"error"`` with ``str(exception)``.
    ``invoke_stream`` replies once per item (``"chunk"``, ``"done": False``)
    followed by a final ``"done": True`` message.

    Args:
        provider_pickle: Pickled, not-yet-started provider instance.
        socket_path: Filesystem path the Unix server binds to.
    """
    # SECURITY: pickle.loads runs arbitrary code from its input. This is only
    # acceptable because the bytes are produced by the parent process that
    # spawned this worker; never feed externally supplied data in here.
    provider: SpeechProvider = pickle.loads(provider_pickle)

    async def handle_client(reader: asyncio.StreamReader, writer: asyncio.StreamWriter) -> None:
        """Serve one connection until the peer disconnects or sends ``stop``."""
        try:
            while True:
                try:
                    msg = await _read_msg(reader)
                except (asyncio.IncompleteReadError, ConnectionResetError, EOFError):
                    break

                cmd = msg.get("cmd")

                if cmd == "start":
                    try:
                        await provider.start()
                        await _write_msg(writer, {"ok": True})
                    except Exception as exc:
                        await _write_msg(writer, {"ok": False, "error": str(exc)})

                elif cmd == "stop":
                    try:
                        await provider.stop()
                        await _write_msg(writer, {"ok": True})
                    except Exception as exc:
                        await _write_msg(writer, {"ok": False, "error": str(exc)})
                    # The connection ends after a stop request either way.
                    break

                elif cmd == "health":
                    try:
                        result = await provider.health_check()
                        await _write_msg(writer, {"ok": True, "result": result})
                    except Exception as exc:
                        await _write_msg(writer, {"ok": False, "error": str(exc)})

                elif cmd == "invoke":
                    try:
                        # Request parsing sits inside the try so a malformed
                        # request yields an error reply instead of silently
                        # tearing down the connection.
                        method = msg["method"]
                        raw_kwargs = msg.get("kwargs", {})
                        kwargs = {k: _deserialize_arg(v) for k, v in raw_kwargs.items()}
                        fn = getattr(provider, method)
                        result = await fn(**kwargs)
                        serialized = _serialize_arg(result)
                        await _write_msg(writer, {"ok": True, "result": serialized})
                    except Exception as exc:
                        await _write_msg(writer, {"ok": False, "error": str(exc)})

                elif cmd == "invoke_stream":
                    try:
                        method = msg["method"]
                        raw_kwargs = msg.get("kwargs", {})
                        kwargs = {k: _deserialize_arg(v) for k, v in raw_kwargs.items()}
                        fn = getattr(provider, method)
                        async for item in fn(**kwargs):
                            serialized = _serialize_arg(item)
                            await _write_msg(writer, {"ok": True, "chunk": serialized, "done": False})
                        await _write_msg(writer, {"ok": True, "done": True})
                    except Exception as exc:
                        # An error reply also terminates the stream for the reader.
                        await _write_msg(writer, {"ok": False, "error": str(exc)})

                else:
                    await _write_msg(writer, {"ok": False, "error": f"Unknown command: {cmd}"})
        finally:
            writer.close()
            try:
                await writer.wait_closed()
            except Exception:
                pass  # best effort: the peer may already be gone

    server = await asyncio.start_unix_server(handle_client, path=socket_path)
    async with server:
        await server.serve_forever()
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def _spawn_worker(provider_pickle: bytes, socket_path: str) -> str:
    """Build the Python bootstrap script that runs the worker.

    Despite its name this function does not start a process: it returns the
    source text of a self-contained script (suitable for ``python -c``) that
    puts the package root on ``sys.path``, imports ``_worker_main`` and calls
    it with the given arguments embedded as literals.

    Args:
        provider_pickle: Pickled provider instance, embedded via ``repr``.
        socket_path: Unix socket path the worker should bind, embedded via ``repr``.

    Returns:
        The script source as a string.
    """
    import openspeech
    pkg_root = str(Path(openspeech.__file__).parent.parent)

    # Build a self-contained script that imports _worker_main and runs it
    script = (
        "import sys, pickle\n"
        f"sys.path.insert(0, {pkg_root!r})\n"
        "from openspeech.dispatch.executors.subprocess_exec import _worker_main\n"
        f"provider_pickle = {provider_pickle!r}\n"
        f"socket_path = {socket_path!r}\n"
        "_worker_main(provider_pickle, socket_path)\n"
    )
    return script
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
# ---------------------------------------------------------------------------
|
|
245
|
+
# SubprocessExecutor
|
|
246
|
+
# ---------------------------------------------------------------------------
|
|
247
|
+
|
|
248
|
+
class SubprocessExecutor(Executor):
    """Executor that runs the provider inside a worker subprocess.

    Communication uses Unix Domain Sockets with length-prefixed msgpack messages.

    NOTE(review): requests and replies share a single connection and nothing
    here serializes callers, so overlapping ``invoke``/``invoke_stream``/
    ``health_check`` calls — or abandoning a stream before its final message —
    would presumably interleave frames. Confirm callers never overlap.
    """

    def __init__(self, restart_on_crash: bool = True, restart_delay_s: float = 1.0) -> None:
        """Create an idle executor.

        Args:
            restart_on_crash: Relaunch the worker when a call finds it dead.
            restart_delay_s: Seconds to wait before relaunching.
        """
        self._provider_cls: type[SpeechProvider] | None = None
        self._settings: BaseSettings | None = None
        self._process: asyncio.subprocess.Process | None = None
        self._socket_path: str | None = None
        self._reader: asyncio.StreamReader | None = None
        self._writer: asyncio.StreamWriter | None = None
        self._restart_on_crash = restart_on_crash
        self._restart_delay_s = restart_delay_s
        self._started = False
        self._tmpdir: tempfile.TemporaryDirectory | None = None

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    async def start(self, provider_cls: type[SpeechProvider], settings: BaseSettings, *, http_client: Any = None) -> None:
        """Create a private temp dir, launch the worker, and start the provider in it.

        ``http_client`` is accepted for interface compatibility but is not
        used by this executor.

        Raises:
            RuntimeError: If the worker socket never appears or the worker
                reports that the provider failed to start.
        """
        self._provider_cls = provider_cls
        self._settings = settings
        self._tmpdir = tempfile.TemporaryDirectory(prefix="openspeech_")
        self._socket_path = os.path.join(self._tmpdir.name, "worker.sock")
        try:
            await self._launch_worker()
        except BaseException:
            # A failed launch must not leave a child process, an open
            # connection, or the temp dir behind.
            await self._close_connection(send_stop=False)
            await self._kill_process()
            self._cleanup_tmpdir()
            raise
        self._started = True

    async def _launch_worker(self) -> None:
        """Spawn the subprocess and establish the UDS connection."""
        assert self._provider_cls is not None
        assert self._settings is not None
        assert self._socket_path is not None

        # Instantiate provider (no start) and pickle it
        provider_instance = self._provider_cls(settings=self._settings)
        provider_pickle = pickle.dumps(provider_instance)

        # NOTE(review): the pickle travels inside the ``-c`` script argument,
        # so it is bounded by the OS argument-size limit and is presumably
        # visible in process listings — confirm settings hold nothing secret.
        script = _spawn_worker(provider_pickle, self._socket_path)

        # The worker gets a copy of the environment without proxy variables.
        env = dict(os.environ)
        for key in _PROXY_ENV_KEYS:
            env.pop(key, None)

        self._process = await asyncio.create_subprocess_exec(
            sys.executable, "-c", script,
            stdout=asyncio.subprocess.DEVNULL,
            stderr=asyncio.subprocess.DEVNULL,
            env=env,
        )

        # Wait for socket to appear (worker needs a moment to bind)
        await self._wait_for_socket(self._socket_path, timeout=10.0)

        self._reader, self._writer = await asyncio.open_unix_connection(self._socket_path)

        # Tell the worker to start the provider
        await _write_msg(self._writer, {"cmd": "start"})
        resp = await _read_msg(self._reader)
        if not resp.get("ok"):
            raise RuntimeError(f"Worker start failed: {resp.get('error')}")

    @staticmethod
    async def _wait_for_socket(path: str, timeout: float = 10.0) -> None:
        """Poll every 50 ms until ``path`` exists; raise ``RuntimeError`` after ``timeout`` seconds."""
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            if os.path.exists(path):
                return
            await asyncio.sleep(0.05)
        raise RuntimeError(f"Worker socket did not appear at {path!r} within {timeout}s")

    async def stop(self) -> None:
        """Ask the worker to stop, terminate the process, and remove the temp dir."""
        self._started = False
        await self._close_connection(send_stop=True)
        await self._kill_process()
        self._cleanup_tmpdir()

    def _cleanup_tmpdir(self) -> None:
        """Remove the temp dir holding the socket; failures are ignored (best effort)."""
        if self._tmpdir is not None:
            try:
                self._tmpdir.cleanup()
            except Exception:
                pass
            self._tmpdir = None

    async def _close_connection(self, send_stop: bool = False) -> None:
        """Close the IPC connection, optionally sending ``stop`` first.

        Every step is best effort: the worker may already be dead, so errors
        are swallowed and the reply to ``stop`` is awaited for at most 2 s.
        """
        if self._writer is not None:
            try:
                if send_stop:
                    await _write_msg(self._writer, {"cmd": "stop"})
                    try:
                        await asyncio.wait_for(_read_msg(self._reader), timeout=2.0)
                    except Exception:
                        pass
            except Exception:
                pass
            try:
                self._writer.close()
                await asyncio.wait_for(self._writer.wait_closed(), timeout=1.0)
            except Exception:
                pass
        self._reader = None
        self._writer = None

    async def _kill_process(self) -> None:
        """Terminate the worker, escalating to kill if it does not exit within 3 s."""
        if self._process is not None:
            try:
                self._process.terminate()
                await asyncio.wait_for(self._process.wait(), timeout=3.0)
            except Exception:
                try:
                    self._process.kill()
                    # Reap the killed child so it does not linger as a zombie.
                    await asyncio.wait_for(self._process.wait(), timeout=3.0)
                except Exception:
                    pass
        self._process = None

    # ------------------------------------------------------------------
    # Crash detection & restart
    # ------------------------------------------------------------------

    async def _check_alive(self) -> bool:
        """Return ``True`` while a worker process exists and has no exit code yet."""
        if self._process is None:
            return False
        return self._process.returncode is None

    async def _maybe_restart(self) -> None:
        """Relaunch the worker on a fresh socket path.

        Raises:
            RuntimeError: If restarting is disabled or the executor is not started.
        """
        if not self._restart_on_crash or not self._started:
            raise RuntimeError("Worker process crashed and restart is disabled")
        await asyncio.sleep(self._restart_delay_s)
        # Clean up old connection
        await self._close_connection(send_stop=False)
        await self._kill_process()
        # New socket path
        self._socket_path = os.path.join(self._tmpdir.name, f"worker_{time.monotonic_ns()}.sock")
        await self._launch_worker()

    # ------------------------------------------------------------------
    # Operations
    # ------------------------------------------------------------------

    async def invoke(self, method: str, **kwargs: Any) -> Any:
        """Call ``method`` on the provider in the worker and return the decoded result.

        Raises:
            RuntimeError: If the executor is not started, the worker is dead
                and cannot be restarted, or the worker reports an error.
        """
        if not self._started or self._writer is None:
            raise RuntimeError("Executor not started")

        if not await self._check_alive():
            await self._maybe_restart()

        with PerfTimer(Event.PROVIDER_TOTAL, method=method, exec_mode="subprocess"):
            serialized_kwargs = {k: _serialize_arg(v) for k, v in kwargs.items()}
            await _write_msg(self._writer, {"cmd": "invoke", "method": method, "kwargs": serialized_kwargs})
            resp = await _read_msg(self._reader)
            if not resp.get("ok"):
                raise RuntimeError(resp.get("error", "Unknown error"))
            return _deserialize_arg(resp.get("result"))

    async def invoke_stream(self, method: str, **kwargs: Any) -> AsyncIterator[Any]:
        """Call a streaming ``method`` in the worker and yield each decoded chunk.

        Reads replies until the worker sends ``"done"``; the chunk count is
        recorded on the perf timer once the stream completes.

        Raises:
            RuntimeError: If the executor is not started, the worker is dead
                and cannot be restarted, or the worker reports an error.
        """
        if not self._started or self._writer is None:
            raise RuntimeError("Executor not started")

        if not await self._check_alive():
            await self._maybe_restart()

        with PerfTimer(Event.PROVIDER_TOTAL, method=method, exec_mode="subprocess") as t:
            serialized_kwargs = {k: _serialize_arg(v) for k, v in kwargs.items()}
            await _write_msg(self._writer, {"cmd": "invoke_stream", "method": method, "kwargs": serialized_kwargs})

            count = 0
            while True:
                resp = await _read_msg(self._reader)
                if not resp.get("ok"):
                    raise RuntimeError(resp.get("error", "Unknown error"))
                if resp.get("done"):
                    break
                count += 1
                yield _deserialize_arg(resp["chunk"])
            t.add(items=count)

    async def health_check(self) -> bool:
        """Return the worker's health result; ``False`` on any failure.

        Not started, a dead process, an error reply, an exception, or no
        reply within 5 s all count as unhealthy. No restart is attempted.
        """
        if not self._started or self._writer is None:
            return False
        if not await self._check_alive():
            return False
        try:
            await _write_msg(self._writer, {"cmd": "health"})
            resp = await asyncio.wait_for(_read_msg(self._reader), timeout=5.0)
            if not resp.get("ok"):
                return False
            return bool(resp.get("result", False))
        except Exception:
            return False
|