codex_autorunner-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codex_autorunner/__init__.py +3 -0
- codex_autorunner/bootstrap.py +151 -0
- codex_autorunner/cli.py +886 -0
- codex_autorunner/codex_cli.py +79 -0
- codex_autorunner/codex_runner.py +17 -0
- codex_autorunner/core/__init__.py +1 -0
- codex_autorunner/core/about_car.py +125 -0
- codex_autorunner/core/codex_runner.py +100 -0
- codex_autorunner/core/config.py +1465 -0
- codex_autorunner/core/doc_chat.py +547 -0
- codex_autorunner/core/docs.py +37 -0
- codex_autorunner/core/engine.py +720 -0
- codex_autorunner/core/git_utils.py +206 -0
- codex_autorunner/core/hub.py +756 -0
- codex_autorunner/core/injected_context.py +9 -0
- codex_autorunner/core/locks.py +57 -0
- codex_autorunner/core/logging_utils.py +158 -0
- codex_autorunner/core/notifications.py +465 -0
- codex_autorunner/core/optional_dependencies.py +41 -0
- codex_autorunner/core/prompt.py +107 -0
- codex_autorunner/core/prompts.py +275 -0
- codex_autorunner/core/request_context.py +21 -0
- codex_autorunner/core/runner_controller.py +116 -0
- codex_autorunner/core/runner_process.py +29 -0
- codex_autorunner/core/snapshot.py +576 -0
- codex_autorunner/core/state.py +156 -0
- codex_autorunner/core/update.py +567 -0
- codex_autorunner/core/update_runner.py +44 -0
- codex_autorunner/core/usage.py +1221 -0
- codex_autorunner/core/utils.py +108 -0
- codex_autorunner/discovery.py +102 -0
- codex_autorunner/housekeeping.py +423 -0
- codex_autorunner/integrations/__init__.py +1 -0
- codex_autorunner/integrations/app_server/__init__.py +6 -0
- codex_autorunner/integrations/app_server/client.py +1386 -0
- codex_autorunner/integrations/app_server/supervisor.py +206 -0
- codex_autorunner/integrations/github/__init__.py +10 -0
- codex_autorunner/integrations/github/service.py +889 -0
- codex_autorunner/integrations/telegram/__init__.py +1 -0
- codex_autorunner/integrations/telegram/adapter.py +1401 -0
- codex_autorunner/integrations/telegram/commands_registry.py +104 -0
- codex_autorunner/integrations/telegram/config.py +450 -0
- codex_autorunner/integrations/telegram/constants.py +154 -0
- codex_autorunner/integrations/telegram/dispatch.py +162 -0
- codex_autorunner/integrations/telegram/handlers/__init__.py +0 -0
- codex_autorunner/integrations/telegram/handlers/approvals.py +241 -0
- codex_autorunner/integrations/telegram/handlers/callbacks.py +72 -0
- codex_autorunner/integrations/telegram/handlers/commands.py +160 -0
- codex_autorunner/integrations/telegram/handlers/commands_runtime.py +5262 -0
- codex_autorunner/integrations/telegram/handlers/messages.py +477 -0
- codex_autorunner/integrations/telegram/handlers/selections.py +545 -0
- codex_autorunner/integrations/telegram/helpers.py +2084 -0
- codex_autorunner/integrations/telegram/notifications.py +164 -0
- codex_autorunner/integrations/telegram/outbox.py +174 -0
- codex_autorunner/integrations/telegram/rendering.py +102 -0
- codex_autorunner/integrations/telegram/retry.py +37 -0
- codex_autorunner/integrations/telegram/runtime.py +270 -0
- codex_autorunner/integrations/telegram/service.py +921 -0
- codex_autorunner/integrations/telegram/state.py +1223 -0
- codex_autorunner/integrations/telegram/transport.py +318 -0
- codex_autorunner/integrations/telegram/types.py +57 -0
- codex_autorunner/integrations/telegram/voice.py +413 -0
- codex_autorunner/manifest.py +150 -0
- codex_autorunner/routes/__init__.py +53 -0
- codex_autorunner/routes/base.py +470 -0
- codex_autorunner/routes/docs.py +275 -0
- codex_autorunner/routes/github.py +197 -0
- codex_autorunner/routes/repos.py +121 -0
- codex_autorunner/routes/sessions.py +137 -0
- codex_autorunner/routes/shared.py +137 -0
- codex_autorunner/routes/system.py +175 -0
- codex_autorunner/routes/terminal_images.py +107 -0
- codex_autorunner/routes/voice.py +128 -0
- codex_autorunner/server.py +23 -0
- codex_autorunner/spec_ingest.py +113 -0
- codex_autorunner/static/app.js +95 -0
- codex_autorunner/static/autoRefresh.js +209 -0
- codex_autorunner/static/bootstrap.js +105 -0
- codex_autorunner/static/bus.js +23 -0
- codex_autorunner/static/cache.js +52 -0
- codex_autorunner/static/constants.js +48 -0
- codex_autorunner/static/dashboard.js +795 -0
- codex_autorunner/static/docs.js +1514 -0
- codex_autorunner/static/env.js +99 -0
- codex_autorunner/static/github.js +168 -0
- codex_autorunner/static/hub.js +1511 -0
- codex_autorunner/static/index.html +622 -0
- codex_autorunner/static/loader.js +28 -0
- codex_autorunner/static/logs.js +690 -0
- codex_autorunner/static/mobileCompact.js +300 -0
- codex_autorunner/static/snapshot.js +116 -0
- codex_autorunner/static/state.js +87 -0
- codex_autorunner/static/styles.css +4966 -0
- codex_autorunner/static/tabs.js +50 -0
- codex_autorunner/static/terminal.js +21 -0
- codex_autorunner/static/terminalManager.js +3535 -0
- codex_autorunner/static/todoPreview.js +25 -0
- codex_autorunner/static/types.d.ts +8 -0
- codex_autorunner/static/utils.js +597 -0
- codex_autorunner/static/vendor/LICENSE.xterm +24 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-400-cyrillic-ext.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-400-cyrillic.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-400-greek.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-400-latin-ext.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-400-latin.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-400-vietnamese.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-500-cyrillic-ext.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-500-cyrillic.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-500-greek.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-500-latin-ext.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-500-latin.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-500-vietnamese.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-600-cyrillic-ext.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-600-cyrillic.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-600-greek.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-600-latin-ext.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-600-latin.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-600-vietnamese.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/OFL.txt +93 -0
- codex_autorunner/static/vendor/xterm-addon-fit.js +2 -0
- codex_autorunner/static/vendor/xterm.css +209 -0
- codex_autorunner/static/vendor/xterm.js +2 -0
- codex_autorunner/static/voice.js +591 -0
- codex_autorunner/voice/__init__.py +39 -0
- codex_autorunner/voice/capture.py +349 -0
- codex_autorunner/voice/config.py +167 -0
- codex_autorunner/voice/provider.py +66 -0
- codex_autorunner/voice/providers/__init__.py +7 -0
- codex_autorunner/voice/providers/openai_whisper.py +345 -0
- codex_autorunner/voice/resolver.py +36 -0
- codex_autorunner/voice/service.py +210 -0
- codex_autorunner/web/__init__.py +1 -0
- codex_autorunner/web/app.py +1037 -0
- codex_autorunner/web/hub_jobs.py +181 -0
- codex_autorunner/web/middleware.py +552 -0
- codex_autorunner/web/pty_session.py +357 -0
- codex_autorunner/web/runner_manager.py +25 -0
- codex_autorunner/web/schemas.py +253 -0
- codex_autorunner/web/static_assets.py +430 -0
- codex_autorunner/web/terminal_sessions.py +78 -0
- codex_autorunner/workspace.py +16 -0
- codex_autorunner-0.1.0.dist-info/METADATA +240 -0
- codex_autorunner-0.1.0.dist-info/RECORD +147 -0
- codex_autorunner-0.1.0.dist-info/WHEEL +5 -0
- codex_autorunner-0.1.0.dist-info/entry_points.txt +3 -0
- codex_autorunner-0.1.0.dist-info/licenses/LICENSE +21 -0
- codex_autorunner-0.1.0.dist-info/top_level.txt +1 -0
codex_autorunner/voice/providers/openai_whisper.py

@@ -0,0 +1,345 @@
+from __future__ import annotations
+
+import dataclasses
+import json
+import logging
+import os
+import time
+from io import BytesIO
+from typing import Any, Callable, Dict, Iterable, Mapping, Optional, cast
+
+import httpx
+
+from ..provider import (
+    AudioChunk,
+    SpeechProvider,
+    SpeechSessionMetadata,
+    TranscriptionEvent,
+    TranscriptionStream,
+)
+
+RequestFn = Callable[[bytes, Mapping[str, Any]], Dict[str, Any]]
+
+_EXT_TO_CONTENT_TYPE: dict[str, str] = {
+    # Keep these aligned with OpenAI's documented accepted formats for /audio/transcriptions.
+    "webm": "audio/webm",
+    "ogg": "audio/ogg",
+    "wav": "audio/wav",
+    "mp3": "audio/mpeg",
+    "mpeg": "audio/mpeg",
+    "mpga": "audio/mpeg",
+    "m4a": "audio/mp4",
+    "mp4": "audio/mp4",
+}
+
+
+def _normalize_content_type(raw: Optional[str]) -> Optional[str]:
+    """
+    Normalize potentially noisy MIME types.
+
+    - Browsers may include codec parameters (e.g. "audio/webm;codecs=opus")
+    - Python's mimetypes may emit unusual values (e.g. "audio/mp4a-latm" for .m4a)
+    """
+
+    if not raw:
+        return None
+    base = raw.split(";", 1)[0].strip().lower()
+    if not base:
+        return None
+
+    # Map common-but-unhelpful values to canonical ones OpenAI reliably accepts.
+    if base == "video/webm":
+        return "audio/webm"
+    if base in ("audio/mp4a-latm", "audio/x-m4a"):
+        return "audio/mp4"
+    if base == "audio/x-wav":
+        return "audio/wav"
+    if base == "video/mp4":
+        return "audio/mp4"
+
+    return base
+
+
+def _content_type_from_filename(filename: str) -> str:
+    lower = (filename or "").lower()
+    if "." in lower:
+        ext = lower.rsplit(".", 1)[-1]
+        if ext in _EXT_TO_CONTENT_TYPE:
+            return _EXT_TO_CONTENT_TYPE[ext]
+    return "application/octet-stream"
+
+
+def _pick_upload_content_type(filename: str, provided: Optional[str]) -> str:
+    normalized = _normalize_content_type(provided)
+    return normalized or _content_type_from_filename(filename)
+
+
+def _extract_http_error_detail(
+    exc: Exception,
+) -> tuple[Optional[int], Optional[str]]:
+    if not isinstance(exc, httpx.HTTPStatusError) or exc.response is None:
+        return None, None
+
+    status_code = exc.response.status_code
+    detail: Optional[str] = None
+    try:
+        payload = exc.response.json()
+        # OpenAI typically returns {"error": {"message": "...", "type": "...", ...}}
+        if isinstance(payload, dict):
+            err = payload.get("error")
+            if isinstance(err, dict) and err.get("message"):
+                detail = str(err["message"])
+            else:
+                detail = json.dumps(payload, ensure_ascii=False)
+        else:
+            detail = json.dumps(payload, ensure_ascii=False)
+    except Exception:
+        try:
+            detail = exc.response.text
+        except Exception:
+            detail = None
+
+    if detail is not None:
+        detail = detail.strip()
+        if len(detail) > 600:
+            detail = f"{detail[:600]}…"
+    return status_code, detail
+
+
+@dataclasses.dataclass
+class OpenAIWhisperSettings:
+    api_key_env: str = "OPENAI_API_KEY"
+    model: str = "whisper-1"
+    base_url: Optional[str] = None
+    temperature: float = 0.0
+    language: Optional[str] = None
+    redact_request: bool = True
+    timeout_s: float = 60.0
+
+    @classmethod
+    def from_mapping(cls, raw: Mapping[str, Any]) -> "OpenAIWhisperSettings":
+        return cls(
+            api_key_env=str(raw.get("api_key_env", "OPENAI_API_KEY")),
+            model=str(raw.get("model", "whisper-1")),
+            base_url=raw.get("base_url"),
+            temperature=float(raw.get("temperature", 0.0)),
+            language=raw.get("language"),
+            redact_request=bool(raw.get("redact_request", True)),
+            timeout_s=float(raw.get("timeout_s", 60.0)),
+        )
+
+
+class OpenAIWhisperProvider(SpeechProvider):
+    """
+    Whisper transcription provider behind the SpeechProvider abstraction.
+
+    This keeps raw audio in-memory only and redacts request metadata by default.
+    """
+
+    name = "openai_whisper"
+    supports_streaming = (
+        False  # OpenAI Whisper is request/response; we buffer chunks locally.
+    )
+
+    def __init__(
+        self,
+        settings: OpenAIWhisperSettings,
+        env: Optional[Mapping[str, str]] = None,
+        warn_on_remote_api: bool = True,
+        logger: Optional[logging.Logger] = None,
+        request_fn: Optional[RequestFn] = None,
+    ) -> None:
+        self._settings = settings
+        self._env = env or os.environ
+        self._warn_on_remote_api = warn_on_remote_api
+        self._logger = logger or logging.getLogger(__name__)
+        self._request_fn: RequestFn = request_fn or self._default_request
+
+    def start_stream(self, session: SpeechSessionMetadata) -> TranscriptionStream:
+        api_key = self._env.get(self._settings.api_key_env)
+        if api_key:
+            # Defensive normalization: .env / launchd / shells sometimes introduce
+            # trailing newlines or quoting that can yield 401s.
+            api_key = api_key.strip().strip('"').strip("'").strip("`").strip()
+        if not api_key:
+            raise ValueError(
+                f"OpenAI Whisper provider requires API key env '{self._settings.api_key_env}' to be set"
+            )
+        return _OpenAIWhisperStream(
+            api_key=api_key,
+            settings=self._settings,
+            session=session,
+            warn_on_remote_api=self._warn_on_remote_api,
+            logger=self._logger,
+            request_fn=self._request_fn,
+        )
+
+    def _default_request(
+        self, audio_bytes: bytes, payload: Mapping[str, Any]
+    ) -> Dict[str, Any]:
+        headers = {"Authorization": f"Bearer {payload['api_key']}"}
+        url = f"{payload['base_url'].rstrip('/')}/v1/audio/transcriptions"
+        data: Dict[str, Any] = {
+            "model": payload["model"],
+            "temperature": payload["temperature"],
+        }
+        if payload.get("language"):
+            data["language"] = payload["language"]
+
+        filename = payload.get("filename", "audio.webm")
+        content_type = _pick_upload_content_type(filename, payload.get("content_type"))
+        files = {
+            "file": (
+                filename,
+                BytesIO(audio_bytes),
+                content_type,
+            )
+        }
+
+        timeout_s = float(payload.get("timeout_s", 60.0))
+        response = httpx.post(
+            url, headers=headers, data=data, files=files, timeout=timeout_s
+        )
+        response.raise_for_status()
+        return cast(Dict[str, Any], response.json())
+
+
+class _OpenAIWhisperStream(TranscriptionStream):
+    def __init__(
+        self,
+        api_key: str,
+        settings: OpenAIWhisperSettings,
+        session: SpeechSessionMetadata,
+        warn_on_remote_api: bool,
+        logger: logging.Logger,
+        request_fn: RequestFn,
+    ) -> None:
+        self._api_key = api_key
+        self._settings = settings
+        self._session = session
+        self._warn_on_remote_api = warn_on_remote_api
+        self._logger = logger
+        self._request_fn = request_fn
+        self._started_at = time.monotonic()
+        self._chunks: list[bytes] = []
+        self._aborted = False
+
+    def send_chunk(self, chunk: AudioChunk) -> Iterable[TranscriptionEvent]:
+        # Only retain raw bytes in-memory until the final request to avoid persistence.
+        if self._aborted:
+            return []
+        self._chunks.append(chunk.data)
+        return []
+
+    def flush_final(self) -> Iterable[TranscriptionEvent]:
+        if self._aborted:
+            return []
+        if not self._chunks:
+            return []
+
+        audio_bytes = b"".join(self._chunks)
+        if self._warn_on_remote_api:
+            self._logger.warning(
+                "Sending audio to OpenAI Whisper (%s); audio bytes are not logged or persisted.",
+                self._settings.model,
+            )
+
+        payload = self._build_payload()
+        status_code: Optional[int] = None
+        error_detail: Optional[str] = None
+        try:
+            started = time.monotonic()
+            result = self._request_fn(audio_bytes, payload)
+            latency_ms = int((time.monotonic() - started) * 1000)
+            text = (result or {}).get("text", "") if isinstance(result, Mapping) else ""
+            return [TranscriptionEvent(text=text, is_final=True, latency_ms=latency_ms)]
+        except Exception as exc:
+            status_code, error_detail = _extract_http_error_detail(exc)
+            if status_code is None and isinstance(exc, httpx.HTTPStatusError):
+                status_code = (
+                    exc.response.status_code if exc.response is not None else None
+                )
+
+            if error_detail:
+                self._logger.error(
+                    "OpenAI Whisper transcription failed (HTTP %s): %s",
+                    status_code if status_code is not None else "n/a",
+                    error_detail,
+                    exc_info=False,
+                )
+            else:
+                self._logger.error(
+                    "OpenAI Whisper transcription failed: %s", exc, exc_info=False
+                )
+            # Avoid retry loops for credential errors; surface explicit reasons.
+            if status_code == 401:
+                return [
+                    TranscriptionEvent(text="", is_final=True, error="unauthorized")
+                ]
+            if status_code == 403:
+                return [TranscriptionEvent(text="", is_final=True, error="forbidden")]
+            if status_code == 400:
+                # Usually indicates invalid/unsupported audio format or malformed params.
+                return [
+                    TranscriptionEvent(text="", is_final=True, error="invalid_audio")
+                ]
+            if status_code == 413:
+                return [
+                    TranscriptionEvent(text="", is_final=True, error="audio_too_large")
+                ]
+            if status_code == 429:
+                return [
+                    TranscriptionEvent(text="", is_final=True, error="rate_limited")
+                ]
+            return [TranscriptionEvent(text="", is_final=True, error="provider_error")]
+        finally:
+            # Release buffered bytes to avoid accidental reuse.
+            self._chunks = []
+
+    def abort(self, reason: Optional[str] = None) -> None:
+        self._aborted = True
+        self._chunks = []
+        if reason:
+            self._logger.info("OpenAI Whisper stream aborted: %s", reason)
+
+    def _build_payload(self) -> Dict[str, Any]:
+        base_url = self._settings.base_url or "https://api.openai.com"
+        payload = {
+            "api_key": self._api_key,
+            "base_url": base_url,
+            "model": self._settings.model,
+            "temperature": self._settings.temperature,
+            "language": self._settings.language or self._session.language,
+            "timeout_s": self._settings.timeout_s,
+        }
+        if self._session.filename:
+            payload["filename"] = self._session.filename
+        if self._session.content_type:
+            payload["content_type"] = self._session.content_type
+
+        if not self._settings.redact_request:
+            payload.update(
+                {
+                    "client": self._session.client,
+                    "session_id": self._session.session_id,
+                }
+            )
+        return payload
+
+
+def build_speech_provider(
+    config: Mapping[str, Any],
+    warn_on_remote_api: bool = True,
+    env: Optional[Mapping[str, str]] = None,
+    logger: Optional[logging.Logger] = None,
+) -> OpenAIWhisperProvider:
+    """
+    Factory used by voice resolver to construct the Whisper provider from config mappings.
+    """
+    settings = OpenAIWhisperSettings.from_mapping(config)
+    return OpenAIWhisperProvider(
+        settings=settings,
+        env=env,
+        warn_on_remote_api=warn_on_remote_api,
+        logger=logger,
+    )
codex_autorunner/voice/resolver.py

@@ -0,0 +1,36 @@
+from __future__ import annotations
+
+import logging
+import os
+from typing import Mapping, Optional
+
+from .config import VoiceConfig
+from .provider import SpeechProvider
+from .providers import OpenAIWhisperProvider, build_speech_provider
+
+
+def resolve_speech_provider(
+    voice_config: VoiceConfig,
+    logger: Optional[logging.Logger] = None,
+    env: Optional[Mapping[str, str]] = None,
+) -> SpeechProvider:
+    """
+    Resolve the configured speech provider. Raises when disabled or unknown.
+    """
+    if not voice_config.enabled:
+        raise ValueError("Voice features are disabled in config")
+
+    provider_name = voice_config.provider
+    provider_configs = voice_config.providers or {}
+    if not provider_name:
+        raise ValueError("No voice provider configured")
+
+    if provider_name == OpenAIWhisperProvider.name:
+        return build_speech_provider(
+            provider_configs.get(provider_name, {}),
+            warn_on_remote_api=voice_config.warn_on_remote_api,
+            env=env or os.environ,
+            logger=logger,
+        )
+
+    raise ValueError(f"Unsupported voice provider '{provider_name}'")
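
Editorial note: every failure path in the resolver raises ValueError, so callers can treat "voice unavailable" uniformly. A minimal sketch, assuming cfg is a VoiceConfig loaded elsewhere (its loader lives in voice/config.py, not shown in this hunk):

from codex_autorunner.voice.resolver import resolve_speech_provider

try:
    provider = resolve_speech_provider(cfg)
except ValueError as exc:
    # Raised when voice is disabled, no provider is configured,
    # or the provider name is not "openai_whisper".
    print(f"voice unavailable: {exc}")
else:
    assert provider.name == "openai_whisper"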
codex_autorunner/voice/service.py

@@ -0,0 +1,210 @@
+from __future__ import annotations
+
+import logging
+import os
+import uuid
+from typing import Callable, Optional
+
+from .capture import CaptureCallbacks, CaptureState, PushToTalkCapture
+from .config import VoiceConfig
+from .provider import SpeechSessionMetadata
+from .resolver import resolve_speech_provider
+
+
+class VoiceServiceError(Exception):
+    """Raised when voice transcription fails at the service boundary."""
+
+    def __init__(self, reason: str, detail: str):
+        super().__init__(detail)
+        self.reason = reason
+        self.detail = detail
+
+
+class VoiceService:
+    """
+    Thin wrapper that wires the shared PushToTalkCapture into HTTP handlers.
+    This keeps raw audio in-memory only and centralizes provider wiring/error mapping.
+    """
+
+    def __init__(
+        self,
+        config: VoiceConfig,
+        logger: Optional[logging.Logger] = None,
+        provider_resolver: Callable[[VoiceConfig], object] = resolve_speech_provider,
+        provider: Optional[object] = None,
+        env: Optional[dict] = None,
+    ) -> None:
+        self.config = config
+        self._logger = logger or logging.getLogger(__name__)
+        self._provider_resolver = provider_resolver
+        self._provider = provider
+        self._env = env if env is not None else os.environ
+
+    def config_payload(self) -> dict:
+        """Expose safe config fields to the UI."""
+        # Check if API key is configured for status display
+        provider_cfg = self.config.providers.get(
+            self.config.provider or "openai_whisper", {}
+        )
+        api_key_env = provider_cfg.get("api_key_env", "OPENAI_API_KEY")
+        has_api_key = bool(self._env.get(api_key_env))
+
+        return {
+            "enabled": self.config.enabled,
+            "provider": self.config.provider,
+            "latency_mode": self.config.latency_mode,
+            "chunk_ms": self.config.chunk_ms,
+            "sample_rate": self.config.sample_rate,
+            "warn_on_remote_api": self.config.warn_on_remote_api,
+            "has_api_key": has_api_key,
+            "api_key_env": api_key_env,
+            "push_to_talk": {
+                "max_ms": self.config.push_to_talk.max_ms,
+                "silence_auto_stop_ms": self.config.push_to_talk.silence_auto_stop_ms,
+                "min_hold_ms": self.config.push_to_talk.min_hold_ms,
+            },
+        }
+
+    def transcribe(
+        self,
+        audio_bytes: bytes,
+        *,
+        client: str = "web",
+        user_agent: Optional[str] = None,
+        language: Optional[str] = None,
+        filename: Optional[str] = None,
+        content_type: Optional[str] = None,
+    ) -> dict:
+        if not self.config.enabled:
+            raise VoiceServiceError("disabled", "Voice is disabled")
+        if not audio_bytes:
+            raise VoiceServiceError("empty_audio", "No audio received")
+
+        provider = self._resolve_provider()
+        buffer = _TranscriptionBuffer()
+        capture = PushToTalkCapture(
+            provider=provider,
+            config=self.config,
+            callbacks=buffer.callbacks,
+            permission_requester=lambda: True,
+            client=client,
+            logger=self._logger,
+            session_builder=lambda: self._build_session_metadata(
+                provider_name=provider.name,
+                language=language,
+                client=client,
+                user_agent=user_agent,
+                filename=filename,
+                content_type=content_type,
+            ),
+        )
+
+        capture.begin_capture()
+        if capture.state == CaptureState.ERROR:
+            reason = buffer.error_reason or "capture_failed"
+            raise VoiceServiceError(reason, reason.replace("_", " "))
+
+        try:
+            capture.handle_chunk(audio_bytes)
+            capture.end_capture("client_stop")
+        except Exception as exc:
+            raise VoiceServiceError("provider_error", str(exc)) from exc
+
+        if buffer.error_reason:
+            if buffer.error_reason in ("unauthorized", "forbidden"):
+                provider_cfg = self.config.providers.get(
+                    self.config.provider or "openai_whisper", {}
+                )
+                api_key_env = provider_cfg.get("api_key_env", "OPENAI_API_KEY")
+                raise VoiceServiceError(
+                    buffer.error_reason,
+                    f"OpenAI API key rejected ({buffer.error_reason}); check {api_key_env}",
+                )
+            if buffer.error_reason == "invalid_audio":
+                meta = ""
+                if filename or content_type:
+                    meta = f" (file={filename or 'audio'}, type={content_type or 'unknown'})"
+                raise VoiceServiceError(
+                    "invalid_audio",
+                    "OpenAI rejected the audio upload (bad request). "
+                    f"Try re-recording or switching formats/browsers{meta}.",
+                )
+            if buffer.error_reason == "audio_too_large":
+                raise VoiceServiceError(
+                    "audio_too_large",
+                    "Audio upload too large; record a shorter clip and try again.",
+                )
+            if buffer.error_reason == "rate_limited":
+                raise VoiceServiceError(
+                    "rate_limited",
+                    "OpenAI rate limited the request; wait a moment and try again.",
+                )
+            raise VoiceServiceError(
+                buffer.error_reason, buffer.error_reason.replace("_", " ")
+            )
+
+        transcript = buffer.final_text or buffer.partial_text or ""
+        return {
+            "text": transcript,
+            "warnings": buffer.warnings,
+        }
+
+    def _resolve_provider(self):
+        if self._provider is None:
+            try:
+                self._provider = self._provider_resolver(
+                    self.config, logger=self._logger
+                )
+            except TypeError:
+                self._provider = self._provider_resolver(self.config)
+        return self._provider
+
+    def _build_session_metadata(
+        self,
+        *,
+        provider_name: str,
+        language: Optional[str],
+        client: Optional[str],
+        user_agent: Optional[str],
+        filename: Optional[str] = None,
+        content_type: Optional[str] = None,
+    ) -> SpeechSessionMetadata:
+        return SpeechSessionMetadata(
+            session_id=str(uuid.uuid4()),
+            provider=provider_name,
+            latency_mode=self.config.latency_mode,
+            language=language,
+            client=client,
+            user_agent=user_agent,
+            filename=filename,
+            content_type=content_type,
+        )
+
+
+class _TranscriptionBuffer:
+    def __init__(self) -> None:
+        self.partial_text = ""
+        self.final_text = ""
+        self.warnings: list[str] = []
+        self.error_reason: Optional[str] = None
+        self.callbacks = CaptureCallbacks(
+            on_partial=self._on_partial,
+            on_final=self._on_final,
+            on_warning=self._on_warning,
+            on_error=self._on_error,
+        )
+
+    def _on_partial(self, text: str) -> None:
+        if text:
+            self.partial_text = text
+
+    def _on_final(self, text: str) -> None:
+        if text:
+            self.final_text = text
+
+    def _on_warning(self, message: str) -> None:
+        if message:
+            self.warnings.append(message)
+
+    def _on_error(self, reason: str) -> None:
+        self.error_reason = reason
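
Editorial note: a sketch of how an HTTP route might consume VoiceService. The handler shape and return payload are assumptions; the transcribe signature and VoiceServiceError fields match the hunk above.

from typing import Optional

from codex_autorunner.voice.service import VoiceService, VoiceServiceError

def handle_voice_upload(
    service: VoiceService,
    audio_bytes: bytes,
    filename: Optional[str],
    content_type: Optional[str],
) -> dict:
    try:
        result = service.transcribe(
            audio_bytes,
            client="web",
            filename=filename,
            content_type=content_type,
        )
    except VoiceServiceError as exc:
        # exc.reason is a machine-readable code ("unauthorized", "invalid_audio", ...);
        # exc.detail is the human-readable message raised by the service.
        return {"ok": False, "error": exc.reason, "detail": exc.detail}
    return {"ok": True, "text": result["text"], "warnings": result["warnings"]}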
codex_autorunner/web/__init__.py

@@ -0,0 +1 @@
+"""Web server components."""