codex-autorunner 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147)
  1. codex_autorunner/__init__.py +3 -0
  2. codex_autorunner/bootstrap.py +151 -0
  3. codex_autorunner/cli.py +886 -0
  4. codex_autorunner/codex_cli.py +79 -0
  5. codex_autorunner/codex_runner.py +17 -0
  6. codex_autorunner/core/__init__.py +1 -0
  7. codex_autorunner/core/about_car.py +125 -0
  8. codex_autorunner/core/codex_runner.py +100 -0
  9. codex_autorunner/core/config.py +1465 -0
  10. codex_autorunner/core/doc_chat.py +547 -0
  11. codex_autorunner/core/docs.py +37 -0
  12. codex_autorunner/core/engine.py +720 -0
  13. codex_autorunner/core/git_utils.py +206 -0
  14. codex_autorunner/core/hub.py +756 -0
  15. codex_autorunner/core/injected_context.py +9 -0
  16. codex_autorunner/core/locks.py +57 -0
  17. codex_autorunner/core/logging_utils.py +158 -0
  18. codex_autorunner/core/notifications.py +465 -0
  19. codex_autorunner/core/optional_dependencies.py +41 -0
  20. codex_autorunner/core/prompt.py +107 -0
  21. codex_autorunner/core/prompts.py +275 -0
  22. codex_autorunner/core/request_context.py +21 -0
  23. codex_autorunner/core/runner_controller.py +116 -0
  24. codex_autorunner/core/runner_process.py +29 -0
  25. codex_autorunner/core/snapshot.py +576 -0
  26. codex_autorunner/core/state.py +156 -0
  27. codex_autorunner/core/update.py +567 -0
  28. codex_autorunner/core/update_runner.py +44 -0
  29. codex_autorunner/core/usage.py +1221 -0
  30. codex_autorunner/core/utils.py +108 -0
  31. codex_autorunner/discovery.py +102 -0
  32. codex_autorunner/housekeeping.py +423 -0
  33. codex_autorunner/integrations/__init__.py +1 -0
  34. codex_autorunner/integrations/app_server/__init__.py +6 -0
  35. codex_autorunner/integrations/app_server/client.py +1386 -0
  36. codex_autorunner/integrations/app_server/supervisor.py +206 -0
  37. codex_autorunner/integrations/github/__init__.py +10 -0
  38. codex_autorunner/integrations/github/service.py +889 -0
  39. codex_autorunner/integrations/telegram/__init__.py +1 -0
  40. codex_autorunner/integrations/telegram/adapter.py +1401 -0
  41. codex_autorunner/integrations/telegram/commands_registry.py +104 -0
  42. codex_autorunner/integrations/telegram/config.py +450 -0
  43. codex_autorunner/integrations/telegram/constants.py +154 -0
  44. codex_autorunner/integrations/telegram/dispatch.py +162 -0
  45. codex_autorunner/integrations/telegram/handlers/__init__.py +0 -0
  46. codex_autorunner/integrations/telegram/handlers/approvals.py +241 -0
  47. codex_autorunner/integrations/telegram/handlers/callbacks.py +72 -0
  48. codex_autorunner/integrations/telegram/handlers/commands.py +160 -0
  49. codex_autorunner/integrations/telegram/handlers/commands_runtime.py +5262 -0
  50. codex_autorunner/integrations/telegram/handlers/messages.py +477 -0
  51. codex_autorunner/integrations/telegram/handlers/selections.py +545 -0
  52. codex_autorunner/integrations/telegram/helpers.py +2084 -0
  53. codex_autorunner/integrations/telegram/notifications.py +164 -0
  54. codex_autorunner/integrations/telegram/outbox.py +174 -0
  55. codex_autorunner/integrations/telegram/rendering.py +102 -0
  56. codex_autorunner/integrations/telegram/retry.py +37 -0
  57. codex_autorunner/integrations/telegram/runtime.py +270 -0
  58. codex_autorunner/integrations/telegram/service.py +921 -0
  59. codex_autorunner/integrations/telegram/state.py +1223 -0
  60. codex_autorunner/integrations/telegram/transport.py +318 -0
  61. codex_autorunner/integrations/telegram/types.py +57 -0
  62. codex_autorunner/integrations/telegram/voice.py +413 -0
  63. codex_autorunner/manifest.py +150 -0
  64. codex_autorunner/routes/__init__.py +53 -0
  65. codex_autorunner/routes/base.py +470 -0
  66. codex_autorunner/routes/docs.py +275 -0
  67. codex_autorunner/routes/github.py +197 -0
  68. codex_autorunner/routes/repos.py +121 -0
  69. codex_autorunner/routes/sessions.py +137 -0
  70. codex_autorunner/routes/shared.py +137 -0
  71. codex_autorunner/routes/system.py +175 -0
  72. codex_autorunner/routes/terminal_images.py +107 -0
  73. codex_autorunner/routes/voice.py +128 -0
  74. codex_autorunner/server.py +23 -0
  75. codex_autorunner/spec_ingest.py +113 -0
  76. codex_autorunner/static/app.js +95 -0
  77. codex_autorunner/static/autoRefresh.js +209 -0
  78. codex_autorunner/static/bootstrap.js +105 -0
  79. codex_autorunner/static/bus.js +23 -0
  80. codex_autorunner/static/cache.js +52 -0
  81. codex_autorunner/static/constants.js +48 -0
  82. codex_autorunner/static/dashboard.js +795 -0
  83. codex_autorunner/static/docs.js +1514 -0
  84. codex_autorunner/static/env.js +99 -0
  85. codex_autorunner/static/github.js +168 -0
  86. codex_autorunner/static/hub.js +1511 -0
  87. codex_autorunner/static/index.html +622 -0
  88. codex_autorunner/static/loader.js +28 -0
  89. codex_autorunner/static/logs.js +690 -0
  90. codex_autorunner/static/mobileCompact.js +300 -0
  91. codex_autorunner/static/snapshot.js +116 -0
  92. codex_autorunner/static/state.js +87 -0
  93. codex_autorunner/static/styles.css +4966 -0
  94. codex_autorunner/static/tabs.js +50 -0
  95. codex_autorunner/static/terminal.js +21 -0
  96. codex_autorunner/static/terminalManager.js +3535 -0
  97. codex_autorunner/static/todoPreview.js +25 -0
  98. codex_autorunner/static/types.d.ts +8 -0
  99. codex_autorunner/static/utils.js +597 -0
  100. codex_autorunner/static/vendor/LICENSE.xterm +24 -0
  101. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-400-cyrillic-ext.woff2 +0 -0
  102. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-400-cyrillic.woff2 +0 -0
  103. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-400-greek.woff2 +0 -0
  104. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-400-latin-ext.woff2 +0 -0
  105. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-400-latin.woff2 +0 -0
  106. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-400-vietnamese.woff2 +0 -0
  107. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-500-cyrillic-ext.woff2 +0 -0
  108. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-500-cyrillic.woff2 +0 -0
  109. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-500-greek.woff2 +0 -0
  110. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-500-latin-ext.woff2 +0 -0
  111. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-500-latin.woff2 +0 -0
  112. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-500-vietnamese.woff2 +0 -0
  113. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-600-cyrillic-ext.woff2 +0 -0
  114. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-600-cyrillic.woff2 +0 -0
  115. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-600-greek.woff2 +0 -0
  116. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-600-latin-ext.woff2 +0 -0
  117. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-600-latin.woff2 +0 -0
  118. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-600-vietnamese.woff2 +0 -0
  119. codex_autorunner/static/vendor/fonts/jetbrains-mono/OFL.txt +93 -0
  120. codex_autorunner/static/vendor/xterm-addon-fit.js +2 -0
  121. codex_autorunner/static/vendor/xterm.css +209 -0
  122. codex_autorunner/static/vendor/xterm.js +2 -0
  123. codex_autorunner/static/voice.js +591 -0
  124. codex_autorunner/voice/__init__.py +39 -0
  125. codex_autorunner/voice/capture.py +349 -0
  126. codex_autorunner/voice/config.py +167 -0
  127. codex_autorunner/voice/provider.py +66 -0
  128. codex_autorunner/voice/providers/__init__.py +7 -0
  129. codex_autorunner/voice/providers/openai_whisper.py +345 -0
  130. codex_autorunner/voice/resolver.py +36 -0
  131. codex_autorunner/voice/service.py +210 -0
  132. codex_autorunner/web/__init__.py +1 -0
  133. codex_autorunner/web/app.py +1037 -0
  134. codex_autorunner/web/hub_jobs.py +181 -0
  135. codex_autorunner/web/middleware.py +552 -0
  136. codex_autorunner/web/pty_session.py +357 -0
  137. codex_autorunner/web/runner_manager.py +25 -0
  138. codex_autorunner/web/schemas.py +253 -0
  139. codex_autorunner/web/static_assets.py +430 -0
  140. codex_autorunner/web/terminal_sessions.py +78 -0
  141. codex_autorunner/workspace.py +16 -0
  142. codex_autorunner-0.1.0.dist-info/METADATA +240 -0
  143. codex_autorunner-0.1.0.dist-info/RECORD +147 -0
  144. codex_autorunner-0.1.0.dist-info/WHEEL +5 -0
  145. codex_autorunner-0.1.0.dist-info/entry_points.txt +3 -0
  146. codex_autorunner-0.1.0.dist-info/licenses/LICENSE +21 -0
  147. codex_autorunner-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,345 @@
1
+ from __future__ import annotations
2
+
3
+ import dataclasses
4
+ import json
5
+ import logging
6
+ import os
7
+ import time
8
+ from io import BytesIO
9
+ from typing import Any, Callable, Dict, Iterable, Mapping, Optional, cast
10
+
11
+ import httpx
12
+
13
+ from ..provider import (
14
+ AudioChunk,
15
+ SpeechProvider,
16
+ SpeechSessionMetadata,
17
+ TranscriptionEvent,
18
+ TranscriptionStream,
19
+ )
20
+
21
+ RequestFn = Callable[[bytes, Mapping[str, Any]], Dict[str, Any]]
22
+
23
# Lowercase file extension -> MIME type used for the multipart upload.
_EXT_TO_CONTENT_TYPE: dict[str, str] = {
    # Keep these aligned with OpenAI's documented accepted formats for /audio/transcriptions.
    "webm": "audio/webm",
    "ogg": "audio/ogg",
    "wav": "audio/wav",
    "mp3": "audio/mpeg",
    "mpeg": "audio/mpeg",
    "mpga": "audio/mpeg",
    "m4a": "audio/mp4",
    "mp4": "audio/mp4",
}
34
+
35
+
36
+ def _normalize_content_type(raw: Optional[str]) -> Optional[str]:
37
+ """
38
+ Normalize potentially noisy MIME types.
39
+
40
+ - Browsers may include codec parameters (e.g. "audio/webm;codecs=opus")
41
+ - Python's mimetypes may emit unusual values (e.g. "audio/mp4a-latm" for .m4a)
42
+ """
43
+
44
+ if not raw:
45
+ return None
46
+ base = raw.split(";", 1)[0].strip().lower()
47
+ if not base:
48
+ return None
49
+
50
+ # Map common-but-unhelpful values to canonical ones OpenAI reliably accepts.
51
+ if base == "video/webm":
52
+ return "audio/webm"
53
+ if base in ("audio/mp4a-latm", "audio/x-m4a"):
54
+ return "audio/mp4"
55
+ if base == "audio/x-wav":
56
+ return "audio/wav"
57
+ if base == "video/mp4":
58
+ return "audio/mp4"
59
+
60
+ return base
61
+
62
+
63
def _content_type_from_filename(filename: str) -> str:
    """Guess an upload MIME type from the file extension; fall back to octet-stream."""
    _, dot, suffix = (filename or "").lower().rpartition(".")
    if dot:
        return _EXT_TO_CONTENT_TYPE.get(suffix, "application/octet-stream")
    return "application/octet-stream"
70
+
71
+
72
def _pick_upload_content_type(filename: str, provided: Optional[str]) -> str:
    """Prefer the caller-provided MIME type (normalized); otherwise derive it from the filename."""
    from_caller = _normalize_content_type(provided)
    if from_caller:
        return from_caller
    return _content_type_from_filename(filename)
75
+
76
+
77
def _extract_http_error_detail(
    exc: Exception,
) -> tuple[Optional[int], Optional[str]]:
    """
    Pull (status_code, human-readable detail) out of an httpx.HTTPStatusError.

    Returns (None, None) for any other exception. The detail is stripped and
    truncated to 600 characters so it stays log-friendly.
    """
    if not isinstance(exc, httpx.HTTPStatusError) or exc.response is None:
        return None, None

    response = exc.response
    detail: Optional[str]
    try:
        payload = response.json()
    except Exception:
        # Body was not JSON; fall back to the raw text when readable.
        try:
            detail = response.text
        except Exception:
            detail = None
    else:
        # OpenAI typically returns {"error": {"message": "...", "type": "...", ...}}
        message = None
        if isinstance(payload, dict):
            err = payload.get("error")
            if isinstance(err, dict):
                message = err.get("message")
        detail = str(message) if message else json.dumps(payload, ensure_ascii=False)

    if detail is not None:
        detail = detail.strip()
        if len(detail) > 600:
            detail = f"{detail[:600]}…"
    return response.status_code, detail
107
+
108
+
109
@dataclasses.dataclass
class OpenAIWhisperSettings:
    """Configuration for the OpenAI Whisper transcription provider."""

    api_key_env: str = "OPENAI_API_KEY"
    model: str = "whisper-1"
    base_url: Optional[str] = None
    temperature: float = 0.0
    language: Optional[str] = None
    redact_request: bool = True
    timeout_s: float = 60.0

    @classmethod
    def from_mapping(cls, raw: Mapping[str, Any]) -> "OpenAIWhisperSettings":
        """Build settings from an untyped config mapping, coercing scalar fields."""
        kwargs = {
            "api_key_env": str(raw.get("api_key_env", "OPENAI_API_KEY")),
            "model": str(raw.get("model", "whisper-1")),
            "base_url": raw.get("base_url"),
            "temperature": float(raw.get("temperature", 0.0)),
            "language": raw.get("language"),
            "redact_request": bool(raw.get("redact_request", True)),
            "timeout_s": float(raw.get("timeout_s", 60.0)),
        }
        return cls(**kwargs)
130
+
131
+
132
class OpenAIWhisperProvider(SpeechProvider):
    """
    Whisper transcription provider behind the SpeechProvider abstraction.

    This keeps raw audio in-memory only and redacts request metadata by default.
    """

    name = "openai_whisper"
    # OpenAI Whisper is request/response; we buffer chunks locally.
    supports_streaming = False

    def __init__(
        self,
        settings: OpenAIWhisperSettings,
        env: Optional[Mapping[str, str]] = None,
        warn_on_remote_api: bool = True,
        logger: Optional[logging.Logger] = None,
        request_fn: Optional[RequestFn] = None,
    ) -> None:
        self._settings = settings
        self._env = env or os.environ
        self._warn_on_remote_api = warn_on_remote_api
        self._logger = logger or logging.getLogger(__name__)
        # request_fn is injectable for tests; defaults to the real HTTP call.
        self._request_fn: RequestFn = request_fn or self._default_request

    def start_stream(self, session: SpeechSessionMetadata) -> TranscriptionStream:
        """Create a buffering stream; raises ValueError when the API key env var is unset or blank."""
        key = self._env.get(self._settings.api_key_env)
        if key:
            # Defensive normalization: .env / launchd / shells sometimes introduce
            # trailing newlines or quoting that can yield 401s.
            key = key.strip().strip('"').strip("'").strip("`").strip()
        if not key:
            raise ValueError(
                f"OpenAI Whisper provider requires API key env '{self._settings.api_key_env}' to be set"
            )
        return _OpenAIWhisperStream(
            api_key=key,
            settings=self._settings,
            session=session,
            warn_on_remote_api=self._warn_on_remote_api,
            logger=self._logger,
            request_fn=self._request_fn,
        )

    def _default_request(
        self, audio_bytes: bytes, payload: Mapping[str, Any]
    ) -> Dict[str, Any]:
        """POST the buffered audio to /v1/audio/transcriptions and return the parsed JSON body."""
        form: Dict[str, Any] = {
            "model": payload["model"],
            "temperature": payload["temperature"],
        }
        if payload.get("language"):
            form["language"] = payload["language"]

        filename = payload.get("filename", "audio.webm")
        upload_type = _pick_upload_content_type(filename, payload.get("content_type"))
        response = httpx.post(
            f"{payload['base_url'].rstrip('/')}/v1/audio/transcriptions",
            headers={"Authorization": f"Bearer {payload['api_key']}"},
            data=form,
            files={"file": (filename, BytesIO(audio_bytes), upload_type)},
            timeout=float(payload.get("timeout_s", 60.0)),
        )
        response.raise_for_status()
        return cast(Dict[str, Any], response.json())
205
+
206
+
207
class _OpenAIWhisperStream(TranscriptionStream):
    """
    Buffering TranscriptionStream for the request/response Whisper API.

    Chunks are held in memory only; the single HTTP request happens in
    flush_final, and HTTP failures are mapped to stable error reason strings
    on the final TranscriptionEvent.
    """

    def __init__(
        self,
        api_key: str,
        settings: OpenAIWhisperSettings,
        session: SpeechSessionMetadata,
        warn_on_remote_api: bool,
        logger: logging.Logger,
        request_fn: RequestFn,
    ) -> None:
        self._api_key = api_key
        self._settings = settings
        self._session = session
        self._warn_on_remote_api = warn_on_remote_api
        self._logger = logger
        self._request_fn = request_fn
        self._started_at = time.monotonic()
        # Raw audio chunks, joined into one upload at flush time.
        self._chunks: list[bytes] = []
        self._aborted = False

    def send_chunk(self, chunk: AudioChunk) -> Iterable[TranscriptionEvent]:
        """Buffer one audio chunk; never produces partial events."""
        # Only retain raw bytes in-memory until the final request to avoid persistence.
        if self._aborted:
            return []
        self._chunks.append(chunk.data)
        return []

    def flush_final(self) -> Iterable[TranscriptionEvent]:
        """
        Upload the buffered audio and return a single final event.

        On success the event carries the transcript text and request latency.
        On failure the event carries text="" and an error reason mapped from
        the HTTP status (unauthorized/forbidden/invalid_audio/audio_too_large/
        rate_limited, else provider_error). Buffered bytes are always released.
        """
        if self._aborted:
            return []
        if not self._chunks:
            return []

        audio_bytes = b"".join(self._chunks)
        if self._warn_on_remote_api:
            self._logger.warning(
                "Sending audio to OpenAI Whisper (%s); audio bytes are not logged or persisted.",
                self._settings.model,
            )

        payload = self._build_payload()
        status_code: Optional[int] = None
        error_detail: Optional[str] = None
        try:
            started = time.monotonic()
            result = self._request_fn(audio_bytes, payload)
            latency_ms = int((time.monotonic() - started) * 1000)
            # Tolerate non-mapping results from injected request_fn implementations.
            text = (result or {}).get("text", "") if isinstance(result, Mapping) else ""
            return [TranscriptionEvent(text=text, is_final=True, latency_ms=latency_ms)]
        except Exception as exc:
            status_code, error_detail = _extract_http_error_detail(exc)
            # Fallback: keep the status code even when no detail could be parsed.
            if status_code is None and isinstance(exc, httpx.HTTPStatusError):
                status_code = (
                    exc.response.status_code if exc.response is not None else None
                )

            if error_detail:
                self._logger.error(
                    "OpenAI Whisper transcription failed (HTTP %s): %s",
                    status_code if status_code is not None else "n/a",
                    error_detail,
                    exc_info=False,
                )
            else:
                self._logger.error(
                    "OpenAI Whisper transcription failed: %s", exc, exc_info=False
                )
            # Avoid retry loops for credential errors; surface explicit reasons.
            if status_code == 401:
                return [
                    TranscriptionEvent(text="", is_final=True, error="unauthorized")
                ]
            if status_code == 403:
                return [TranscriptionEvent(text="", is_final=True, error="forbidden")]
            if status_code == 400:
                # Usually indicates invalid/unsupported audio format or malformed params.
                return [
                    TranscriptionEvent(text="", is_final=True, error="invalid_audio")
                ]
            if status_code == 413:
                return [
                    TranscriptionEvent(text="", is_final=True, error="audio_too_large")
                ]
            if status_code == 429:
                return [
                    TranscriptionEvent(text="", is_final=True, error="rate_limited")
                ]
            return [TranscriptionEvent(text="", is_final=True, error="provider_error")]
        finally:
            # Release buffered bytes to avoid accidental reuse.
            self._chunks = []

    def abort(self, reason: Optional[str] = None) -> None:
        """Discard buffered audio and make the stream inert for further calls."""
        self._aborted = True
        self._chunks = []
        if reason:
            self._logger.info("OpenAI Whisper stream aborted: %s", reason)

    def _build_payload(self) -> Dict[str, Any]:
        """
        Assemble the request payload for the injected request_fn.

        Session language is used only when settings do not force one. Client and
        session_id are included only when redact_request is disabled.
        """
        base_url = self._settings.base_url or "https://api.openai.com"
        payload = {
            "api_key": self._api_key,
            "base_url": base_url,
            "model": self._settings.model,
            "temperature": self._settings.temperature,
            "language": self._settings.language or self._session.language,
            "timeout_s": self._settings.timeout_s,
        }
        if self._session.filename:
            payload["filename"] = self._session.filename
        if self._session.content_type:
            payload["content_type"] = self._session.content_type

        if not self._settings.redact_request:
            payload.update(
                {
                    "client": self._session.client,
                    "session_id": self._session.session_id,
                }
            )
        return payload
328
+
329
+
330
def build_speech_provider(
    config: Mapping[str, Any],
    warn_on_remote_api: bool = True,
    env: Optional[Mapping[str, str]] = None,
    logger: Optional[logging.Logger] = None,
) -> OpenAIWhisperProvider:
    """
    Factory used by voice resolver to construct the Whisper provider from config mappings.
    """
    return OpenAIWhisperProvider(
        settings=OpenAIWhisperSettings.from_mapping(config),
        env=env,
        warn_on_remote_api=warn_on_remote_api,
        logger=logger,
    )
@@ -0,0 +1,36 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import os
5
+ from typing import Mapping, Optional
6
+
7
+ from .config import VoiceConfig
8
+ from .provider import SpeechProvider
9
+ from .providers import OpenAIWhisperProvider, build_speech_provider
10
+
11
+
12
def resolve_speech_provider(
    voice_config: VoiceConfig,
    logger: Optional[logging.Logger] = None,
    env: Optional[Mapping[str, str]] = None,
) -> SpeechProvider:
    """
    Resolve the configured speech provider. Raises when disabled or unknown.
    """
    if not voice_config.enabled:
        raise ValueError("Voice features are disabled in config")

    name = voice_config.provider
    if not name:
        raise ValueError("No voice provider configured")
    if name != OpenAIWhisperProvider.name:
        raise ValueError(f"Unsupported voice provider '{name}'")

    per_provider = voice_config.providers or {}
    return build_speech_provider(
        per_provider.get(name, {}),
        warn_on_remote_api=voice_config.warn_on_remote_api,
        env=env or os.environ,
        logger=logger,
    )
@@ -0,0 +1,210 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import os
5
+ import uuid
6
+ from typing import Callable, Optional
7
+
8
+ from .capture import CaptureCallbacks, CaptureState, PushToTalkCapture
9
+ from .config import VoiceConfig
10
+ from .provider import SpeechSessionMetadata
11
+ from .resolver import resolve_speech_provider
12
+
13
+
14
class VoiceServiceError(Exception):
    """Raised when voice transcription fails at the service boundary.

    Attributes:
        reason: machine-readable failure code (e.g. "disabled", "empty_audio",
            "rate_limited") used by callers to map errors.
        detail: human-readable message; also used as the exception text.
    """

    def __init__(self, reason: str, detail: str):
        super().__init__(detail)
        self.reason = reason
        self.detail = detail
21
+
22
+
23
class VoiceService:
    """
    Thin wrapper that wires the shared PushToTalkCapture into HTTP handlers.
    This keeps raw audio in-memory only and centralizes provider wiring/error mapping.
    """

    def __init__(
        self,
        config: VoiceConfig,
        logger: Optional[logging.Logger] = None,
        provider_resolver: Callable[[VoiceConfig], object] = resolve_speech_provider,
        provider: Optional[object] = None,
        env: Optional[dict] = None,
    ) -> None:
        # provider may be pre-built (e.g. for tests); otherwise resolved lazily.
        self.config = config
        self._logger = logger or logging.getLogger(__name__)
        self._provider_resolver = provider_resolver
        self._provider = provider
        self._env = env if env is not None else os.environ

    def config_payload(self) -> dict:
        """Expose safe config fields to the UI."""
        # Check if API key is configured for status display
        provider_cfg = self.config.providers.get(
            self.config.provider or "openai_whisper", {}
        )
        api_key_env = provider_cfg.get("api_key_env", "OPENAI_API_KEY")
        # Only whether the key exists is exposed — never the key itself.
        has_api_key = bool(self._env.get(api_key_env))

        return {
            "enabled": self.config.enabled,
            "provider": self.config.provider,
            "latency_mode": self.config.latency_mode,
            "chunk_ms": self.config.chunk_ms,
            "sample_rate": self.config.sample_rate,
            "warn_on_remote_api": self.config.warn_on_remote_api,
            "has_api_key": has_api_key,
            "api_key_env": api_key_env,
            "push_to_talk": {
                "max_ms": self.config.push_to_talk.max_ms,
                "silence_auto_stop_ms": self.config.push_to_talk.silence_auto_stop_ms,
                "min_hold_ms": self.config.push_to_talk.min_hold_ms,
            },
        }

    def transcribe(
        self,
        audio_bytes: bytes,
        *,
        client: str = "web",
        user_agent: Optional[str] = None,
        language: Optional[str] = None,
        filename: Optional[str] = None,
        content_type: Optional[str] = None,
    ) -> dict:
        """
        Transcribe one complete audio upload via the capture pipeline.

        Returns {"text": ..., "warnings": [...]}; raises VoiceServiceError with a
        stable `reason` code for every failure path (disabled config, empty audio,
        capture errors, or provider error reasons surfaced by the buffer).
        """
        if not self.config.enabled:
            raise VoiceServiceError("disabled", "Voice is disabled")
        if not audio_bytes:
            raise VoiceServiceError("empty_audio", "No audio received")

        provider = self._resolve_provider()
        buffer = _TranscriptionBuffer()
        capture = PushToTalkCapture(
            provider=provider,
            config=self.config,
            callbacks=buffer.callbacks,
            # HTTP callers already granted mic permission client-side, so always allow.
            permission_requester=lambda: True,
            client=client,
            logger=self._logger,
            session_builder=lambda: self._build_session_metadata(
                provider_name=provider.name,
                language=language,
                client=client,
                user_agent=user_agent,
                filename=filename,
                content_type=content_type,
            ),
        )

        capture.begin_capture()
        if capture.state == CaptureState.ERROR:
            reason = buffer.error_reason or "capture_failed"
            raise VoiceServiceError(reason, reason.replace("_", " "))

        try:
            # The whole upload is fed as a single chunk, then finalized immediately.
            capture.handle_chunk(audio_bytes)
            capture.end_capture("client_stop")
        except Exception as exc:
            raise VoiceServiceError("provider_error", str(exc)) from exc

        # Map buffered provider error reasons onto user-facing messages.
        if buffer.error_reason:
            if buffer.error_reason in ("unauthorized", "forbidden"):
                provider_cfg = self.config.providers.get(
                    self.config.provider or "openai_whisper", {}
                )
                api_key_env = provider_cfg.get("api_key_env", "OPENAI_API_KEY")
                raise VoiceServiceError(
                    buffer.error_reason,
                    f"OpenAI API key rejected ({buffer.error_reason}); check {api_key_env}",
                )
            if buffer.error_reason == "invalid_audio":
                meta = ""
                if filename or content_type:
                    meta = f" (file={filename or 'audio'}, type={content_type or 'unknown'})"
                raise VoiceServiceError(
                    "invalid_audio",
                    "OpenAI rejected the audio upload (bad request). "
                    f"Try re-recording or switching formats/browsers{meta}.",
                )
            if buffer.error_reason == "audio_too_large":
                raise VoiceServiceError(
                    "audio_too_large",
                    "Audio upload too large; record a shorter clip and try again.",
                )
            if buffer.error_reason == "rate_limited":
                raise VoiceServiceError(
                    "rate_limited",
                    "OpenAI rate limited the request; wait a moment and try again.",
                )
            raise VoiceServiceError(
                buffer.error_reason, buffer.error_reason.replace("_", " ")
            )

        # Prefer the final transcript; fall back to the last partial if any.
        transcript = buffer.final_text or buffer.partial_text or ""
        return {
            "text": transcript,
            "warnings": buffer.warnings,
        }

    def _resolve_provider(self):
        """Lazily resolve and cache the speech provider instance."""
        if self._provider is None:
            try:
                # Preferred resolver signature accepts a logger keyword.
                self._provider = self._provider_resolver(
                    self.config, logger=self._logger
                )
            except TypeError:
                # Fallback for resolvers with a (config)-only signature.
                # NOTE(review): this also swallows TypeErrors raised *inside* the
                # resolver and retries with the narrower signature — confirm intended.
                self._provider = self._provider_resolver(self.config)
        return self._provider

    def _build_session_metadata(
        self,
        *,
        provider_name: str,
        language: Optional[str],
        client: Optional[str],
        user_agent: Optional[str],
        filename: Optional[str] = None,
        content_type: Optional[str] = None,
    ) -> SpeechSessionMetadata:
        """Create per-request session metadata with a fresh random session id."""
        return SpeechSessionMetadata(
            session_id=str(uuid.uuid4()),
            provider=provider_name,
            latency_mode=self.config.latency_mode,
            language=language,
            client=client,
            user_agent=user_agent,
            filename=filename,
            content_type=content_type,
        )
182
+
183
+
184
class _TranscriptionBuffer:
    """Accumulates capture callback output: partial/final text, warnings, and the first error."""

    def __init__(self) -> None:
        self.partial_text = ""
        self.final_text = ""
        self.warnings: list[str] = []
        self.error_reason: Optional[str] = None
        # Hand these to PushToTalkCapture so events land in this buffer.
        self.callbacks = CaptureCallbacks(
            on_partial=self._on_partial,
            on_final=self._on_final,
            on_warning=self._on_warning,
            on_error=self._on_error,
        )

    def _on_partial(self, text: str) -> None:
        """Keep the most recent non-empty partial transcript."""
        if not text:
            return
        self.partial_text = text

    def _on_final(self, text: str) -> None:
        """Keep the most recent non-empty final transcript."""
        if not text:
            return
        self.final_text = text

    def _on_warning(self, message: str) -> None:
        """Collect non-empty warnings in arrival order."""
        if not message:
            return
        self.warnings.append(message)

    def _on_error(self, reason: str) -> None:
        """Record the (last) error reason reported by the capture pipeline."""
        self.error_reason = reason
@@ -0,0 +1 @@
1
+ """Web server components."""