codex-autorunner 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codex_autorunner/__init__.py +3 -0
- codex_autorunner/bootstrap.py +151 -0
- codex_autorunner/cli.py +886 -0
- codex_autorunner/codex_cli.py +79 -0
- codex_autorunner/codex_runner.py +17 -0
- codex_autorunner/core/__init__.py +1 -0
- codex_autorunner/core/about_car.py +125 -0
- codex_autorunner/core/codex_runner.py +100 -0
- codex_autorunner/core/config.py +1465 -0
- codex_autorunner/core/doc_chat.py +547 -0
- codex_autorunner/core/docs.py +37 -0
- codex_autorunner/core/engine.py +720 -0
- codex_autorunner/core/git_utils.py +206 -0
- codex_autorunner/core/hub.py +756 -0
- codex_autorunner/core/injected_context.py +9 -0
- codex_autorunner/core/locks.py +57 -0
- codex_autorunner/core/logging_utils.py +158 -0
- codex_autorunner/core/notifications.py +465 -0
- codex_autorunner/core/optional_dependencies.py +41 -0
- codex_autorunner/core/prompt.py +107 -0
- codex_autorunner/core/prompts.py +275 -0
- codex_autorunner/core/request_context.py +21 -0
- codex_autorunner/core/runner_controller.py +116 -0
- codex_autorunner/core/runner_process.py +29 -0
- codex_autorunner/core/snapshot.py +576 -0
- codex_autorunner/core/state.py +156 -0
- codex_autorunner/core/update.py +567 -0
- codex_autorunner/core/update_runner.py +44 -0
- codex_autorunner/core/usage.py +1221 -0
- codex_autorunner/core/utils.py +108 -0
- codex_autorunner/discovery.py +102 -0
- codex_autorunner/housekeeping.py +423 -0
- codex_autorunner/integrations/__init__.py +1 -0
- codex_autorunner/integrations/app_server/__init__.py +6 -0
- codex_autorunner/integrations/app_server/client.py +1386 -0
- codex_autorunner/integrations/app_server/supervisor.py +206 -0
- codex_autorunner/integrations/github/__init__.py +10 -0
- codex_autorunner/integrations/github/service.py +889 -0
- codex_autorunner/integrations/telegram/__init__.py +1 -0
- codex_autorunner/integrations/telegram/adapter.py +1401 -0
- codex_autorunner/integrations/telegram/commands_registry.py +104 -0
- codex_autorunner/integrations/telegram/config.py +450 -0
- codex_autorunner/integrations/telegram/constants.py +154 -0
- codex_autorunner/integrations/telegram/dispatch.py +162 -0
- codex_autorunner/integrations/telegram/handlers/__init__.py +0 -0
- codex_autorunner/integrations/telegram/handlers/approvals.py +241 -0
- codex_autorunner/integrations/telegram/handlers/callbacks.py +72 -0
- codex_autorunner/integrations/telegram/handlers/commands.py +160 -0
- codex_autorunner/integrations/telegram/handlers/commands_runtime.py +5262 -0
- codex_autorunner/integrations/telegram/handlers/messages.py +477 -0
- codex_autorunner/integrations/telegram/handlers/selections.py +545 -0
- codex_autorunner/integrations/telegram/helpers.py +2084 -0
- codex_autorunner/integrations/telegram/notifications.py +164 -0
- codex_autorunner/integrations/telegram/outbox.py +174 -0
- codex_autorunner/integrations/telegram/rendering.py +102 -0
- codex_autorunner/integrations/telegram/retry.py +37 -0
- codex_autorunner/integrations/telegram/runtime.py +270 -0
- codex_autorunner/integrations/telegram/service.py +921 -0
- codex_autorunner/integrations/telegram/state.py +1223 -0
- codex_autorunner/integrations/telegram/transport.py +318 -0
- codex_autorunner/integrations/telegram/types.py +57 -0
- codex_autorunner/integrations/telegram/voice.py +413 -0
- codex_autorunner/manifest.py +150 -0
- codex_autorunner/routes/__init__.py +53 -0
- codex_autorunner/routes/base.py +470 -0
- codex_autorunner/routes/docs.py +275 -0
- codex_autorunner/routes/github.py +197 -0
- codex_autorunner/routes/repos.py +121 -0
- codex_autorunner/routes/sessions.py +137 -0
- codex_autorunner/routes/shared.py +137 -0
- codex_autorunner/routes/system.py +175 -0
- codex_autorunner/routes/terminal_images.py +107 -0
- codex_autorunner/routes/voice.py +128 -0
- codex_autorunner/server.py +23 -0
- codex_autorunner/spec_ingest.py +113 -0
- codex_autorunner/static/app.js +95 -0
- codex_autorunner/static/autoRefresh.js +209 -0
- codex_autorunner/static/bootstrap.js +105 -0
- codex_autorunner/static/bus.js +23 -0
- codex_autorunner/static/cache.js +52 -0
- codex_autorunner/static/constants.js +48 -0
- codex_autorunner/static/dashboard.js +795 -0
- codex_autorunner/static/docs.js +1514 -0
- codex_autorunner/static/env.js +99 -0
- codex_autorunner/static/github.js +168 -0
- codex_autorunner/static/hub.js +1511 -0
- codex_autorunner/static/index.html +622 -0
- codex_autorunner/static/loader.js +28 -0
- codex_autorunner/static/logs.js +690 -0
- codex_autorunner/static/mobileCompact.js +300 -0
- codex_autorunner/static/snapshot.js +116 -0
- codex_autorunner/static/state.js +87 -0
- codex_autorunner/static/styles.css +4966 -0
- codex_autorunner/static/tabs.js +50 -0
- codex_autorunner/static/terminal.js +21 -0
- codex_autorunner/static/terminalManager.js +3535 -0
- codex_autorunner/static/todoPreview.js +25 -0
- codex_autorunner/static/types.d.ts +8 -0
- codex_autorunner/static/utils.js +597 -0
- codex_autorunner/static/vendor/LICENSE.xterm +24 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-400-cyrillic-ext.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-400-cyrillic.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-400-greek.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-400-latin-ext.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-400-latin.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-400-vietnamese.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-500-cyrillic-ext.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-500-cyrillic.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-500-greek.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-500-latin-ext.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-500-latin.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-500-vietnamese.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-600-cyrillic-ext.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-600-cyrillic.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-600-greek.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-600-latin-ext.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-600-latin.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-600-vietnamese.woff2 +0 -0
- codex_autorunner/static/vendor/fonts/jetbrains-mono/OFL.txt +93 -0
- codex_autorunner/static/vendor/xterm-addon-fit.js +2 -0
- codex_autorunner/static/vendor/xterm.css +209 -0
- codex_autorunner/static/vendor/xterm.js +2 -0
- codex_autorunner/static/voice.js +591 -0
- codex_autorunner/voice/__init__.py +39 -0
- codex_autorunner/voice/capture.py +349 -0
- codex_autorunner/voice/config.py +167 -0
- codex_autorunner/voice/provider.py +66 -0
- codex_autorunner/voice/providers/__init__.py +7 -0
- codex_autorunner/voice/providers/openai_whisper.py +345 -0
- codex_autorunner/voice/resolver.py +36 -0
- codex_autorunner/voice/service.py +210 -0
- codex_autorunner/web/__init__.py +1 -0
- codex_autorunner/web/app.py +1037 -0
- codex_autorunner/web/hub_jobs.py +181 -0
- codex_autorunner/web/middleware.py +552 -0
- codex_autorunner/web/pty_session.py +357 -0
- codex_autorunner/web/runner_manager.py +25 -0
- codex_autorunner/web/schemas.py +253 -0
- codex_autorunner/web/static_assets.py +430 -0
- codex_autorunner/web/terminal_sessions.py +78 -0
- codex_autorunner/workspace.py +16 -0
- codex_autorunner-0.1.0.dist-info/METADATA +240 -0
- codex_autorunner-0.1.0.dist-info/RECORD +147 -0
- codex_autorunner-0.1.0.dist-info/WHEEL +5 -0
- codex_autorunner-0.1.0.dist-info/entry_points.txt +3 -0
- codex_autorunner-0.1.0.dist-info/licenses/LICENSE +21 -0
- codex_autorunner-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,349 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import dataclasses
|
|
4
|
+
import logging
|
|
5
|
+
import time
|
|
6
|
+
import uuid
|
|
7
|
+
from enum import Enum
|
|
8
|
+
from typing import Callable, Iterable, Optional, Protocol
|
|
9
|
+
|
|
10
|
+
from .config import VoiceConfig
|
|
11
|
+
from .provider import (
|
|
12
|
+
AudioChunk,
|
|
13
|
+
SpeechProvider,
|
|
14
|
+
SpeechSessionMetadata,
|
|
15
|
+
TranscriptionEvent,
|
|
16
|
+
TranscriptionStream,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class CaptureState(str, Enum):
    """Lifecycle states for a push-to-talk capture session.

    Mixes in ``str`` so state values serialize directly into JSON/UI payloads.
    """

    IDLE = "idle"  # no capture in progress; ready to start
    AWAITING_PERMISSION = "awaiting_permission"  # waiting on the microphone permission prompt
    RECORDING = "recording"  # capture started; nothing streamed to the provider yet
    STREAMING = "streaming"  # audio chunks are being forwarded to the provider
    FINALIZING = "finalizing"  # capture ended; flushing the final transcription
    ERROR = "error"  # session failed; reason is surfaced via the error callback
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclasses.dataclass
class CaptureCallbacks:
    """Optional observer hooks for capture lifecycle events.

    Every hook is optional; unset hooks are simply skipped by the emitters.
    """

    on_state: Optional[Callable[[CaptureState], None]] = None  # fired on each state transition
    on_partial: Optional[Callable[[str], None]] = None  # interim (non-final) transcription text
    on_final: Optional[Callable[[str], None]] = None  # final transcription text
    on_error: Optional[Callable[[str], None]] = None  # terminal failure reason code
    on_warning: Optional[Callable[[str], None]] = None  # non-fatal notices (e.g. retry attempts)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class VoiceCaptureSession(Protocol):
    """
    Push-to-talk lifecycle contract shared by web and TUI surfaces.

    Implementations should be thin wrappers around platform-specific recorders.
    Methods are expected to be called in order: ``request_permission`` (or let
    ``begin_capture`` trigger it), ``begin_capture``, repeated ``handle_chunk``,
    then ``end_capture``; ``fail`` may be called at any point.
    """

    def request_permission(self) -> None:
        """Prompt for microphone permission if needed."""
        ...

    def begin_capture(self) -> None:
        """Transition to recording and prepare buffers."""
        ...

    def handle_chunk(self, data: bytes) -> None:
        """Accept raw PCM/encoded chunk and forward to the provider stream."""
        ...

    def end_capture(self, reason: Optional[str] = None) -> None:
        """Stop recording and flush final transcription."""
        ...

    def fail(self, reason: str) -> None:
        """Force-fail the session and surface the reason to the UI."""
        ...
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class PushToTalkCapture(VoiceCaptureSession):
    """
    Cross-platform push-to-talk controller that sits between UI recorders and a SpeechProvider.

    This keeps raw audio in-memory only and exposes explicit states so both TUI and web can
    render consistent UX.

    Recorded chunks are buffered in ``self._chunks`` for the lifetime of the
    session so that a failed provider stream can be restarted and replayed
    (see ``_restart_stream``). Retries are bounded by ``max_retries``.
    """

    def __init__(
        self,
        provider: SpeechProvider,
        config: VoiceConfig,
        callbacks: Optional[CaptureCallbacks] = None,
        permission_requester: Optional[Callable[[], bool]] = None,
        client: Optional[str] = None,
        logger: Optional[logging.Logger] = None,
        now_fn: Callable[[], float] = time.monotonic,
        max_retries: int = 1,
        session_builder: Optional[Callable[[], SpeechSessionMetadata]] = None,
    ) -> None:
        self._provider = provider
        self._config = config
        self._callbacks = callbacks or CaptureCallbacks()
        # Default requester grants permission unconditionally (headless/TUI hosts).
        self._permission_requester = permission_requester or (lambda: True)
        self._client = client
        self._logger = logger or logging.getLogger(__name__)
        # Injectable clock (monotonic by default) so timeouts are testable.
        self._now = now_fn
        self._max_retries = max_retries
        # Optional override for session metadata construction (e.g. fixed ids in tests).
        self._session_builder = session_builder

        self._state: CaptureState = CaptureState.IDLE
        self._permission_granted = False
        self._stream: Optional[TranscriptionStream] = None
        self._retry_attempts = 0
        # In-memory chunk buffer, kept so a restarted stream can be replayed.
        self._chunks: list[AudioChunk] = []
        self._sequence = 0
        self._started_at: Optional[float] = None
        self._last_chunk_at: Optional[float] = None

    @property
    def state(self) -> CaptureState:
        """Current capture state (read-only)."""
        return self._state

    def request_permission(self) -> None:
        """Prompt for microphone permission via the injected requester.

        No-op unless the session is IDLE or ERROR. On grant, returns the
        session to IDLE with ``_permission_granted`` set; on denial or a
        raising requester, fails the session.
        """
        if self._state not in (CaptureState.IDLE, CaptureState.ERROR):
            return
        self._emit_state(CaptureState.AWAITING_PERMISSION)
        try:
            granted = bool(self._permission_requester())
        except Exception as exc:
            self.fail("permission_error")
            self._logger.error(
                "Microphone permission request failed: %s", exc, exc_info=False
            )
            return

        if not granted:
            self.fail("permission_denied")
            return

        self._permission_granted = True
        self._emit_state(CaptureState.IDLE)

    def begin_capture(self) -> None:
        """Open a provider stream and transition to RECORDING.

        Requests permission lazily on first use. Calling while a capture is
        already active fails the session with ``already_recording``.
        """
        if not self._permission_granted:
            self.request_permission()
            if not self._permission_granted:
                return

        if self._state in (
            CaptureState.RECORDING,
            CaptureState.STREAMING,
            CaptureState.FINALIZING,
        ):
            self.fail("already_recording")
            return

        try:
            stream = self._provider.start_stream(self._build_session_metadata())
            self._stream = stream
        except Exception as exc:
            self.fail("provider_error")
            self._logger.error(
                "Failed to start transcription stream: %s", exc, exc_info=False
            )
            return

        # Fresh session bookkeeping: timers, sequence counter, retry budget, buffer.
        now = self._now()
        self._started_at = now
        self._last_chunk_at = now
        self._sequence = 0
        self._retry_attempts = 0
        self._chunks = []
        self._emit_state(CaptureState.RECORDING)

    def handle_chunk(self, data: bytes) -> None:
        """Wrap ``data`` in an AudioChunk, buffer it, and forward to the stream.

        Chunk timestamps are derived from the configured ``chunk_ms`` and the
        running sequence counter, not from wall-clock time. Transient send
        failures go through the retry path; timeouts are re-checked afterwards.
        """
        if self._stream is None:
            self.fail("not_started")
            return
        # Ignore stray chunks delivered outside an active capture window.
        if self._state not in (CaptureState.RECORDING, CaptureState.STREAMING):
            return

        chunk = AudioChunk(
            data=data,
            sample_rate=self._config.sample_rate,
            start_ms=self._sequence * self._config.chunk_ms,
            end_ms=(self._sequence + 1) * self._config.chunk_ms,
            sequence=self._sequence,
        )
        self._chunks.append(chunk)
        self._sequence += 1
        self._last_chunk_at = self._now()

        try:
            events = self._stream.send_chunk(chunk)
            self._emit_state(CaptureState.STREAMING)
            self._handle_events(events)
        except Exception as exc:
            self._logger.warning(
                "Transcription chunk failed; will retry if allowed: %s",
                exc,
                exc_info=False,
            )
            if not self._fail_with_retry("provider_error"):
                return

        self._check_timeouts()

    def tick(self) -> None:
        """
        Allows hosts to poll for silence/timeout without spawning timers.
        Call from UI loops to auto-stop after silence or max duration.
        """
        self._check_timeouts()

    def end_capture(self, reason: Optional[str] = None) -> None:
        """Stop recording and flush the final transcription.

        Loops so that a retry triggered during the flush (which restarts the
        stream and replays buffered chunks) is followed by another flush
        attempt. ``reason`` is accepted for interface parity but not used here.
        """
        if self._stream is None:
            self._emit_state(CaptureState.IDLE)
            return

        while True:
            self._emit_state(CaptureState.FINALIZING)
            prior_retries = self._retry_attempts
            try:
                events = self._stream.flush_final()
                self._handle_events(events)
            except Exception as exc:
                self._logger.error(
                    "Final transcription flush failed: %s", exc, exc_info=False
                )
                if self._fail_with_retry("provider_error"):
                    continue
                return

            # If _handle_events triggered a retry due to an error event, we restarted the
            # stream and replayed chunks. We must attempt the final flush again on the
            # restarted stream, otherwise transcription will never be produced.
            if self._state == CaptureState.ERROR:
                return
            if self._retry_attempts > prior_retries:
                continue
            break

        self._reset()
        self._emit_state(CaptureState.IDLE)

    def fail(self, reason: str) -> None:
        """Force-fail the session: abort the stream, reset, and emit ERROR."""
        if self._stream is not None:
            try:
                self._stream.abort(reason)
            except Exception:
                # Abort failures should not mask the root cause.
                pass
        self._reset()
        self._emit_error(reason)
        self._emit_state(CaptureState.ERROR)

    def _build_session_metadata(self) -> SpeechSessionMetadata:
        """Produce metadata for a new provider stream (builder override wins)."""
        if self._session_builder:
            return self._session_builder()
        return SpeechSessionMetadata(
            session_id=str(uuid.uuid4()),
            provider=self._provider.name,
            latency_mode=self._config.latency_mode,
            client=self._client,
        )

    def _handle_events(self, events: Iterable[TranscriptionEvent]) -> None:
        """Dispatch provider events to callbacks; error events go through retry.

        A non-retryable error aborts dispatch entirely (remaining events are
        dropped); a successful retry skips only the failing event.
        """
        for event in events:
            if event.error:
                if not self._fail_with_retry(event.error):
                    return
                continue
            if event.is_final:
                if event.text:
                    self._emit_final(event.text)
            else:
                if event.text:
                    self._emit_partial(event.text)

    def _emit_state(self, state: CaptureState) -> None:
        # De-duplicates transitions so observers only see real state changes.
        if state == self._state:
            return
        self._state = state
        if self._callbacks.on_state:
            self._callbacks.on_state(state)

    def _emit_partial(self, text: str) -> None:
        if self._callbacks.on_partial:
            self._callbacks.on_partial(text)

    def _emit_final(self, text: str) -> None:
        if self._callbacks.on_final:
            self._callbacks.on_final(text)

    def _emit_error(self, reason: str) -> None:
        if self._callbacks.on_error:
            self._callbacks.on_error(reason)

    def _emit_warning(self, message: str) -> None:
        if self._callbacks.on_warning:
            self._callbacks.on_warning(message)

    def _check_timeouts(self) -> None:
        """Auto-stop when the max duration or the silence window is exceeded."""
        if self._state not in (CaptureState.RECORDING, CaptureState.STREAMING):
            return
        now = self._now()
        # Timestamps are in seconds (time.monotonic); config limits are in ms.
        if (
            self._started_at is not None
            and (now - self._started_at) * 1000 >= self._config.push_to_talk.max_ms
        ):
            self.end_capture("max_duration")
            return
        if (
            self._last_chunk_at is not None
            and (now - self._last_chunk_at) * 1000
            >= self._config.push_to_talk.silence_auto_stop_ms
        ):
            self.end_capture("silence")

    def _fail_with_retry(self, reason: str) -> bool:
        """Retry the stream if the error is transient and budget remains.

        Returns True when a retry was performed (stream restarted and chunks
        replayed), False when the session was failed instead. Auth/validation
        errors are treated as permanent and never retried.
        """
        if reason in (
            "unauthorized",
            "forbidden",
            "invalid_audio",
            "audio_too_large",
            "rate_limited",
        ):
            self.fail(reason)
            return False
        if self._retry_attempts >= self._max_retries:
            self.fail(reason)
            return False

        self._retry_attempts += 1
        self._emit_warning(f"{reason}_retry")
        try:
            self._restart_stream()
            return True
        except Exception as exc:
            self._logger.error(
                "Retrying transcription stream failed: %s", exc, exc_info=False
            )
            self.fail(reason)
            return False

    def _restart_stream(self) -> None:
        """Open a fresh provider stream and replay all buffered chunks into it."""
        stream = self._provider.start_stream(self._build_session_metadata())
        self._stream = stream
        replayed_state = (
            CaptureState.RECORDING if not self._chunks else CaptureState.STREAMING
        )
        for chunk in self._chunks:
            events = stream.send_chunk(chunk)
            self._handle_events(events)
        self._emit_state(replayed_state)
        # Reset the silence clock so the replay itself doesn't trigger auto-stop.
        self._last_chunk_at = self._now()

    def _reset(self) -> None:
        """Drop the stream handle and all per-session buffers/counters."""
        self._stream = None
        self._chunks = []
        self._sequence = 0
        self._started_at = None
        self._last_chunk_at = None
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import dataclasses
|
|
4
|
+
import os
|
|
5
|
+
from typing import Any, Dict, Mapping, MutableMapping, Optional
|
|
6
|
+
|
|
7
|
+
LatencyMode = str  # Alias to keep config typed without importing Literal everywhere


# Built-in per-provider defaults. VoiceConfig.from_raw merges user-supplied
# provider settings on top of these, key by key.
# NOTE(review): dict(DEFAULT_PROVIDER_CONFIG) elsewhere makes a shallow copy,
# so the nested per-provider dicts are shared with this module-level constant —
# confirm callers never mutate them in place.
DEFAULT_PROVIDER_CONFIG: Dict[str, Dict[str, Any]] = {
    "openai_whisper": {
        "api_key_env": "OPENAI_API_KEY",  # name of the env var holding the API key
        "model": "whisper-1",
        "base_url": None,  # None -> use the provider's default endpoint
        "temperature": 0,
        "language": None,  # None -> let the provider auto-detect the language
        "redact_request": True,
    }
}
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclasses.dataclass
class PushToTalkConfig:
    """Timing knobs for push-to-talk capture. All values are milliseconds."""

    max_ms: int = 15_000  # hard cap on a single capture's duration
    silence_auto_stop_ms: int = 1_200  # auto-stop after this long without a new chunk
    min_hold_ms: int = 150  # minimum hold time; presumably enforced by UI recorders — not referenced in this module
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclasses.dataclass
class VoiceConfig:
    """Normalized voice/transcription settings shared by all surfaces."""

    enabled: bool  # whether voice capture is available at all
    provider: Optional[str]  # key into `providers` (e.g. "openai_whisper")
    latency_mode: LatencyMode  # e.g. "balanced"; passed through to the provider
    chunk_ms: int  # duration represented by each audio chunk
    sample_rate: int  # capture sample rate in Hz
    warn_on_remote_api: bool  # whether the UI should warn before using a remote API
    push_to_talk: PushToTalkConfig  # timing knobs for push-to-talk
    providers: Dict[str, Dict[str, Any]]  # per-provider settings, defaults merged in

    @classmethod
    def from_raw(
        cls,
        raw: Optional[Mapping[str, Any]],
        env: Optional[Mapping[str, str]] = None,
    ) -> "VoiceConfig":
        """
        Build a normalized VoiceConfig from config.yml voice section and env overrides.
        This does not touch global config to keep voice optional until integrated.

        Precedence, lowest to highest: built-in defaults, `raw` (config file),
        then CODEX_AUTORUNNER_VOICE_* environment variables. `enabled` is also
        auto-switched on when the selected provider's API key env var is set
        and neither config nor env explicitly disabled voice.
        """
        env = env or os.environ
        # Start from built-in defaults; config/env layers override below.
        merged: MutableMapping[str, Any] = {
            "enabled": False,
            "provider": "openai_whisper",
            "latency_mode": "balanced",
            "chunk_ms": 600,
            "sample_rate": 16_000,
            "warn_on_remote_api": False,
            "push_to_talk": {
                "max_ms": 15_000,
                "silence_auto_stop_ms": 1_200,
                "min_hold_ms": 150,
            },
            "providers": dict(DEFAULT_PROVIDER_CONFIG),
        }
        if isinstance(raw, Mapping):
            merged.update(raw)
            # Re-merge push_to_talk shallowly so a partial mapping in `raw`
            # doesn't wholesale replace the nested defaults.
            base_pt = merged.get("push_to_talk")
            pt_defaults: dict[str, Any] = (
                dict(base_pt) if isinstance(base_pt, Mapping) else {}
            )
            pt_overrides_raw = raw.get("push_to_talk")
            pt_overrides: dict[str, Any] = (
                dict(pt_overrides_raw) if isinstance(pt_overrides_raw, Mapping) else {}
            )
            merged["push_to_talk"] = {**pt_defaults, **pt_overrides}

        # Rebuild providers from defaults, then overlay user settings key by key.
        providers = merged.get("providers", {})
        merged["providers"] = dict(DEFAULT_PROVIDER_CONFIG)
        if isinstance(providers, Mapping):
            for key, value in providers.items():
                if isinstance(value, Mapping):
                    merged["providers"][key] = {
                        **merged["providers"].get(key, {}),
                        **dict(value),
                    }

        # Auto-enable voice if API key is available (unless explicitly disabled via env/config)
        explicit_enabled = env.get("CODEX_AUTORUNNER_VOICE_ENABLED")
        if explicit_enabled is not None:
            merged["enabled"] = _env_bool(explicit_enabled, merged["enabled"])
        elif not merged.get("enabled"):
            # Auto-enable if the provider's API key is available
            provider_name = env.get(
                "CODEX_AUTORUNNER_VOICE_PROVIDER",
                merged.get("provider", "openai_whisper"),
            )
            provider_cfg = merged.get("providers", {}).get(provider_name, {})
            api_key_env = provider_cfg.get("api_key_env", "OPENAI_API_KEY")
            if env.get(api_key_env):
                merged["enabled"] = True
        # Unconditional env overrides for the scalar settings.
        merged["provider"] = env.get(
            "CODEX_AUTORUNNER_VOICE_PROVIDER", merged.get("provider")
        )
        merged["latency_mode"] = env.get(
            "CODEX_AUTORUNNER_VOICE_LATENCY", merged.get("latency_mode", "balanced")
        )
        merged["chunk_ms"] = _env_int(
            env.get("CODEX_AUTORUNNER_VOICE_CHUNK_MS"), merged["chunk_ms"]
        )
        merged["sample_rate"] = _env_int(
            env.get("CODEX_AUTORUNNER_VOICE_SAMPLE_RATE"), merged["sample_rate"]
        )
        # If API key is already set, don't show the warning popup (user has already configured it)
        explicit_warn = env.get("CODEX_AUTORUNNER_VOICE_WARN_REMOTE")
        if explicit_warn is not None:
            merged["warn_on_remote_api"] = _env_bool(explicit_warn, True)
        else:
            # Auto-disable warning if API key is present (user has intentionally configured it)
            provider_name = merged.get("provider", "openai_whisper")
            provider_cfg = merged.get("providers", {}).get(provider_name, {})
            api_key_env = provider_cfg.get("api_key_env", "OPENAI_API_KEY")
            if env.get(api_key_env):
                merged["warn_on_remote_api"] = False
            else:
                merged["warn_on_remote_api"] = merged.get("warn_on_remote_api", True)

        # Build the typed push-to-talk config, with env overrides per field.
        pt = merged.get("push_to_talk", {}) or {}
        push_to_talk = PushToTalkConfig(
            max_ms=_env_int(
                env.get("CODEX_AUTORUNNER_VOICE_MAX_MS"), pt.get("max_ms", 15_000)
            ),
            silence_auto_stop_ms=_env_int(
                env.get("CODEX_AUTORUNNER_VOICE_SILENCE_MS"),
                pt.get("silence_auto_stop_ms", 1_200),
            ),
            min_hold_ms=_env_int(
                env.get("CODEX_AUTORUNNER_VOICE_MIN_HOLD_MS"),
                pt.get("min_hold_ms", 150),
            ),
        )

        providers = dict(merged.get("providers") or {})
        return cls(
            enabled=bool(merged.get("enabled")),
            provider=merged.get("provider"),
            latency_mode=str(merged.get("latency_mode", "balanced")),
            chunk_ms=int(merged.get("chunk_ms", 600)),
            sample_rate=int(merged.get("sample_rate", 16_000)),
            warn_on_remote_api=bool(merged.get("warn_on_remote_api", True)),
            push_to_talk=push_to_talk,
            providers=providers,
        )
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _env_bool(raw: Optional[str], default: bool) -> bool:
|
|
156
|
+
if raw is None:
|
|
157
|
+
return default
|
|
158
|
+
return raw.strip().lower() in ("1", "true", "yes", "on")
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _env_int(raw: Optional[str], default: int) -> int:
|
|
162
|
+
if raw is None:
|
|
163
|
+
return default
|
|
164
|
+
try:
|
|
165
|
+
return int(raw.strip())
|
|
166
|
+
except (TypeError, ValueError):
|
|
167
|
+
return default
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import dataclasses
|
|
4
|
+
from typing import Iterable, Optional, Protocol
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclasses.dataclass
class SpeechSessionMetadata:
    """Context passed to providers to keep sessions auditable without leaking audio."""

    session_id: str  # unique identifier for this capture session
    provider: str  # provider name (e.g. "openai_whisper")
    latency_mode: str  # latency/quality preference, e.g. "balanced"
    language: Optional[str] = None  # None -> provider may auto-detect
    client: Optional[str] = None  # e.g., "web", "tui"
    user_agent: Optional[str] = None  # originating client's user agent, if known
    filename: Optional[str] = None  # logical filename for uploaded audio, if any
    content_type: Optional[str] = None  # MIME type of the audio payload, if known
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclasses.dataclass
class AudioChunk:
    """
    Representation of an audio chunk pushed into the provider.

    Only lightweight metadata is stored to avoid persisting raw audio outside memory.
    """

    data: bytes  # raw PCM/encoded audio payload (kept in memory only)
    sample_rate: int  # sample rate in Hz
    start_ms: int  # chunk start offset within the session, in milliseconds
    end_ms: int  # chunk end offset within the session, in milliseconds
    sequence: int  # zero-based position of this chunk in the session
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclasses.dataclass
class TranscriptionEvent:
    """A single transcription result or error emitted by a provider stream."""

    text: str  # transcribed text; may be empty
    is_final: bool  # True for the final transcript, False for interim results
    latency_ms: Optional[int] = None  # provider-reported latency, if available
    error: Optional[str] = None  # error reason code; when set, text is not meaningful
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class TranscriptionStream(Protocol):
    """Streaming handle for a single push-to-talk session."""

    def send_chunk(self, chunk: AudioChunk) -> Iterable[TranscriptionEvent]:
        """Push one audio chunk; return any transcription events produced so far."""
        ...

    def flush_final(self) -> Iterable[TranscriptionEvent]:
        """Send end-of-input and return any remaining events."""
        ...

    def abort(self, reason: Optional[str] = None) -> None:
        """Abort the stream; providers should clean up remote resources."""
        ...
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class SpeechProvider(Protocol):
    """Provider abstraction so TUI and web can share the same transcription backend."""

    name: str  # stable provider identifier (e.g. "openai_whisper")
    supports_streaming: bool  # whether the provider emits interim results

    def start_stream(self, session: SpeechSessionMetadata) -> TranscriptionStream:
        """Begin a streaming session for a given request."""
        ...