codex-autorunner 0.1.0 (codex_autorunner-0.1.0-py3-none-any.whl)

This diff shows the content of the publicly released package version as it appears in its registry; every file in the wheel appears as an addition.
Files changed (147)
  1. codex_autorunner/__init__.py +3 -0
  2. codex_autorunner/bootstrap.py +151 -0
  3. codex_autorunner/cli.py +886 -0
  4. codex_autorunner/codex_cli.py +79 -0
  5. codex_autorunner/codex_runner.py +17 -0
  6. codex_autorunner/core/__init__.py +1 -0
  7. codex_autorunner/core/about_car.py +125 -0
  8. codex_autorunner/core/codex_runner.py +100 -0
  9. codex_autorunner/core/config.py +1465 -0
  10. codex_autorunner/core/doc_chat.py +547 -0
  11. codex_autorunner/core/docs.py +37 -0
  12. codex_autorunner/core/engine.py +720 -0
  13. codex_autorunner/core/git_utils.py +206 -0
  14. codex_autorunner/core/hub.py +756 -0
  15. codex_autorunner/core/injected_context.py +9 -0
  16. codex_autorunner/core/locks.py +57 -0
  17. codex_autorunner/core/logging_utils.py +158 -0
  18. codex_autorunner/core/notifications.py +465 -0
  19. codex_autorunner/core/optional_dependencies.py +41 -0
  20. codex_autorunner/core/prompt.py +107 -0
  21. codex_autorunner/core/prompts.py +275 -0
  22. codex_autorunner/core/request_context.py +21 -0
  23. codex_autorunner/core/runner_controller.py +116 -0
  24. codex_autorunner/core/runner_process.py +29 -0
  25. codex_autorunner/core/snapshot.py +576 -0
  26. codex_autorunner/core/state.py +156 -0
  27. codex_autorunner/core/update.py +567 -0
  28. codex_autorunner/core/update_runner.py +44 -0
  29. codex_autorunner/core/usage.py +1221 -0
  30. codex_autorunner/core/utils.py +108 -0
  31. codex_autorunner/discovery.py +102 -0
  32. codex_autorunner/housekeeping.py +423 -0
  33. codex_autorunner/integrations/__init__.py +1 -0
  34. codex_autorunner/integrations/app_server/__init__.py +6 -0
  35. codex_autorunner/integrations/app_server/client.py +1386 -0
  36. codex_autorunner/integrations/app_server/supervisor.py +206 -0
  37. codex_autorunner/integrations/github/__init__.py +10 -0
  38. codex_autorunner/integrations/github/service.py +889 -0
  39. codex_autorunner/integrations/telegram/__init__.py +1 -0
  40. codex_autorunner/integrations/telegram/adapter.py +1401 -0
  41. codex_autorunner/integrations/telegram/commands_registry.py +104 -0
  42. codex_autorunner/integrations/telegram/config.py +450 -0
  43. codex_autorunner/integrations/telegram/constants.py +154 -0
  44. codex_autorunner/integrations/telegram/dispatch.py +162 -0
  45. codex_autorunner/integrations/telegram/handlers/__init__.py +0 -0
  46. codex_autorunner/integrations/telegram/handlers/approvals.py +241 -0
  47. codex_autorunner/integrations/telegram/handlers/callbacks.py +72 -0
  48. codex_autorunner/integrations/telegram/handlers/commands.py +160 -0
  49. codex_autorunner/integrations/telegram/handlers/commands_runtime.py +5262 -0
  50. codex_autorunner/integrations/telegram/handlers/messages.py +477 -0
  51. codex_autorunner/integrations/telegram/handlers/selections.py +545 -0
  52. codex_autorunner/integrations/telegram/helpers.py +2084 -0
  53. codex_autorunner/integrations/telegram/notifications.py +164 -0
  54. codex_autorunner/integrations/telegram/outbox.py +174 -0
  55. codex_autorunner/integrations/telegram/rendering.py +102 -0
  56. codex_autorunner/integrations/telegram/retry.py +37 -0
  57. codex_autorunner/integrations/telegram/runtime.py +270 -0
  58. codex_autorunner/integrations/telegram/service.py +921 -0
  59. codex_autorunner/integrations/telegram/state.py +1223 -0
  60. codex_autorunner/integrations/telegram/transport.py +318 -0
  61. codex_autorunner/integrations/telegram/types.py +57 -0
  62. codex_autorunner/integrations/telegram/voice.py +413 -0
  63. codex_autorunner/manifest.py +150 -0
  64. codex_autorunner/routes/__init__.py +53 -0
  65. codex_autorunner/routes/base.py +470 -0
  66. codex_autorunner/routes/docs.py +275 -0
  67. codex_autorunner/routes/github.py +197 -0
  68. codex_autorunner/routes/repos.py +121 -0
  69. codex_autorunner/routes/sessions.py +137 -0
  70. codex_autorunner/routes/shared.py +137 -0
  71. codex_autorunner/routes/system.py +175 -0
  72. codex_autorunner/routes/terminal_images.py +107 -0
  73. codex_autorunner/routes/voice.py +128 -0
  74. codex_autorunner/server.py +23 -0
  75. codex_autorunner/spec_ingest.py +113 -0
  76. codex_autorunner/static/app.js +95 -0
  77. codex_autorunner/static/autoRefresh.js +209 -0
  78. codex_autorunner/static/bootstrap.js +105 -0
  79. codex_autorunner/static/bus.js +23 -0
  80. codex_autorunner/static/cache.js +52 -0
  81. codex_autorunner/static/constants.js +48 -0
  82. codex_autorunner/static/dashboard.js +795 -0
  83. codex_autorunner/static/docs.js +1514 -0
  84. codex_autorunner/static/env.js +99 -0
  85. codex_autorunner/static/github.js +168 -0
  86. codex_autorunner/static/hub.js +1511 -0
  87. codex_autorunner/static/index.html +622 -0
  88. codex_autorunner/static/loader.js +28 -0
  89. codex_autorunner/static/logs.js +690 -0
  90. codex_autorunner/static/mobileCompact.js +300 -0
  91. codex_autorunner/static/snapshot.js +116 -0
  92. codex_autorunner/static/state.js +87 -0
  93. codex_autorunner/static/styles.css +4966 -0
  94. codex_autorunner/static/tabs.js +50 -0
  95. codex_autorunner/static/terminal.js +21 -0
  96. codex_autorunner/static/terminalManager.js +3535 -0
  97. codex_autorunner/static/todoPreview.js +25 -0
  98. codex_autorunner/static/types.d.ts +8 -0
  99. codex_autorunner/static/utils.js +597 -0
  100. codex_autorunner/static/vendor/LICENSE.xterm +24 -0
  101. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-400-cyrillic-ext.woff2 +0 -0
  102. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-400-cyrillic.woff2 +0 -0
  103. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-400-greek.woff2 +0 -0
  104. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-400-latin-ext.woff2 +0 -0
  105. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-400-latin.woff2 +0 -0
  106. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-400-vietnamese.woff2 +0 -0
  107. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-500-cyrillic-ext.woff2 +0 -0
  108. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-500-cyrillic.woff2 +0 -0
  109. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-500-greek.woff2 +0 -0
  110. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-500-latin-ext.woff2 +0 -0
  111. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-500-latin.woff2 +0 -0
  112. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-500-vietnamese.woff2 +0 -0
  113. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-600-cyrillic-ext.woff2 +0 -0
  114. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-600-cyrillic.woff2 +0 -0
  115. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-600-greek.woff2 +0 -0
  116. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-600-latin-ext.woff2 +0 -0
  117. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-600-latin.woff2 +0 -0
  118. codex_autorunner/static/vendor/fonts/jetbrains-mono/JetBrainsMono-600-vietnamese.woff2 +0 -0
  119. codex_autorunner/static/vendor/fonts/jetbrains-mono/OFL.txt +93 -0
  120. codex_autorunner/static/vendor/xterm-addon-fit.js +2 -0
  121. codex_autorunner/static/vendor/xterm.css +209 -0
  122. codex_autorunner/static/vendor/xterm.js +2 -0
  123. codex_autorunner/static/voice.js +591 -0
  124. codex_autorunner/voice/__init__.py +39 -0
  125. codex_autorunner/voice/capture.py +349 -0
  126. codex_autorunner/voice/config.py +167 -0
  127. codex_autorunner/voice/provider.py +66 -0
  128. codex_autorunner/voice/providers/__init__.py +7 -0
  129. codex_autorunner/voice/providers/openai_whisper.py +345 -0
  130. codex_autorunner/voice/resolver.py +36 -0
  131. codex_autorunner/voice/service.py +210 -0
  132. codex_autorunner/web/__init__.py +1 -0
  133. codex_autorunner/web/app.py +1037 -0
  134. codex_autorunner/web/hub_jobs.py +181 -0
  135. codex_autorunner/web/middleware.py +552 -0
  136. codex_autorunner/web/pty_session.py +357 -0
  137. codex_autorunner/web/runner_manager.py +25 -0
  138. codex_autorunner/web/schemas.py +253 -0
  139. codex_autorunner/web/static_assets.py +430 -0
  140. codex_autorunner/web/terminal_sessions.py +78 -0
  141. codex_autorunner/workspace.py +16 -0
  142. codex_autorunner-0.1.0.dist-info/METADATA +240 -0
  143. codex_autorunner-0.1.0.dist-info/RECORD +147 -0
  144. codex_autorunner-0.1.0.dist-info/WHEEL +5 -0
  145. codex_autorunner-0.1.0.dist-info/entry_points.txt +3 -0
  146. codex_autorunner-0.1.0.dist-info/licenses/LICENSE +21 -0
  147. codex_autorunner-0.1.0.dist-info/top_level.txt +1 -0
codex_autorunner/voice/capture.py
@@ -0,0 +1,349 @@
+ from __future__ import annotations
+
+ import dataclasses
+ import logging
+ import time
+ import uuid
+ from enum import Enum
+ from typing import Callable, Iterable, Optional, Protocol
+
+ from .config import VoiceConfig
+ from .provider import (
+     AudioChunk,
+     SpeechProvider,
+     SpeechSessionMetadata,
+     TranscriptionEvent,
+     TranscriptionStream,
+ )
+
+
+ class CaptureState(str, Enum):
+     IDLE = "idle"
+     AWAITING_PERMISSION = "awaiting_permission"
+     RECORDING = "recording"
+     STREAMING = "streaming"
+     FINALIZING = "finalizing"
+     ERROR = "error"
+
+
+ @dataclasses.dataclass
+ class CaptureCallbacks:
+     on_state: Optional[Callable[[CaptureState], None]] = None
+     on_partial: Optional[Callable[[str], None]] = None
+     on_final: Optional[Callable[[str], None]] = None
+     on_error: Optional[Callable[[str], None]] = None
+     on_warning: Optional[Callable[[str], None]] = None
+
+
+ class VoiceCaptureSession(Protocol):
+     """
+     Push-to-talk lifecycle contract shared by web and TUI surfaces.
+
+     Implementations should be thin wrappers around platform-specific recorders.
+     """
+
+     def request_permission(self) -> None:
+         """Prompt for microphone permission if needed."""
+         ...
+
+     def begin_capture(self) -> None:
+         """Transition to recording and prepare buffers."""
+         ...
+
+     def handle_chunk(self, data: bytes) -> None:
+         """Accept raw PCM/encoded chunk and forward to the provider stream."""
+         ...
+
+     def end_capture(self, reason: Optional[str] = None) -> None:
+         """Stop recording and flush final transcription."""
+         ...
+
+     def fail(self, reason: str) -> None:
+         """Force-fail the session and surface the reason to the UI."""
+         ...
+
+
+ class PushToTalkCapture(VoiceCaptureSession):
+     """
+     Cross-platform push-to-talk controller that sits between UI recorders and a SpeechProvider.
+
+     This keeps raw audio in-memory only and exposes explicit states so both TUI and web can
+     render consistent UX.
+     """
+
+     def __init__(
+         self,
+         provider: SpeechProvider,
+         config: VoiceConfig,
+         callbacks: Optional[CaptureCallbacks] = None,
+         permission_requester: Optional[Callable[[], bool]] = None,
+         client: Optional[str] = None,
+         logger: Optional[logging.Logger] = None,
+         now_fn: Callable[[], float] = time.monotonic,
+         max_retries: int = 1,
+         session_builder: Optional[Callable[[], SpeechSessionMetadata]] = None,
+     ) -> None:
+         self._provider = provider
+         self._config = config
+         self._callbacks = callbacks or CaptureCallbacks()
+         self._permission_requester = permission_requester or (lambda: True)
+         self._client = client
+         self._logger = logger or logging.getLogger(__name__)
+         self._now = now_fn
+         self._max_retries = max_retries
+         self._session_builder = session_builder
+
+         self._state: CaptureState = CaptureState.IDLE
+         self._permission_granted = False
+         self._stream: Optional[TranscriptionStream] = None
+         self._retry_attempts = 0
+         self._chunks: list[AudioChunk] = []
+         self._sequence = 0
+         self._started_at: Optional[float] = None
+         self._last_chunk_at: Optional[float] = None
+
+     @property
+     def state(self) -> CaptureState:
+         return self._state
+
+     def request_permission(self) -> None:
+         if self._state not in (CaptureState.IDLE, CaptureState.ERROR):
+             return
+         self._emit_state(CaptureState.AWAITING_PERMISSION)
+         try:
+             granted = bool(self._permission_requester())
+         except Exception as exc:
+             self.fail("permission_error")
+             self._logger.error(
+                 "Microphone permission request failed: %s", exc, exc_info=False
+             )
+             return
+
+         if not granted:
+             self.fail("permission_denied")
+             return
+
+         self._permission_granted = True
+         self._emit_state(CaptureState.IDLE)
+
+     def begin_capture(self) -> None:
+         if not self._permission_granted:
+             self.request_permission()
+             if not self._permission_granted:
+                 return
+
+         if self._state in (
+             CaptureState.RECORDING,
+             CaptureState.STREAMING,
+             CaptureState.FINALIZING,
+         ):
+             self.fail("already_recording")
+             return
+
+         try:
+             stream = self._provider.start_stream(self._build_session_metadata())
+             self._stream = stream
+         except Exception as exc:
+             self.fail("provider_error")
+             self._logger.error(
+                 "Failed to start transcription stream: %s", exc, exc_info=False
+             )
+             return
+
+         now = self._now()
+         self._started_at = now
+         self._last_chunk_at = now
+         self._sequence = 0
+         self._retry_attempts = 0
+         self._chunks = []
+         self._emit_state(CaptureState.RECORDING)
+
+     def handle_chunk(self, data: bytes) -> None:
+         if self._stream is None:
+             self.fail("not_started")
+             return
+         if self._state not in (CaptureState.RECORDING, CaptureState.STREAMING):
+             return
+
+         chunk = AudioChunk(
+             data=data,
+             sample_rate=self._config.sample_rate,
+             start_ms=self._sequence * self._config.chunk_ms,
+             end_ms=(self._sequence + 1) * self._config.chunk_ms,
+             sequence=self._sequence,
+         )
+         self._chunks.append(chunk)
+         self._sequence += 1
+         self._last_chunk_at = self._now()
+
+         try:
+             events = self._stream.send_chunk(chunk)
+             self._emit_state(CaptureState.STREAMING)
+             self._handle_events(events)
+         except Exception as exc:
+             self._logger.warning(
+                 "Transcription chunk failed; will retry if allowed: %s",
+                 exc,
+                 exc_info=False,
+             )
+             if not self._fail_with_retry("provider_error"):
+                 return
+
+         self._check_timeouts()
+
+     def tick(self) -> None:
+         """
+         Allows hosts to poll for silence/timeout without spawning timers.
+         Call from UI loops to auto-stop after silence or max duration.
+         """
+         self._check_timeouts()
+
+     def end_capture(self, reason: Optional[str] = None) -> None:
+         if self._stream is None:
+             self._emit_state(CaptureState.IDLE)
+             return
+
+         while True:
+             self._emit_state(CaptureState.FINALIZING)
+             prior_retries = self._retry_attempts
+             try:
+                 events = self._stream.flush_final()
+                 self._handle_events(events)
+             except Exception as exc:
+                 self._logger.error(
+                     "Final transcription flush failed: %s", exc, exc_info=False
+                 )
+                 if self._fail_with_retry("provider_error"):
+                     continue
+                 return
+
+             # If _handle_events triggered a retry due to an error event, we restarted the
+             # stream and replayed chunks. We must attempt the final flush again on the
+             # restarted stream, otherwise transcription will never be produced.
+             if self._state == CaptureState.ERROR:
+                 return
+             if self._retry_attempts > prior_retries:
+                 continue
+             break
+
+         self._reset()
+         self._emit_state(CaptureState.IDLE)
+
+     def fail(self, reason: str) -> None:
+         if self._stream is not None:
+             try:
+                 self._stream.abort(reason)
+             except Exception:
+                 # Abort failures should not mask the root cause.
+                 pass
+         self._reset()
+         self._emit_error(reason)
+         self._emit_state(CaptureState.ERROR)
+
+     def _build_session_metadata(self) -> SpeechSessionMetadata:
+         if self._session_builder:
+             return self._session_builder()
+         return SpeechSessionMetadata(
+             session_id=str(uuid.uuid4()),
+             provider=self._provider.name,
+             latency_mode=self._config.latency_mode,
+             client=self._client,
+         )
+
+     def _handle_events(self, events: Iterable[TranscriptionEvent]) -> None:
+         for event in events:
+             if event.error:
+                 if not self._fail_with_retry(event.error):
+                     return
+                 continue
+             if event.is_final:
+                 if event.text:
+                     self._emit_final(event.text)
+             else:
+                 if event.text:
+                     self._emit_partial(event.text)
+
+     def _emit_state(self, state: CaptureState) -> None:
+         if state == self._state:
+             return
+         self._state = state
+         if self._callbacks.on_state:
+             self._callbacks.on_state(state)
+
+     def _emit_partial(self, text: str) -> None:
+         if self._callbacks.on_partial:
+             self._callbacks.on_partial(text)
+
+     def _emit_final(self, text: str) -> None:
+         if self._callbacks.on_final:
+             self._callbacks.on_final(text)
+
+     def _emit_error(self, reason: str) -> None:
+         if self._callbacks.on_error:
+             self._callbacks.on_error(reason)
+
+     def _emit_warning(self, message: str) -> None:
+         if self._callbacks.on_warning:
+             self._callbacks.on_warning(message)
+
+     def _check_timeouts(self) -> None:
+         if self._state not in (CaptureState.RECORDING, CaptureState.STREAMING):
+             return
+         now = self._now()
+         if (
+             self._started_at is not None
+             and (now - self._started_at) * 1000 >= self._config.push_to_talk.max_ms
+         ):
+             self.end_capture("max_duration")
+             return
+         if (
+             self._last_chunk_at is not None
+             and (now - self._last_chunk_at) * 1000
+             >= self._config.push_to_talk.silence_auto_stop_ms
+         ):
+             self.end_capture("silence")
+
+     def _fail_with_retry(self, reason: str) -> bool:
+         if reason in (
+             "unauthorized",
+             "forbidden",
+             "invalid_audio",
+             "audio_too_large",
+             "rate_limited",
+         ):
+             self.fail(reason)
+             return False
+         if self._retry_attempts >= self._max_retries:
+             self.fail(reason)
+             return False
+
+         self._retry_attempts += 1
+         self._emit_warning(f"{reason}_retry")
+         try:
+             self._restart_stream()
+             return True
+         except Exception as exc:
+             self._logger.error(
+                 "Retrying transcription stream failed: %s", exc, exc_info=False
+             )
+             self.fail(reason)
+             return False
+
+     def _restart_stream(self) -> None:
+         stream = self._provider.start_stream(self._build_session_metadata())
+         self._stream = stream
+         replayed_state = (
+             CaptureState.RECORDING if not self._chunks else CaptureState.STREAMING
+         )
+         for chunk in self._chunks:
+             events = stream.send_chunk(chunk)
+             self._handle_events(events)
+         self._emit_state(replayed_state)
+         self._last_chunk_at = self._now()
+
+     def _reset(self) -> None:
+         self._stream = None
+         self._chunks = []
+         self._sequence = 0
+         self._started_at = None
+         self._last_chunk_at = None
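Usage sketch (not shipped in the wheel): one way a host surface might drive the PushToTalkCapture controller above. The FakeProvider/FakeStream classes and the chunk bytes are hypothetical stand-ins for a real SpeechProvider and UI recorder.

# Hypothetical host-side wiring; FakeProvider/FakeStream are illustrative only.
from codex_autorunner.voice.capture import CaptureCallbacks, PushToTalkCapture
from codex_autorunner.voice.config import VoiceConfig
from codex_autorunner.voice.provider import TranscriptionEvent


class FakeStream:
    def send_chunk(self, chunk):
        # Echo chunk metadata back as a partial transcript.
        return [TranscriptionEvent(text=f"{len(chunk.data)} bytes", is_final=False)]

    def flush_final(self):
        return [TranscriptionEvent(text="hello world", is_final=True)]

    def abort(self, reason=None):
        pass


class FakeProvider:
    name = "fake"
    supports_streaming = True

    def start_stream(self, session):
        return FakeStream()


capture = PushToTalkCapture(
    provider=FakeProvider(),
    config=VoiceConfig.from_raw({"enabled": True}),
    callbacks=CaptureCallbacks(
        on_state=lambda state: print("state:", state.value),
        on_partial=lambda text: print("partial:", text),
        on_final=lambda text: print("final:", text),
        on_error=lambda reason: print("error:", reason),
    ),
)

capture.begin_capture()               # key pressed: permission check, then RECORDING
capture.handle_chunk(b"\x00" * 3200)  # UI recorder forwards an audio chunk
capture.tick()                        # hosts poll this to enforce silence/max-duration stops
capture.end_capture()                 # key released: FINALIZING, final transcript, back to IDLE

Because all audio stays in the controller's in-memory chunk list, a retry simply restarts the provider stream and replays those chunks; the host only ever calls the five lifecycle methods shown here.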
codex_autorunner/voice/config.py
@@ -0,0 +1,167 @@
+ from __future__ import annotations
+
+ import dataclasses
+ import os
+ from typing import Any, Dict, Mapping, MutableMapping, Optional
+
+ LatencyMode = str  # Alias to keep config typed without importing Literal everywhere
+
+
+ DEFAULT_PROVIDER_CONFIG: Dict[str, Dict[str, Any]] = {
+     "openai_whisper": {
+         "api_key_env": "OPENAI_API_KEY",
+         "model": "whisper-1",
+         "base_url": None,
+         "temperature": 0,
+         "language": None,
+         "redact_request": True,
+     }
+ }
+
+
+ @dataclasses.dataclass
+ class PushToTalkConfig:
+     max_ms: int = 15_000
+     silence_auto_stop_ms: int = 1_200
+     min_hold_ms: int = 150
+
+
+ @dataclasses.dataclass
+ class VoiceConfig:
+     enabled: bool
+     provider: Optional[str]
+     latency_mode: LatencyMode
+     chunk_ms: int
+     sample_rate: int
+     warn_on_remote_api: bool
+     push_to_talk: PushToTalkConfig
+     providers: Dict[str, Dict[str, Any]]
+
+     @classmethod
+     def from_raw(
+         cls,
+         raw: Optional[Mapping[str, Any]],
+         env: Optional[Mapping[str, str]] = None,
+     ) -> "VoiceConfig":
+         """
+         Build a normalized VoiceConfig from config.yml voice section and env overrides.
+         This does not touch global config to keep voice optional until integrated.
+         """
+         env = env or os.environ
+         merged: MutableMapping[str, Any] = {
+             "enabled": False,
+             "provider": "openai_whisper",
+             "latency_mode": "balanced",
+             "chunk_ms": 600,
+             "sample_rate": 16_000,
+             "warn_on_remote_api": False,
+             "push_to_talk": {
+                 "max_ms": 15_000,
+                 "silence_auto_stop_ms": 1_200,
+                 "min_hold_ms": 150,
+             },
+             "providers": dict(DEFAULT_PROVIDER_CONFIG),
+         }
+         if isinstance(raw, Mapping):
+             merged.update(raw)
+             base_pt = merged.get("push_to_talk")
+             pt_defaults: dict[str, Any] = (
+                 dict(base_pt) if isinstance(base_pt, Mapping) else {}
+             )
+             pt_overrides_raw = raw.get("push_to_talk")
+             pt_overrides: dict[str, Any] = (
+                 dict(pt_overrides_raw) if isinstance(pt_overrides_raw, Mapping) else {}
+             )
+             merged["push_to_talk"] = {**pt_defaults, **pt_overrides}
+
+         providers = merged.get("providers", {})
+         merged["providers"] = dict(DEFAULT_PROVIDER_CONFIG)
+         if isinstance(providers, Mapping):
+             for key, value in providers.items():
+                 if isinstance(value, Mapping):
+                     merged["providers"][key] = {
+                         **merged["providers"].get(key, {}),
+                         **dict(value),
+                     }
+
+         # Auto-enable voice if API key is available (unless explicitly disabled via env/config)
+         explicit_enabled = env.get("CODEX_AUTORUNNER_VOICE_ENABLED")
+         if explicit_enabled is not None:
+             merged["enabled"] = _env_bool(explicit_enabled, merged["enabled"])
+         elif not merged.get("enabled"):
+             # Auto-enable if the provider's API key is available
+             provider_name = env.get(
+                 "CODEX_AUTORUNNER_VOICE_PROVIDER",
+                 merged.get("provider", "openai_whisper"),
+             )
+             provider_cfg = merged.get("providers", {}).get(provider_name, {})
+             api_key_env = provider_cfg.get("api_key_env", "OPENAI_API_KEY")
+             if env.get(api_key_env):
+                 merged["enabled"] = True
+         merged["provider"] = env.get(
+             "CODEX_AUTORUNNER_VOICE_PROVIDER", merged.get("provider")
+         )
+         merged["latency_mode"] = env.get(
+             "CODEX_AUTORUNNER_VOICE_LATENCY", merged.get("latency_mode", "balanced")
+         )
+         merged["chunk_ms"] = _env_int(
+             env.get("CODEX_AUTORUNNER_VOICE_CHUNK_MS"), merged["chunk_ms"]
+         )
+         merged["sample_rate"] = _env_int(
+             env.get("CODEX_AUTORUNNER_VOICE_SAMPLE_RATE"), merged["sample_rate"]
+         )
+         # If API key is already set, don't show the warning popup (user has already configured it)
+         explicit_warn = env.get("CODEX_AUTORUNNER_VOICE_WARN_REMOTE")
+         if explicit_warn is not None:
+             merged["warn_on_remote_api"] = _env_bool(explicit_warn, True)
+         else:
+             # Auto-disable warning if API key is present (user has intentionally configured it)
+             provider_name = merged.get("provider", "openai_whisper")
+             provider_cfg = merged.get("providers", {}).get(provider_name, {})
+             api_key_env = provider_cfg.get("api_key_env", "OPENAI_API_KEY")
+             if env.get(api_key_env):
+                 merged["warn_on_remote_api"] = False
+             else:
+                 merged["warn_on_remote_api"] = merged.get("warn_on_remote_api", True)
+
+         pt = merged.get("push_to_talk", {}) or {}
+         push_to_talk = PushToTalkConfig(
+             max_ms=_env_int(
+                 env.get("CODEX_AUTORUNNER_VOICE_MAX_MS"), pt.get("max_ms", 15_000)
+             ),
+             silence_auto_stop_ms=_env_int(
+                 env.get("CODEX_AUTORUNNER_VOICE_SILENCE_MS"),
+                 pt.get("silence_auto_stop_ms", 1_200),
+             ),
+             min_hold_ms=_env_int(
+                 env.get("CODEX_AUTORUNNER_VOICE_MIN_HOLD_MS"),
+                 pt.get("min_hold_ms", 150),
+             ),
+         )
+
+         providers = dict(merged.get("providers") or {})
+         return cls(
+             enabled=bool(merged.get("enabled")),
+             provider=merged.get("provider"),
+             latency_mode=str(merged.get("latency_mode", "balanced")),
+             chunk_ms=int(merged.get("chunk_ms", 600)),
+             sample_rate=int(merged.get("sample_rate", 16_000)),
+             warn_on_remote_api=bool(merged.get("warn_on_remote_api", True)),
+             push_to_talk=push_to_talk,
+             providers=providers,
+         )
+
+
+ def _env_bool(raw: Optional[str], default: bool) -> bool:
+     if raw is None:
+         return default
+     return raw.strip().lower() in ("1", "true", "yes", "on")
+
+
+ def _env_int(raw: Optional[str], default: int) -> int:
+     if raw is None:
+         return default
+     try:
+         return int(raw.strip())
+     except (TypeError, ValueError):
+         return default
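A small, hypothetical sketch of how the precedence rules in from_raw play out: the raw mapping stands in for a config.yml voice section, and the env dict for process environment variables; the values and key name are illustrative.

# Hypothetical inputs: a config.yml voice section plus environment overrides.
from codex_autorunner.voice.config import VoiceConfig

raw = {
    "provider": "openai_whisper",
    "chunk_ms": 400,
    "push_to_talk": {"max_ms": 20_000},
}
env = {
    "OPENAI_API_KEY": "sk-example",              # presence of the key auto-enables voice
    "CODEX_AUTORUNNER_VOICE_SILENCE_MS": "900",  # env override for the silence auto-stop
}

cfg = VoiceConfig.from_raw(raw, env=env)
assert cfg.enabled is True                           # auto-enabled: the provider's api_key_env is set
assert cfg.chunk_ms == 400                           # config value kept (no env override given)
assert cfg.push_to_talk.max_ms == 20_000             # nested push_to_talk value from config
assert cfg.push_to_talk.silence_auto_stop_ms == 900  # env wins over the 1_200 default
assert cfg.warn_on_remote_api is False               # warning suppressed once the key is present

The general order is: built-in defaults, then config.yml values, then CODEX_AUTORUNNER_VOICE_* environment variables, with the API key's presence used only to flip the enabled and warning flags.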
codex_autorunner/voice/provider.py
@@ -0,0 +1,66 @@
+ from __future__ import annotations
+
+ import dataclasses
+ from typing import Iterable, Optional, Protocol
+
+
+ @dataclasses.dataclass
+ class SpeechSessionMetadata:
+     """Context passed to providers to keep sessions auditable without leaking audio."""
+
+     session_id: str
+     provider: str
+     latency_mode: str
+     language: Optional[str] = None
+     client: Optional[str] = None  # e.g., "web", "tui"
+     user_agent: Optional[str] = None
+     filename: Optional[str] = None
+     content_type: Optional[str] = None
+
+
+ @dataclasses.dataclass
+ class AudioChunk:
+     """
+     Representation of an audio chunk pushed into the provider.
+
+     Only lightweight metadata is stored to avoid persisting raw audio outside memory.
+     """
+
+     data: bytes
+     sample_rate: int
+     start_ms: int
+     end_ms: int
+     sequence: int
+
+
+ @dataclasses.dataclass
+ class TranscriptionEvent:
+     text: str
+     is_final: bool
+     latency_ms: Optional[int] = None
+     error: Optional[str] = None
+
+
+ class TranscriptionStream(Protocol):
+     """Streaming handle for a single push-to-talk session."""
+
+     def send_chunk(self, chunk: AudioChunk) -> Iterable[TranscriptionEvent]: ...
+
+     def flush_final(self) -> Iterable[TranscriptionEvent]:
+         """Send end-of-input and return any remaining events."""
+         ...
+
+     def abort(self, reason: Optional[str] = None) -> None:
+         """Abort the stream; providers should clean up remote resources."""
+         ...
+
+
+ class SpeechProvider(Protocol):
+     """Provider abstraction so TUI and web can share the same transcription backend."""
+
+     name: str
+     supports_streaming: bool
+
+     def start_stream(self, session: SpeechSessionMetadata) -> TranscriptionStream:
+         """Begin a streaming session for a given request."""
+         ...
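Because these are typing.Protocol classes, a backend only needs to match them structurally. Below is a minimal, hypothetical provider (not part of the package) that buffers chunks and emits one final event on flush, roughly how a batch transcription backend behaves.

# Hypothetical test double that structurally satisfies SpeechProvider/TranscriptionStream.
from typing import Iterable, Optional

from codex_autorunner.voice.provider import (
    AudioChunk,
    SpeechSessionMetadata,
    TranscriptionEvent,
)


class BufferingStream:
    def __init__(self, session: SpeechSessionMetadata) -> None:
        self._session = session
        self._total_bytes = 0

    def send_chunk(self, chunk: AudioChunk) -> Iterable[TranscriptionEvent]:
        self._total_bytes += len(chunk.data)
        return []  # a batch-style backend emits no partials

    def flush_final(self) -> Iterable[TranscriptionEvent]:
        return [
            TranscriptionEvent(
                text=f"session {self._session.session_id}: {self._total_bytes} bytes",
                is_final=True,
            )
        ]

    def abort(self, reason: Optional[str] = None) -> None:
        self._total_bytes = 0


class BufferingProvider:
    name = "buffering"
    supports_streaming = False

    def start_stream(self, session: SpeechSessionMetadata) -> BufferingStream:
        return BufferingStream(session)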
codex_autorunner/voice/providers/__init__.py
@@ -0,0 +1,7 @@
+ from .openai_whisper import (
+     OpenAIWhisperProvider,
+     OpenAIWhisperSettings,
+     build_speech_provider,
+ )
+
+ __all__ = ["OpenAIWhisperProvider", "OpenAIWhisperSettings", "build_speech_provider"]