@phenx-inc/ctlsurf 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/electron-vite.config.ts +5 -0
  2. package/out/headless/index.mjs +2 -1
  3. package/out/headless/index.mjs.map +2 -2
  4. package/out/main/index.js +38 -6
  5. package/out/preload/index.js +3 -0
  6. package/out/renderer/assets/{cssMode-DkmdBgO7.js → cssMode-DbMmcl1h.js} +3 -3
  7. package/out/renderer/assets/{freemarker2-CI-gkP-3.js → freemarker2-CvaHiy92.js} +1 -1
  8. package/out/renderer/assets/{handlebars-D5tEqanR.js → handlebars-D58lUIOu.js} +1 -1
  9. package/out/renderer/assets/{html-fH93EYfn.js → html-D1h1aJbM.js} +1 -1
  10. package/out/renderer/assets/{htmlMode-CRicxcwK.js → htmlMode-BdkAp9qr.js} +3 -3
  11. package/out/renderer/assets/{index-BOOvUI7u.js → index-B60JU1yI.js} +461 -111
  12. package/out/renderer/assets/{index-ezC-iarf.css → index-DJFYmHjz.css} +89 -0
  13. package/out/renderer/assets/{javascript-D1Baz4fV.js → javascript-CXqZcnvb.js} +2 -2
  14. package/out/renderer/assets/{jsonMode-Bquqf3QN.js → jsonMode-BuVr-eSl.js} +3 -3
  15. package/out/renderer/assets/{liquid-ByOcPjBF.js → liquid-LKu0Wd0B.js} +1 -1
  16. package/out/renderer/assets/{lspLanguageFeatures-BxPLl0yy.js → lspLanguageFeatures-Cjr_4HGs.js} +1 -1
  17. package/out/renderer/assets/{mdx-yuNgx0rM.js → mdx-Bl84ILla.js} +1 -1
  18. package/out/renderer/assets/ort-wasm-simd-threaded.asyncify-DMmc6YqF.wasm +0 -0
  19. package/out/renderer/assets/{python-2OakgLlA.js → python-0sFd9G1k.js} +1 -1
  20. package/out/renderer/assets/{razor-DnIVMSwa.js → razor-Cqcu1rLJ.js} +1 -1
  21. package/out/renderer/assets/transformers.web-DtSCnG36.js +33668 -0
  22. package/out/renderer/assets/{tsMode-CRIrHuii.js → tsMode-CYd3NUkW.js} +1 -1
  23. package/out/renderer/assets/{typescript-DJ3C8Yly.js → typescript-rkc9lhpi.js} +1 -1
  24. package/out/renderer/assets/{xml-CalvD5_C.js → xml-EsHEUps1.js} +1 -1
  25. package/out/renderer/assets/{yaml-Cgs8pdVp.js → yaml-B9-nQ_s2.js} +1 -1
  26. package/out/renderer/index.html +2 -2
  27. package/package.json +2 -1
  28. package/src/main/index.ts +49 -2
  29. package/src/preload/index.ts +4 -0
  30. package/src/renderer/App.tsx +34 -2
  31. package/src/renderer/components/CtlsurfPanel.tsx +19 -3
  32. package/src/renderer/components/TerminalPanel.tsx +32 -1
  33. package/src/renderer/components/VoiceInput.tsx +313 -0
  34. package/src/renderer/lib/localWhisper.ts +88 -0
  35. package/src/renderer/styles.css +89 -0
@@ -1,4 +1,4 @@
1
- import { c as createWebWorker, e as editor, U as Uri, a as MarkerTag, M as MarkerSeverity, l as languages, t as typescriptDefaults, R as Range } from "./index-BOOvUI7u.js";
1
+ import { c as createWebWorker, e as editor, U as Uri, a as MarkerTag, M as MarkerSeverity, l as languages, t as typescriptDefaults, R as Range } from "./index-B60JU1yI.js";
2
2
  class WorkerManager {
3
3
  constructor(_modeId, _defaults) {
4
4
  this._modeId = _modeId;
@@ -1,4 +1,4 @@
1
- import { l as languages } from "./index-BOOvUI7u.js";
1
+ import { l as languages } from "./index-B60JU1yI.js";
2
2
  const conf = {
3
3
  wordPattern: /(-?\d*\.\d\w*)|([^\`\~\!\@\#\%\^\&\*\(\)\-\=\+\[\{\]\}\\\|\;\:\'\"\,\.\<\>\/\?\s]+)/g,
4
4
  comments: {
@@ -1,4 +1,4 @@
1
- import { l as languages } from "./index-BOOvUI7u.js";
1
+ import { l as languages } from "./index-B60JU1yI.js";
2
2
  const conf = {
3
3
  comments: {
4
4
  blockComment: ["<!--", "-->"]
@@ -1,4 +1,4 @@
1
- import { l as languages } from "./index-BOOvUI7u.js";
1
+ import { l as languages } from "./index-B60JU1yI.js";
2
2
  const conf = {
3
3
  comments: {
4
4
  lineComment: "#"
@@ -4,8 +4,8 @@
4
4
  <meta charset="UTF-8" />
5
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
6
  <title>ctlsurf-worker</title>
7
- <script type="module" crossorigin src="./assets/index-BOOvUI7u.js"></script>
8
- <link rel="stylesheet" crossorigin href="./assets/index-ezC-iarf.css">
7
+ <script type="module" crossorigin src="./assets/index-B60JU1yI.js"></script>
8
+ <link rel="stylesheet" crossorigin href="./assets/index-DJFYmHjz.css">
9
9
  </head>
10
10
  <body>
11
11
  <div id="root"></div>
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@phenx-inc/ctlsurf",
3
- "version": "0.5.1",
3
+ "version": "0.6.0",
4
4
  "description": "Agent-agnostic terminal and desktop app for ctlsurf — run Claude Code, Codex, or any coding agent with live session logging and remote control",
5
5
  "main": "out/main/index.js",
6
6
  "bin": {
@@ -43,6 +43,7 @@
43
43
  "node": ">=18"
44
44
  },
45
45
  "dependencies": {
46
+ "@huggingface/transformers": "^4.2.0",
46
47
  "@monaco-editor/react": "^4.7.0",
47
48
  "@xterm/addon-fit": "^0.10.0",
48
49
  "@xterm/addon-serialize": "^0.14.0",
package/src/main/index.ts CHANGED
@@ -132,6 +132,13 @@ function createWindow(): void {
132
132
  }
133
133
  })
134
134
 
135
+ // Voice typing needs microphone access. Approve permission requests (there
136
+ // was no handler before, so the renderer already had the default-permissive
137
+ // behavior — this just ensures the mic request resolves to "allow").
138
+ mainWindow.webContents.session.setPermissionRequestHandler((_wc, _permission, callback) => {
139
+ callback(true)
140
+ })
141
+
135
142
  if (process.env.ELECTRON_RENDERER_URL) {
136
143
  mainWindow.loadURL(process.env.ELECTRON_RENDERER_URL)
137
144
  } else {
@@ -300,12 +307,18 @@ ipcMain.handle('worker:createProject', async () => {
300
307
  }
301
308
  })
302
309
 
303
- ipcMain.handle('worker:getWebviewInfo', async () => {
310
+ // Resolve the ctlsurf frontend URL (the origin the embedded webview loads from)
311
+ // from the active profile, applying the dev localhost:8000 → :88 rewrite.
312
+ function getCtlsurfFrontendUrl(): string {
304
313
  const profile = orchestrator.getActiveProfile()
305
314
  const baseUrl = profile.baseUrl || process.env.CTLSURF_BASE_URL || 'https://app.ctlsurf.com'
306
- const frontendUrl = baseUrl.includes('localhost:8000')
315
+ return baseUrl.includes('localhost:8000')
307
316
  ? baseUrl.replace(':8000', ':88')
308
317
  : baseUrl
318
+ }
319
+
320
+ ipcMain.handle('worker:getWebviewInfo', async () => {
321
+ const frontendUrl = getCtlsurfFrontendUrl()
309
322
 
310
323
  if (!orchestrator.ctlsurfApi.getApiKey()) {
311
324
  return { frontendUrl: `${frontendUrl}?embed=1`, authenticated: false }
@@ -348,6 +361,40 @@ ipcMain.handle('worker:getWebviewInfo', async () => {
348
361
  }
349
362
  })
350
363
 
364
// Given a URL the agent printed in the terminal, return an authenticated embed
// URL if it points at the ctlsurf frontend, else null (so the caller opens it
// in the default browser instead).
//
// The input comes from terminal output and the returned URL may carry an auth
// code, so the result is always pinned to the frontend origin. Any failure
// (unparseable URL, auth-code request error) is logged and reported as null.
ipcMain.handle('worker:getEmbedUrl', async (_event, targetUrl: string) => {
  try {
    if (!targetUrl) return null
    const frontendUrl = getCtlsurfFrontendUrl()
    const profile = orchestrator.getActiveProfile()
    const rawBaseUrl = profile.baseUrl || process.env.CTLSURF_BASE_URL || 'https://app.ctlsurf.com'

    const target = new URL(targetUrl)
    const front = new URL(frontendUrl)

    // Treat both the frontend origin and the raw (dev :8000) base as ctlsurf.
    const origins = new Set<string>([front.origin])
    try { origins.add(new URL(rawBaseUrl).origin) } catch { /* ignore */ }
    if (!origins.has(target.origin)) return null

    // Rebuild on the frontend origin (handles the dev :8000 → :88 case) so the
    // webview loads from the same origin its auth code is valid for.
    //
    // The components are assigned through the URL setters instead of resolving
    // `pathname + search + hash` against the origin: a path that begins with
    // "//" (e.g. https://app.ctlsurf.com//evil.example/x) would be parsed as a
    // scheme-relative reference and move the result — and the auth code added
    // below — onto a different host.
    const out = new URL(front.origin)
    out.pathname = target.pathname
    out.search = target.search
    out.hash = target.hash
    // Defence in depth: never hand out a code for anything off the frontend origin.
    if (out.origin !== front.origin) return null
    out.searchParams.set('embed', '1')

    if (orchestrator.ctlsurfApi.getApiKey()) {
      const { code } = await orchestrator.ctlsurfApi.getAuthCode()
      out.searchParams.set('_code', code)
    }
    return out.toString()
  } catch (err: unknown) {
    log('[worker] getEmbedUrl failed:', err instanceof Error ? err.message : String(err))
    return null
  }
})
397
+
351
398
  // ─── Profile IPC ──────────────────────────────────
352
399
 
353
400
  ipcMain.handle('profiles:list', () => orchestrator.listProfiles())
@@ -129,6 +129,10 @@ const api = {
129
129
  pageUrl?: string;
130
130
  authenticated: boolean;
131
131
  }> => ipcRenderer.invoke('worker:getWebviewInfo'),
132
+ // Resolve a terminal-printed URL to an authenticated ctlsurf embed URL, or
133
+ // null if it isn't a ctlsurf link (caller opens those externally).
134
+ getEmbedUrl: (targetUrl: string): Promise<string | null> =>
135
+ ipcRenderer.invoke('worker:getEmbedUrl', targetUrl),
132
136
 
133
137
  onWorkerStatus: (callback: (status: string) => void) => {
134
138
  const listener = (_event: Electron.IpcRendererEvent, status: string) => callback(status)
@@ -1,5 +1,6 @@
1
1
  import { useState, useEffect, useCallback, useRef } from 'react'
2
- import { TerminalPanel, destroyTerminal } from './components/TerminalPanel'
2
+ import { TerminalPanel, destroyTerminal, focusTerminal } from './components/TerminalPanel'
3
+ import { VoiceInput } from './components/VoiceInput'
3
4
  import { CtlsurfPanel } from './components/CtlsurfPanel'
4
5
  import { EditorPanel } from './components/EditorPanel'
5
6
  import { AgentPicker } from './components/AgentPicker'
@@ -59,6 +60,7 @@ declare global {
59
60
  getWebviewInfo: () => Promise<{
60
61
  frontendUrl: string; pageUrl?: string; authenticated: boolean;
61
62
  }>
63
+ getEmbedUrl: (targetUrl: string) => Promise<string | null>
62
64
  getWorkerStatus: () => Promise<string>
63
65
  getWorkerId: () => Promise<string | null>
64
66
  onWorkerStatus: (callback: (status: string) => void) => () => void
@@ -117,6 +119,11 @@ export default function App() {
117
119
  const [cwd, setCwd] = useState<string | null>(null)
118
120
  const [projectName, setProjectName] = useState<string | null>(null)
119
121
  const [updateInfo, setUpdateInfo] = useState<UpdateInfo | null>(null)
122
+ // A ctlsurf link clicked in the terminal: the authenticated URL to load in
123
+ // the ctlsurf panel. `n` bumps on every request so re-clicking the same URL
124
+ // still triggers a navigation.
125
+ const [ctlsurfNav, setCtlsurfNav] = useState<{ url: string; n: number } | null>(null)
126
+ const ctlsurfNavCounter = useRef(0)
120
127
 
121
128
  // Multi-tab state
122
129
  const [tabs, setTabs] = useState<TabInfo[]>(() => {
@@ -200,6 +207,16 @@ export default function App() {
200
207
  }
201
208
  }, [trackingActive])
202
209
 
210
+ // Voice typing: inject the transcribed text into the active terminal exactly
211
+ // as if it were typed (no auto-submit), then refocus so the user can press
212
+ // Enter to send it.
213
+ const handleVoiceTranscript = useCallback((text: string) => {
214
+ const trimmed = text.trim()
215
+ if (!trimmed) return
216
+ window.worker.writePty(activeTabId, trimmed)
217
+ focusTerminal(activeTabId)
218
+ }, [activeTabId])
219
+
203
220
  const cwdRef = useRef<string | null>(null)
204
221
 
205
222
  const handleSpawn = useCallback(async (tabId: string, agent: AgentConfig) => {
@@ -349,6 +366,20 @@ export default function App() {
349
366
  return () => window.removeEventListener('keydown', handleKeyDown)
350
367
  }, [togglePane])
351
368
 
369
+ // A ctlsurf link was clicked in the terminal — reveal the ctlsurf pane if
370
+ // hidden and hand the authenticated URL to the panel to load.
371
+ useEffect(() => {
372
+ const onOpen = (e: Event) => {
373
+ const url = (e as CustomEvent).detail?.url as string | undefined
374
+ if (!url) return
375
+ if (!findPaneIds(layout).includes('ctlsurf')) togglePane('ctlsurf')
376
+ ctlsurfNavCounter.current += 1
377
+ setCtlsurfNav({ url, n: ctlsurfNavCounter.current })
378
+ }
379
+ window.addEventListener('ctlsurf-open-url', onOpen)
380
+ return () => window.removeEventListener('ctlsurf-open-url', onOpen)
381
+ }, [layout, togglePane])
382
+
352
383
  // Build pane contents (always rendered, layout controls visibility)
353
384
  const panes: PaneContent[] = [
354
385
  { id: 'editor', label: 'Editor', content: <EditorPanel cwd={cwd} /> },
@@ -409,7 +440,7 @@ export default function App() {
409
440
  </div>
410
441
  ),
411
442
  },
412
- { id: 'ctlsurf', label: 'ctlsurf', content: <CtlsurfPanel /> },
443
+ { id: 'ctlsurf', label: 'ctlsurf', content: <CtlsurfPanel navigate={ctlsurfNav} /> },
413
444
  ]
414
445
 
415
446
  return (
@@ -451,6 +482,7 @@ export default function App() {
451
482
  </svg>
452
483
  <span>Tickets</span>
453
484
  </button>
485
+ <VoiceInput onTranscript={handleVoiceTranscript} />
454
486
  <span className="titlebar-separator" />
455
487
  {agents.map(a => {
456
488
  const activeTab = tabs.find(t => t.id === activeTabId)
@@ -1,9 +1,15 @@
1
1
  import { useRef, useEffect, useState } from 'react'
2
2
 
3
- export function CtlsurfPanel() {
3
+ interface CtlsurfPanelProps {
4
+ // An authenticated ctlsurf URL to load (e.g. from a terminal link click).
5
+ // `n` bumps per request so repeat navigations to the same URL still apply.
6
+ navigate?: { url: string; n: number } | null
7
+ }
8
+
9
+ export function CtlsurfPanel({ navigate }: CtlsurfPanelProps) {
4
10
  const webviewRef = useRef<HTMLWebViewElement>(null)
5
11
  const [url, setUrl] = useState<string | null>(null)
6
- const [key, setKey] = useState(0) // force remount on cwd change
12
+ const [key, setKey] = useState(0) // force remount on cwd change / navigation
7
13
 
8
14
  const loadUrl = async () => {
9
15
  try {
@@ -15,7 +21,17 @@ export function CtlsurfPanel() {
15
21
  }
16
22
  }
17
23
 
18
- useEffect(() => { loadUrl() }, [])
24
+ // Default-load the project page on mount, unless a navigation is already
25
+ // queued (e.g. the pane was just revealed to show a clicked link).
26
+ useEffect(() => { if (!navigate?.url) loadUrl() }, [])
27
+
28
+ // Navigate to a specific authenticated URL when requested.
29
+ useEffect(() => {
30
+ if (navigate?.url) {
31
+ setUrl(navigate.url)
32
+ setKey(k => k + 1)
33
+ }
34
+ }, [navigate?.n])
19
35
 
20
36
  // Reload webview when cwd changes
21
37
  useEffect(() => {
@@ -22,6 +22,31 @@ function copySelectionAsEmailTable(terminal: Terminal): void {
22
22
  )
23
23
  }
24
24
 
25
// Open a URL in a new window, mirroring WebLinksAddon's safe default: clear the
// opener first so the new page can't reach back into this window.
// NOTE(review): whether the new window ends up in the system browser depends on
// how the main process handles window.open — confirm there.
//
// If window.open() yields no window the link cannot be opened; that is reported
// with a console warning instead of being dropped silently.
function openExternal(uri: string): void {
  const win = window.open()
  if (!win) {
    console.warn('[terminal] could not open link: window.open() returned no window')
    return
  }
  // Assigning opener can throw on some window proxies; navigation should still proceed.
  try { (win as { opener: unknown }).opener = null } catch { /* ignore */ }
  win.location.href = uri
}
34
+
35
+ // Custom handler for links clicked in the terminal. ctlsurf page links are
36
+ // routed into the embedded (authenticated) ctlsurf panel via a window event;
37
+ // everything else opens in the default browser as before.
38
+ function handleTerminalLink(_event: MouseEvent, uri: string): void {
39
+ window.worker.getEmbedUrl(uri)
40
+ .then((embedUrl) => {
41
+ if (embedUrl) {
42
+ window.dispatchEvent(new CustomEvent('ctlsurf-open-url', { detail: { url: embedUrl } }))
43
+ } else {
44
+ openExternal(uri)
45
+ }
46
+ })
47
+ .catch(() => openExternal(uri))
48
+ }
49
+
25
50
  interface AgentConfig {
26
51
  id: string
27
52
  name: string
@@ -105,7 +130,7 @@ function getOrCreateTerminal(tabId: string, onExit: (tabId: string) => void): {
105
130
 
106
131
  const fitAddon = new FitAddon()
107
132
  terminal.loadAddon(fitAddon)
108
- terminal.loadAddon(new WebLinksAddon())
133
+ terminal.loadAddon(new WebLinksAddon(handleTerminalLink))
109
134
 
110
135
  // ⌘/Ctrl+Shift+E → convert the current selection to an email table. Handled
111
136
  // here (not as a DOM listener) so it works even while a mouse-tracking TUI
@@ -160,6 +185,12 @@ function getOrCreateTerminal(tabId: string, onExit: (tabId: string) => void): {
160
185
  return { terminal, fitAddon }
161
186
  }
162
187
 
188
+ // Return keyboard focus to a tab's terminal (e.g. after inserting voice text
189
+ // so the user can immediately press Enter to submit).
190
+ export function focusTerminal(tabId: string): void {
191
+ _terminals.get(tabId)?.terminal.focus()
192
+ }
193
+
163
194
  export function destroyTerminal(tabId: string): void {
164
195
  const state = _terminals.get(tabId)
165
196
  if (!state) return
@@ -0,0 +1,313 @@
1
+ import { useCallback, useEffect, useRef, useState } from 'react'
2
+ import { transcribeBlob, type ModelProgress } from '../lib/localWhisper'
3
+
4
+ // ─── Minimal Web Speech API typings ──────────────────
5
+ // webkitSpeechRecognition isn't in the standard DOM lib, so declare just the
6
+ // surface we use. This API is frequently unavailable inside Electron (Chromium
7
+ // ships without Google's speech backend); when it fails we fall back to a local
8
+ // Whisper model (see ../lib/localWhisper).
9
+
10
+ interface SpeechRecognitionResult {
11
+ isFinal: boolean
12
+ 0: { transcript: string }
13
+ }
14
+ interface SpeechRecognitionEvent {
15
+ resultIndex: number
16
+ results: { length: number;[index: number]: SpeechRecognitionResult }
17
+ }
18
+ interface SpeechRecognitionErrorEvent { error: string }
19
+ interface SpeechRecognitionLike {
20
+ lang: string
21
+ continuous: boolean
22
+ interimResults: boolean
23
+ start: () => void
24
+ stop: () => void
25
+ abort: () => void
26
+ onresult: ((e: SpeechRecognitionEvent) => void) | null
27
+ onerror: ((e: SpeechRecognitionErrorEvent) => void) | null
28
+ onend: (() => void) | null
29
+ }
30
+ type SpeechRecognitionCtor = new () => SpeechRecognitionLike
31
+
32
+ function getRecognitionCtor(): SpeechRecognitionCtor | null {
33
+ const w = window as unknown as {
34
+ SpeechRecognition?: SpeechRecognitionCtor
35
+ webkitSpeechRecognition?: SpeechRecognitionCtor
36
+ }
37
+ return w.SpeechRecognition || w.webkitSpeechRecognition || null
38
+ }
39
+
40
+ // ─── Capabilities & engine selection ─────────────────
41
+
42
+ type Engine = 'web-speech' | 'local'
43
+ type Phase = 'idle' | 'listening' | 'transcribing'
44
+
45
+ const ENGINE_KEY = 'ctlsurf.voiceEngine'
46
+
47
+ const WEB_SPEECH_SUPPORTED = getRecognitionCtor() !== null
48
+ const LOCAL_SUPPORTED =
49
+ typeof navigator !== 'undefined' &&
50
+ !!navigator.mediaDevices?.getUserMedia &&
51
+ typeof MediaRecorder !== 'undefined' &&
52
+ typeof OfflineAudioContext !== 'undefined'
53
+ const ANY_SUPPORTED = WEB_SPEECH_SUPPORTED || LOCAL_SUPPORTED
54
+
55
+ function loadInitialEngine(): Engine {
56
+ if (!WEB_SPEECH_SUPPORTED && LOCAL_SUPPORTED) return 'local'
57
+ try {
58
+ if (localStorage.getItem(ENGINE_KEY) === 'local' && LOCAL_SUPPORTED) return 'local'
59
+ } catch { /* ignore */ }
60
+ return WEB_SPEECH_SUPPORTED ? 'web-speech' : 'local'
61
+ }
62
+
63
+ // Web Speech errors that mean the engine itself is unreachable (vs. a mic
64
+ // permission/hardware problem, which would also break the local fallback).
65
+ function isEngineUnavailable(code: string): boolean {
66
+ return code === 'network' || code === 'service-not-allowed'
67
+ }
68
+
69
+ function describeMicError(err: unknown): string {
70
+ const name = (err as { name?: string })?.name
71
+ if (name === 'NotAllowedError' || name === 'SecurityError') return 'Microphone access denied'
72
+ if (name === 'NotFoundError') return 'No microphone found'
73
+ return 'Could not start microphone'
74
+ }
75
+
76
+ interface VoiceInputProps {
77
+ // Called once per push-to-talk session with the final transcribed text.
78
+ onTranscript: (text: string) => void
79
+ }
80
+
81
/**
 * Push-to-talk microphone button.
 *
 * Holding the button captures speech with one of two engines: the browser's
 * Web Speech recogniser, or a MediaRecorder capture that is transcribed by
 * `transcribeBlob` after release. When Web Speech reports that its service is
 * unreachable, the component switches to the local engine, remembers that
 * choice in localStorage and asks the user to press again.
 *
 * @param onTranscript Called with the final text of a push-to-talk session.
 *   It is not called when nothing was recognised.
 */
export function VoiceInput({ onTranscript }: VoiceInputProps) {
  const [engine, setEngine] = useState<Engine>(loadInitialEngine)
  const [phase, setPhase] = useState<Phase>('idle')
  const [interim, setInterim] = useState('')
  const [modelPct, setModelPct] = useState<number | null>(null)
  const [error, setError] = useState<string | null>(null)
  const [notice, setNotice] = useState<string | null>(null)

  // Web Speech refs
  const recognitionRef = useRef<SpeechRecognitionLike | null>(null)
  const finalRef = useRef('')
  // Local (Whisper) refs
  const streamRef = useRef<MediaStream | null>(null)
  const recorderRef = useRef<MediaRecorder | null>(null)
  const chunksRef = useRef<Blob[]>([])
  // Set true when the user releases before getUserMedia resolves (quick tap).
  const cancelGestureRef = useRef(false)
  // True while a getUserMedia request is pending. `phase` stays 'idle' during
  // that await, so without this flag a second press would start a parallel
  // capture and overwrite (leak) the first stream and recorder.
  const startingRef = useRef(false)

  // Mirror the latest engine / callback into refs so event handlers that were
  // created earlier always see the current values.
  const engineRef = useRef(engine)
  useEffect(() => { engineRef.current = engine }, [engine])

  const onTranscriptRef = useRef(onTranscript)
  useEffect(() => { onTranscriptRef.current = onTranscript }, [onTranscript])

  // Auto-dismiss transient chips.
  useEffect(() => {
    if (!error) return
    const t = setTimeout(() => setError(null), 4500)
    return () => clearTimeout(t)
  }, [error])
  useEffect(() => {
    if (!notice) return
    const t = setTimeout(() => setNotice(null), 5000)
    return () => clearTimeout(t)
  }, [notice])

  // Persist the local engine as the preferred one and tell the user why.
  const switchToLocal = useCallback((reason: string) => {
    try { localStorage.setItem(ENGINE_KEY, 'local') } catch { /* ignore */ }
    setEngine('local')
    setNotice(reason)
  }, [])

  // Release the microphone held by the local engine, if any.
  const stopStream = useCallback(() => {
    streamRef.current?.getTracks().forEach((t) => t.stop())
    streamRef.current = null
  }, [])

  // ─── Web Speech engine ─────────────────────────────

  const startWebSpeech = useCallback(() => {
    const Ctor = getRecognitionCtor()
    if (!Ctor || recognitionRef.current) return
    setError(null); setNotice(null); setInterim('')
    finalRef.current = ''

    const rec = new Ctor()
    rec.lang = navigator.language || 'en-US'
    rec.continuous = true
    rec.interimResults = true

    rec.onresult = (event) => {
      // Recompute from the full result list each time: finalised segments
      // accumulate into the transcript, the rest is shown as interim text.
      let finalText = ''
      let interimText = ''
      for (let i = 0; i < event.results.length; i++) {
        const res = event.results[i]
        if (res.isFinal) finalText += res[0].transcript
        else interimText += res[0].transcript
      }
      finalRef.current = finalText
      setInterim(interimText)
    }

    rec.onerror = (event) => {
      if (isEngineUnavailable(event.error) && LOCAL_SUPPORTED) {
        // The streamed audio is gone; switch engines and ask for a retry.
        finalRef.current = ''
        switchToLocal('Voice service unavailable — switched to on-device. Press again.')
      } else if (event.error !== 'no-speech' && event.error !== 'aborted') {
        setError(event.error === 'not-allowed' ? 'Microphone access denied' : `Voice error: ${event.error}`)
      }
    }

    rec.onend = () => {
      const text = finalRef.current.trim()
      recognitionRef.current = null
      setPhase('idle')
      setInterim('')
      if (text) onTranscriptRef.current(text)
    }

    recognitionRef.current = rec
    try {
      rec.start()
      setPhase('listening')
    } catch (err) {
      recognitionRef.current = null
      setPhase('idle')
      setError('Could not start microphone')
      console.error('[voice] web speech start failed', err)
    }
  }, [switchToLocal])

  const stopWebSpeech = useCallback(() => {
    try { recognitionRef.current?.stop() } catch { /* already stopped */ }
  }, [])

  // ─── Local (Whisper) engine ────────────────────────

  const handleModelProgress = useCallback((p: ModelProgress) => {
    if (p.status === 'progress' && typeof p.progress === 'number') {
      setModelPct(Math.min(100, Math.round(p.progress)))
    }
  }, [])

  // Runs when the recorder stops: release the mic, assemble the recording and
  // hand it to the transcriber.
  const runLocalTranscription = useCallback(async (rec: MediaRecorder) => {
    stopStream()
    const blob = new Blob(chunksRef.current, { type: rec.mimeType || 'audio/webm' })
    chunksRef.current = []
    recorderRef.current = null
    if (blob.size === 0) { setPhase('idle'); return }

    setPhase('transcribing')
    setInterim('')
    try {
      const text = await transcribeBlob(blob, handleModelProgress)
      if (text) onTranscriptRef.current(text)
    } catch (err) {
      setError('On-device transcription failed')
      console.error('[voice] local transcription failed', err)
    } finally {
      setPhase('idle')
      setModelPct(null)
    }
  }, [stopStream, handleModelProgress])

  const startLocal = useCallback(async () => {
    // Ignore a press while an earlier one is still waiting for the microphone
    // or a recorder is already active; see startingRef.
    if (startingRef.current || recorderRef.current) return
    startingRef.current = true
    setError(null); setNotice(null); setInterim('')
    cancelGestureRef.current = false
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
      // Released during the permission/await — don't record anything.
      if (cancelGestureRef.current) {
        stream.getTracks().forEach((t) => t.stop())
        setPhase('idle')
        return
      }
      streamRef.current = stream
      chunksRef.current = []
      const rec = new MediaRecorder(stream)
      rec.ondataavailable = (e) => { if (e.data.size) chunksRef.current.push(e.data) }
      rec.onstop = () => { void runLocalTranscription(rec) }
      recorderRef.current = rec
      rec.start()
      setPhase('listening')
    } catch (err) {
      stopStream()
      setPhase('idle')
      setError(describeMicError(err))
      console.error('[voice] getUserMedia failed', err)
    } finally {
      startingRef.current = false
    }
  }, [runLocalTranscription, stopStream])

  const stopLocal = useCallback(() => {
    cancelGestureRef.current = true
    const rec = recorderRef.current
    if (rec && rec.state !== 'inactive') {
      try { rec.stop() } catch { /* ignore */ }
    }
  }, [])

  // ─── Push-to-talk gesture ──────────────────────────

  const handlePointerDown = (e: React.PointerEvent) => {
    if (!ANY_SUPPORTED || phase !== 'idle') return
    e.preventDefault()
    // Capture the pointer so the release is delivered to this button.
    e.currentTarget.setPointerCapture?.(e.pointerId)
    if (engineRef.current === 'web-speech' && WEB_SPEECH_SUPPORTED) startWebSpeech()
    else if (LOCAL_SUPPORTED) void startLocal()
  }
  const handlePointerUp = (e: React.PointerEvent) => {
    e.currentTarget.releasePointerCapture?.(e.pointerId)
    if (engineRef.current === 'web-speech') stopWebSpeech()
    else stopLocal()
  }

  // Clean up on unmount. Handlers are detached before stopping so that tearing
  // the component down cannot kick off a transcription or deliver a transcript
  // from a component that no longer exists.
  useEffect(() => () => {
    cancelGestureRef.current = true
    const recognition = recognitionRef.current
    if (recognition) {
      recognition.onresult = null
      recognition.onerror = null
      recognition.onend = null
      try { recognition.abort() } catch { /* ignore */ }
      recognitionRef.current = null
    }
    const recorder = recorderRef.current
    if (recorder) {
      recorder.ondataavailable = null
      recorder.onstop = null
      try { if (recorder.state !== 'inactive') recorder.stop() } catch { /* ignore */ }
      recorderRef.current = null
    }
    streamRef.current?.getTracks().forEach((t) => t.stop())
    streamRef.current = null
  }, [])

  // ─── Render ────────────────────────────────────────

  const listening = phase === 'listening'
  const busy = phase === 'transcribing'

  const title = !ANY_SUPPORTED
    ? 'Voice typing not supported in this build'
    : listening
      ? 'Listening… release to insert'
      : busy
        ? 'Transcribing…'
        : engine === 'local'
          ? 'Hold to talk (on-device) — speech is typed into the terminal'
          : 'Hold to talk — speech is typed into the terminal'

  // At most one status chip is shown; errors and notices only while idle.
  let chip: { kind: 'listening' | 'busy' | 'notice' | 'error'; text: string } | null = null
  if (error && phase === 'idle') chip = { kind: 'error', text: error }
  else if (notice && phase === 'idle') chip = { kind: 'notice', text: notice }
  else if (listening) chip = { kind: 'listening', text: interim || (engine === 'local' ? 'Recording…' : 'Listening…') }
  else if (busy) chip = { kind: 'busy', text: modelPct !== null ? `Downloading voice model… ${modelPct}%` : 'Transcribing…' }

  return (
    <div className="voice-input-wrap">
      <button
        type="button"
        className={`titlebar-btn titlebar-icon-btn voice-btn ${listening ? 'listening' : ''} ${busy ? 'busy' : ''}`}
        disabled={!ANY_SUPPORTED}
        onPointerDown={handlePointerDown}
        onPointerUp={handlePointerUp}
        onPointerCancel={handlePointerUp}
        onContextMenu={(e) => e.preventDefault()}
        title={title}
        aria-label="Voice typing (hold to talk)"
      >
        <span className="voice-icon" aria-hidden="true">🎤</span>
        <span className={`voice-dot ${listening ? 'on' : busy ? 'busy' : 'off'}`} />
      </button>
      {chip && <div className={`voice-chip ${chip.kind}`}>{chip.text}</div>}
    </div>
  )
}