@phenx-inc/ctlsurf 0.5.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. package/electron-vite.config.ts +5 -0
  2. package/out/headless/index.mjs +2 -1
  3. package/out/headless/index.mjs.map +2 -2
  4. package/out/main/index.js +3 -0
  5. package/out/renderer/assets/{cssMode-D9-xaWSI.js → cssMode-eTXVdAkZ.js} +3 -3
  6. package/out/renderer/assets/{freemarker2-CoRAVxnv.js → freemarker2-B5BKaiK4.js} +1 -1
  7. package/out/renderer/assets/{handlebars-B0p9Wgkw.js → handlebars-BIdLd2wU.js} +1 -1
  8. package/out/renderer/assets/{html-D_XFJJtO.js → html-BXL4cnLS.js} +1 -1
  9. package/out/renderer/assets/{htmlMode-naWw6PWr.js → htmlMode-46N3XG2c.js} +3 -3
  10. package/out/renderer/assets/{index-ezC-iarf.css → index-Cf-RsxoC.css} +163 -0
  11. package/out/renderer/assets/{index-DBt_rov1.js → index-dRvutfbl.js} +572 -107
  12. package/out/renderer/assets/{javascript-DDLsFUr-.js → javascript-n_iZZzDX.js} +2 -2
  13. package/out/renderer/assets/{jsonMode-Ixhcm5I6.js → jsonMode-DXDczSNu.js} +3 -3
  14. package/out/renderer/assets/{liquid-BHgSYEHk.js → liquid-B1QweUh7.js} +1 -1
  15. package/out/renderer/assets/{lspLanguageFeatures-ClbEdD0U.js → lspLanguageFeatures-DqzMqkRk.js} +1 -1
  16. package/out/renderer/assets/{mdx-DMngMjHR.js → mdx-BCv8lm5e.js} +1 -1
  17. package/out/renderer/assets/ort-wasm-simd-threaded.asyncify-DMmc6YqF.wasm +0 -0
  18. package/out/renderer/assets/{python-D_czoeY2.js → python-BLNzYwDv.js} +1 -1
  19. package/out/renderer/assets/{razor-CLMDGvL7.js → razor-CvAww8bG.js} +1 -1
  20. package/out/renderer/assets/transformers.web-DtSCnG36.js +33668 -0
  21. package/out/renderer/assets/{tsMode-EIuSGG42.js → tsMode-C7m6Kr5E.js} +1 -1
  22. package/out/renderer/assets/{typescript-DQkV4kKA.js → typescript-DhPw4VVg.js} +1 -1
  23. package/out/renderer/assets/{xml-DJ0OOQTu.js → xml-B0WLFJ2U.js} +1 -1
  24. package/out/renderer/assets/{yaml-DxX26XLN.js → yaml-BWyn9Wd7.js} +1 -1
  25. package/out/renderer/index.html +2 -2
  26. package/package.json +2 -1
  27. package/src/main/index.ts +7 -0
  28. package/src/renderer/App.tsx +41 -1
  29. package/src/renderer/components/FloatingMic.tsx +128 -0
  30. package/src/renderer/components/TerminalPanel.tsx +6 -0
  31. package/src/renderer/components/VoiceInput.tsx +321 -0
  32. package/src/renderer/lib/localWhisper.ts +88 -0
  33. package/src/renderer/styles.css +163 -0
@@ -1,4 +1,4 @@
1
- import { c as createWebWorker, e as editor, U as Uri, a as MarkerTag, M as MarkerSeverity, l as languages, t as typescriptDefaults, R as Range } from "./index-DBt_rov1.js";
1
+ import { c as createWebWorker, e as editor, U as Uri, a as MarkerTag, M as MarkerSeverity, l as languages, t as typescriptDefaults, R as Range } from "./index-dRvutfbl.js";
2
2
  class WorkerManager {
3
3
  constructor(_modeId, _defaults) {
4
4
  this._modeId = _modeId;
@@ -1,4 +1,4 @@
1
- import { l as languages } from "./index-DBt_rov1.js";
1
+ import { l as languages } from "./index-dRvutfbl.js";
2
2
  const conf = {
3
3
  wordPattern: /(-?\d*\.\d\w*)|([^\`\~\!\@\#\%\^\&\*\(\)\-\=\+\[\{\]\}\\\|\;\:\'\"\,\.\<\>\/\?\s]+)/g,
4
4
  comments: {
@@ -1,4 +1,4 @@
1
- import { l as languages } from "./index-DBt_rov1.js";
1
+ import { l as languages } from "./index-dRvutfbl.js";
2
2
  const conf = {
3
3
  comments: {
4
4
  blockComment: ["<!--", "-->"]
@@ -1,4 +1,4 @@
1
- import { l as languages } from "./index-DBt_rov1.js";
1
+ import { l as languages } from "./index-dRvutfbl.js";
2
2
  const conf = {
3
3
  comments: {
4
4
  lineComment: "#"
@@ -4,8 +4,8 @@
4
4
  <meta charset="UTF-8" />
5
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
6
  <title>ctlsurf-worker</title>
7
- <script type="module" crossorigin src="./assets/index-DBt_rov1.js"></script>
8
- <link rel="stylesheet" crossorigin href="./assets/index-ezC-iarf.css">
7
+ <script type="module" crossorigin src="./assets/index-dRvutfbl.js"></script>
8
+ <link rel="stylesheet" crossorigin href="./assets/index-Cf-RsxoC.css">
9
9
  </head>
10
10
  <body>
11
11
  <div id="root"></div>
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@phenx-inc/ctlsurf",
3
- "version": "0.5.2",
3
+ "version": "0.7.0",
4
4
  "description": "Agent-agnostic terminal and desktop app for ctlsurf — run Claude Code, Codex, or any coding agent with live session logging and remote control",
5
5
  "main": "out/main/index.js",
6
6
  "bin": {
@@ -43,6 +43,7 @@
43
43
  "node": ">=18"
44
44
  },
45
45
  "dependencies": {
46
+ "@huggingface/transformers": "^4.2.0",
46
47
  "@monaco-editor/react": "^4.7.0",
47
48
  "@xterm/addon-fit": "^0.10.0",
48
49
  "@xterm/addon-serialize": "^0.14.0",
package/src/main/index.ts CHANGED
@@ -132,6 +132,13 @@ function createWindow(): void {
132
132
  }
133
133
  })
134
134
 
135
+ // Voice typing needs microphone access. Approve permission requests (there
136
+ // was no handler before, so the renderer already had the default-permissive
137
+ // behavior — this just ensures the mic request resolves to "allow").
138
+ mainWindow.webContents.session.setPermissionRequestHandler((_wc, _permission, callback) => {
139
+ callback(true)
140
+ })
141
+
135
142
  if (process.env.ELECTRON_RENDERER_URL) {
136
143
  mainWindow.loadURL(process.env.ELECTRON_RENDERER_URL)
137
144
  } else {
@@ -1,5 +1,6 @@
1
1
  import { useState, useEffect, useCallback, useRef } from 'react'
2
- import { TerminalPanel, destroyTerminal } from './components/TerminalPanel'
2
+ import { TerminalPanel, destroyTerminal, focusTerminal } from './components/TerminalPanel'
3
+ import { FloatingMic } from './components/FloatingMic'
3
4
  import { CtlsurfPanel } from './components/CtlsurfPanel'
4
5
  import { EditorPanel } from './components/EditorPanel'
5
6
  import { AgentPicker } from './components/AgentPicker'
@@ -132,6 +133,14 @@ export default function App() {
132
133
  const [activeTabId, setActiveTabId] = useState<string>(tabs[0].id)
133
134
  const [trackingActive, setTrackingActive] = useState(false)
134
135
  const [showTicketPanel, setShowTicketPanel] = useState(false)
136
+ // Draggable on-canvas push-to-talk mic; visibility persists across launches.
137
+ const [showFloatingMic, setShowFloatingMic] = useState<boolean>(() => {
138
+ try { return localStorage.getItem('ctlsurf.floatingMicVisible') !== 'false' } catch { return true }
139
+ })
140
+ const setFloatingMicVisible = useCallback((v: boolean) => {
141
+ setShowFloatingMic(v)
142
+ try { localStorage.setItem('ctlsurf.floatingMicVisible', String(v)) } catch { /* ignore */ }
143
+ }, [])
135
144
 
136
145
  // Agent picker state: which tab is being configured (null = initial picker for first tab)
137
146
  const [pickerTargetTabId, setPickerTargetTabId] = useState<string | null>(tabs[0].id)
@@ -206,6 +215,16 @@ export default function App() {
206
215
  }
207
216
  }, [trackingActive])
208
217
 
218
+ // Voice typing: inject the transcribed text into the active terminal as if it
219
+ // were typed, then send a carriage return to submit it (same as pressing Enter
220
+ // after typing), and refocus the terminal.
221
+ const handleVoiceTranscript = useCallback((text: string) => {
222
+ const trimmed = text.trim()
223
+ if (!trimmed) return
224
+ window.worker.writePty(activeTabId, trimmed + '\r')
225
+ focusTerminal(activeTabId)
226
+ }, [activeTabId])
227
+
209
228
  const cwdRef = useRef<string | null>(null)
210
229
 
211
230
  const handleSpawn = useCallback(async (tabId: string, agent: AgentConfig) => {
@@ -471,6 +490,20 @@ export default function App() {
471
490
  </svg>
472
491
  <span>Tickets</span>
473
492
  </button>
493
+ <button
494
+ className={`titlebar-btn titlebar-icon-btn ${showFloatingMic ? 'active' : ''}`}
495
+ onClick={() => setFloatingMicVisible(!showFloatingMic)}
496
+ title={showFloatingMic ? 'Hide floating mic' : 'Show floating mic'}
497
+ aria-label="Toggle floating mic"
498
+ >
499
+ <svg viewBox="0 0 24 24" width="13" height="13" fill="none" stroke="currentColor"
500
+ strokeWidth="2" strokeLinecap="round" strokeLinejoin="round" aria-hidden="true">
501
+ <path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z" />
502
+ <path d="M19 10v2a7 7 0 0 1-14 0v-2" />
503
+ <line x1="12" y1="19" x2="12" y2="23" />
504
+ <line x1="8" y1="23" x2="16" y2="23" />
505
+ </svg>
506
+ </button>
474
507
  <span className="titlebar-separator" />
475
508
  {agents.map(a => {
476
509
  const activeTab = tabs.find(t => t.id === activeTabId)
@@ -532,6 +565,13 @@ export default function App() {
532
565
  }}
533
566
  />
534
567
  )}
568
+
569
+ {showFloatingMic && (
570
+ <FloatingMic
571
+ onTranscript={handleVoiceTranscript}
572
+ onHide={() => setFloatingMicVisible(false)}
573
+ />
574
+ )}
535
575
  </div>
536
576
  )
537
577
  }
@@ -0,0 +1,128 @@
1
+ import { useCallback, useEffect, useRef, useState } from 'react'
2
+ import { VoiceInput } from './VoiceInput'
3
+
4
+ // A draggable, dismissable push-to-talk mic that floats over the panes. It wraps
5
+ // the same <VoiceInput> push-to-talk logic used in the titlebar; only the chrome
6
+ // (drag handle + hide button) and positioning live here.
7
+
8
+ const POS_KEY = 'ctlsurf.floatingMicPos'
9
+
10
+ interface Pos { x: number; y: number }
11
+
12
+ interface FloatingMicProps {
13
+ onTranscript: (text: string) => void
14
+ onHide: () => void
15
+ }
16
+
17
+ // Keep the button clear of the 38px titlebar and 24px status bar.
18
+ const EDGE = 20
19
+ const TOP_MIN = 46
20
+ const BOTTOM_GAP = 36
21
+
22
+ function loadPos(): Pos | null {
23
+ try {
24
+ const raw = localStorage.getItem(POS_KEY)
25
+ if (raw) {
26
+ const p = JSON.parse(raw) as Partial<Pos>
27
+ if (typeof p.x === 'number' && typeof p.y === 'number') return { x: p.x, y: p.y }
28
+ }
29
+ } catch { /* ignore */ }
30
+ return null
31
+ }
32
+
33
+ export function FloatingMic({ onTranscript, onHide }: FloatingMicProps) {
34
+ const [pos, setPos] = useState<Pos | null>(loadPos)
35
+ const elRef = useRef<HTMLDivElement>(null)
36
+ // Pointer-to-element offset captured at drag start; null when not dragging.
37
+ const dragRef = useRef<{ dx: number; dy: number } | null>(null)
38
+
39
+ // Keep the button fully inside the viewport (used on drag, mount, and resize).
40
+ const clamp = useCallback((x: number, y: number): Pos => {
41
+ const el = elRef.current
42
+ const w = el?.offsetWidth ?? 64
43
+ const h = el?.offsetHeight ?? 90
44
+ return {
45
+ x: Math.max(EDGE, Math.min(x, window.innerWidth - w - EDGE)),
46
+ y: Math.max(TOP_MIN, Math.min(y, window.innerHeight - h - BOTTOM_GAP)),
47
+ }
48
+ }, [])
49
+
50
+ // First mount with no saved position: default to bottom-right.
51
+ useEffect(() => {
52
+ if (pos) return
53
+ const el = elRef.current
54
+ const w = el?.offsetWidth ?? 64
55
+ const h = el?.offsetHeight ?? 90
56
+ setPos({
57
+ x: window.innerWidth - w - EDGE,
58
+ y: window.innerHeight - h - BOTTOM_GAP,
59
+ })
60
+ }, [pos])
61
+
62
+ // Keep it reachable if the window shrinks.
63
+ useEffect(() => {
64
+ const onResize = () => setPos((p) => (p ? clamp(p.x, p.y) : p))
65
+ window.addEventListener('resize', onResize)
66
+ return () => window.removeEventListener('resize', onResize)
67
+ }, [clamp])
68
+
69
+ const onHandleDown = useCallback((e: React.PointerEvent) => {
70
+ const el = elRef.current
71
+ if (!el) return
72
+ e.preventDefault()
73
+ const rect = el.getBoundingClientRect()
74
+ dragRef.current = { dx: e.clientX - rect.left, dy: e.clientY - rect.top }
75
+ e.currentTarget.setPointerCapture?.(e.pointerId)
76
+ }, [])
77
+
78
+ const onHandleMove = useCallback((e: React.PointerEvent) => {
79
+ const d = dragRef.current
80
+ if (!d) return
81
+ setPos(clamp(e.clientX - d.dx, e.clientY - d.dy))
82
+ }, [clamp])
83
+
84
+ const onHandleUp = useCallback((e: React.PointerEvent) => {
85
+ if (!dragRef.current) return
86
+ dragRef.current = null
87
+ e.currentTarget.releasePointerCapture?.(e.pointerId)
88
+ setPos((p) => {
89
+ if (p) {
90
+ try { localStorage.setItem(POS_KEY, JSON.stringify(p)) } catch { /* ignore */ }
91
+ }
92
+ return p
93
+ })
94
+ }, [])
95
+
96
+ // Render off-screen+hidden until the first position is computed (no flash).
97
+ const style: React.CSSProperties = pos
98
+ ? { left: pos.x, top: pos.y }
99
+ : { left: -9999, top: -9999, visibility: 'hidden' }
100
+
101
+ return (
102
+ <div ref={elRef} className="floating-mic" style={style}>
103
+ <div
104
+ className="floating-mic-handle"
105
+ onPointerDown={onHandleDown}
106
+ onPointerMove={onHandleMove}
107
+ onPointerUp={onHandleUp}
108
+ onPointerCancel={onHandleUp}
109
+ title="Drag to move"
110
+ aria-label="Drag floating mic"
111
+ >
112
+ <span className="floating-mic-grip" aria-hidden="true">⠿</span>
113
+ <button
114
+ type="button"
115
+ className="floating-mic-hide"
116
+ // Don't let a click on the hide button start a drag.
117
+ onPointerDown={(e) => e.stopPropagation()}
118
+ onClick={onHide}
119
+ title="Hide floating mic"
120
+ aria-label="Hide floating mic"
121
+ >
122
+ ×
123
+ </button>
124
+ </div>
125
+ <VoiceInput variant="floating" onTranscript={onTranscript} />
126
+ </div>
127
+ )
128
+ }
@@ -185,6 +185,12 @@ function getOrCreateTerminal(tabId: string, onExit: (tabId: string) => void): {
185
185
  return { terminal, fitAddon }
186
186
  }
187
187
 
188
+ // Return keyboard focus to a tab's terminal (e.g. after inserting voice text
189
+ // so the user can immediately press Enter to submit).
190
+ export function focusTerminal(tabId: string): void {
191
+ _terminals.get(tabId)?.terminal.focus()
192
+ }
193
+
188
194
  export function destroyTerminal(tabId: string): void {
189
195
  const state = _terminals.get(tabId)
190
196
  if (!state) return
@@ -0,0 +1,321 @@
1
+ import { useCallback, useEffect, useRef, useState } from 'react'
2
+ import { transcribeBlob, type ModelProgress } from '../lib/localWhisper'
3
+
4
+ // ─── Minimal Web Speech API typings ──────────────────
5
+ // webkitSpeechRecognition isn't in the standard DOM lib, so declare just the
6
+ // surface we use. This API is frequently unavailable inside Electron (Chromium
7
+ // ships without Google's speech backend); when it fails we fall back to a local
8
+ // Whisper model (see ../lib/localWhisper).
9
+
10
+ interface SpeechRecognitionResult {
11
+ isFinal: boolean
12
+ 0: { transcript: string }
13
+ }
14
+ interface SpeechRecognitionEvent {
15
+ resultIndex: number
16
+ results: { length: number;[index: number]: SpeechRecognitionResult }
17
+ }
18
+ interface SpeechRecognitionErrorEvent { error: string }
19
+ interface SpeechRecognitionLike {
20
+ lang: string
21
+ continuous: boolean
22
+ interimResults: boolean
23
+ start: () => void
24
+ stop: () => void
25
+ abort: () => void
26
+ onresult: ((e: SpeechRecognitionEvent) => void) | null
27
+ onerror: ((e: SpeechRecognitionErrorEvent) => void) | null
28
+ onend: (() => void) | null
29
+ }
30
+ type SpeechRecognitionCtor = new () => SpeechRecognitionLike
31
+
32
+ function getRecognitionCtor(): SpeechRecognitionCtor | null {
33
+ const w = window as unknown as {
34
+ SpeechRecognition?: SpeechRecognitionCtor
35
+ webkitSpeechRecognition?: SpeechRecognitionCtor
36
+ }
37
+ return w.SpeechRecognition || w.webkitSpeechRecognition || null
38
+ }
39
+
40
+ // ─── Capabilities & engine selection ─────────────────
41
+
42
+ type Engine = 'web-speech' | 'local'
43
+ type Phase = 'idle' | 'listening' | 'transcribing'
44
+
45
+ const ENGINE_KEY = 'ctlsurf.voiceEngine'
46
+
47
+ const WEB_SPEECH_SUPPORTED = getRecognitionCtor() !== null
48
+ const LOCAL_SUPPORTED =
49
+ typeof navigator !== 'undefined' &&
50
+ !!navigator.mediaDevices?.getUserMedia &&
51
+ typeof MediaRecorder !== 'undefined' &&
52
+ typeof OfflineAudioContext !== 'undefined'
53
+ const ANY_SUPPORTED = WEB_SPEECH_SUPPORTED || LOCAL_SUPPORTED
54
+
55
+ function loadInitialEngine(): Engine {
56
+ if (!WEB_SPEECH_SUPPORTED && LOCAL_SUPPORTED) return 'local'
57
+ try {
58
+ if (localStorage.getItem(ENGINE_KEY) === 'local' && LOCAL_SUPPORTED) return 'local'
59
+ } catch { /* ignore */ }
60
+ return WEB_SPEECH_SUPPORTED ? 'web-speech' : 'local'
61
+ }
62
+
63
+ // Web Speech errors that mean the engine itself is unreachable (vs. a mic
64
+ // permission/hardware problem, which would also break the local fallback).
65
+ function isEngineUnavailable(code: string): boolean {
66
+ return code === 'network' || code === 'service-not-allowed'
67
+ }
68
+
69
+ function describeMicError(err: unknown): string {
70
+ const name = (err as { name?: string })?.name
71
+ if (name === 'NotAllowedError' || name === 'SecurityError') return 'Microphone access denied'
72
+ if (name === 'NotFoundError') return 'No microphone found'
73
+ return 'Could not start microphone'
74
+ }
75
+
76
+ interface VoiceInputProps {
77
+ // Called once per push-to-talk session with the final transcribed text.
78
+ onTranscript: (text: string) => void
79
+ // 'titlebar' (default) renders the compact titlebar pill; 'floating' renders
80
+ // a round FAB used by the draggable on-canvas mic (see FloatingMic).
81
+ variant?: 'titlebar' | 'floating'
82
+ }
83
+
84
+ export function VoiceInput({ onTranscript, variant = 'titlebar' }: VoiceInputProps) {
85
+ const [engine, setEngine] = useState<Engine>(loadInitialEngine)
86
+ const [phase, setPhase] = useState<Phase>('idle')
87
+ const [interim, setInterim] = useState('')
88
+ const [modelPct, setModelPct] = useState<number | null>(null)
89
+ const [error, setError] = useState<string | null>(null)
90
+ const [notice, setNotice] = useState<string | null>(null)
91
+
92
+ // Web Speech refs
93
+ const recognitionRef = useRef<SpeechRecognitionLike | null>(null)
94
+ const finalRef = useRef('')
95
+ // Local (Whisper) refs
96
+ const streamRef = useRef<MediaStream | null>(null)
97
+ const recorderRef = useRef<MediaRecorder | null>(null)
98
+ const chunksRef = useRef<Blob[]>([])
99
+ // Set true when the user releases before getUserMedia resolves (quick tap).
100
+ const cancelGestureRef = useRef(false)
101
+
102
+ const engineRef = useRef(engine)
103
+ useEffect(() => { engineRef.current = engine }, [engine])
104
+
105
+ const onTranscriptRef = useRef(onTranscript)
106
+ useEffect(() => { onTranscriptRef.current = onTranscript }, [onTranscript])
107
+
108
+ // Auto-dismiss transient chips.
109
+ useEffect(() => {
110
+ if (!error) return
111
+ const t = setTimeout(() => setError(null), 4500)
112
+ return () => clearTimeout(t)
113
+ }, [error])
114
+ useEffect(() => {
115
+ if (!notice) return
116
+ const t = setTimeout(() => setNotice(null), 5000)
117
+ return () => clearTimeout(t)
118
+ }, [notice])
119
+
120
+ const switchToLocal = useCallback((reason: string) => {
121
+ try { localStorage.setItem(ENGINE_KEY, 'local') } catch { /* ignore */ }
122
+ setEngine('local')
123
+ setNotice(reason)
124
+ }, [])
125
+
126
+ const stopStream = useCallback(() => {
127
+ streamRef.current?.getTracks().forEach((t) => t.stop())
128
+ streamRef.current = null
129
+ }, [])
130
+
131
+ // ─── Web Speech engine ─────────────────────────────
132
+
133
+ const startWebSpeech = useCallback(() => {
134
+ const Ctor = getRecognitionCtor()
135
+ if (!Ctor || recognitionRef.current) return
136
+ setError(null); setNotice(null); setInterim('')
137
+ finalRef.current = ''
138
+
139
+ const rec = new Ctor()
140
+ rec.lang = navigator.language || 'en-US'
141
+ rec.continuous = true
142
+ rec.interimResults = true
143
+
144
+ rec.onresult = (event) => {
145
+ let finalText = ''
146
+ let interimText = ''
147
+ for (let i = 0; i < event.results.length; i++) {
148
+ const res = event.results[i]
149
+ if (res.isFinal) finalText += res[0].transcript
150
+ else interimText += res[0].transcript
151
+ }
152
+ finalRef.current = finalText
153
+ setInterim(interimText)
154
+ }
155
+
156
+ rec.onerror = (event) => {
157
+ if (isEngineUnavailable(event.error) && LOCAL_SUPPORTED) {
158
+ // The streamed audio is gone; switch engines and ask for a retry.
159
+ finalRef.current = ''
160
+ switchToLocal('Voice service unavailable — switched to on-device. Press again.')
161
+ } else if (event.error !== 'no-speech' && event.error !== 'aborted') {
162
+ setError(event.error === 'not-allowed' ? 'Microphone access denied' : `Voice error: ${event.error}`)
163
+ }
164
+ }
165
+
166
+ rec.onend = () => {
167
+ const text = finalRef.current.trim()
168
+ recognitionRef.current = null
169
+ setPhase('idle')
170
+ setInterim('')
171
+ if (text) onTranscriptRef.current(text)
172
+ }
173
+
174
+ recognitionRef.current = rec
175
+ try {
176
+ rec.start()
177
+ setPhase('listening')
178
+ } catch (err) {
179
+ recognitionRef.current = null
180
+ setPhase('idle')
181
+ setError('Could not start microphone')
182
+ console.error('[voice] web speech start failed', err)
183
+ }
184
+ }, [switchToLocal])
185
+
186
+ const stopWebSpeech = useCallback(() => {
187
+ try { recognitionRef.current?.stop() } catch { /* already stopped */ }
188
+ }, [])
189
+
190
+ // ─── Local (Whisper) engine ────────────────────────
191
+
192
+ const handleModelProgress = useCallback((p: ModelProgress) => {
193
+ if (p.status === 'progress' && typeof p.progress === 'number') {
194
+ setModelPct(Math.min(100, Math.round(p.progress)))
195
+ }
196
+ }, [])
197
+
198
+ const runLocalTranscription = useCallback(async (rec: MediaRecorder) => {
199
+ stopStream()
200
+ const blob = new Blob(chunksRef.current, { type: rec.mimeType || 'audio/webm' })
201
+ chunksRef.current = []
202
+ recorderRef.current = null
203
+ if (blob.size === 0) { setPhase('idle'); return }
204
+
205
+ setPhase('transcribing')
206
+ setInterim('')
207
+ try {
208
+ const text = await transcribeBlob(blob, handleModelProgress)
209
+ if (text) onTranscriptRef.current(text)
210
+ } catch (err) {
211
+ setError('On-device transcription failed')
212
+ console.error('[voice] local transcription failed', err)
213
+ } finally {
214
+ setPhase('idle')
215
+ setModelPct(null)
216
+ }
217
+ }, [stopStream, handleModelProgress])
218
+
219
+ const startLocal = useCallback(async () => {
220
+ setError(null); setNotice(null); setInterim('')
221
+ cancelGestureRef.current = false
222
+ try {
223
+ const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
224
+ // Released during the permission/await — don't record anything.
225
+ if (cancelGestureRef.current) {
226
+ stream.getTracks().forEach((t) => t.stop())
227
+ setPhase('idle')
228
+ return
229
+ }
230
+ streamRef.current = stream
231
+ chunksRef.current = []
232
+ const rec = new MediaRecorder(stream)
233
+ rec.ondataavailable = (e) => { if (e.data.size) chunksRef.current.push(e.data) }
234
+ rec.onstop = () => { void runLocalTranscription(rec) }
235
+ recorderRef.current = rec
236
+ rec.start()
237
+ setPhase('listening')
238
+ } catch (err) {
239
+ stopStream()
240
+ setPhase('idle')
241
+ setError(describeMicError(err))
242
+ console.error('[voice] getUserMedia failed', err)
243
+ }
244
+ }, [runLocalTranscription, stopStream])
245
+
246
+ const stopLocal = useCallback(() => {
247
+ cancelGestureRef.current = true
248
+ const rec = recorderRef.current
249
+ if (rec && rec.state !== 'inactive') {
250
+ try { rec.stop() } catch { /* ignore */ }
251
+ }
252
+ }, [])
253
+
254
+ // ─── Push-to-talk gesture ──────────────────────────
255
+
256
+ const handlePointerDown = (e: React.PointerEvent) => {
257
+ if (!ANY_SUPPORTED || phase !== 'idle') return
258
+ e.preventDefault()
259
+ e.currentTarget.setPointerCapture?.(e.pointerId)
260
+ if (engineRef.current === 'web-speech' && WEB_SPEECH_SUPPORTED) startWebSpeech()
261
+ else if (LOCAL_SUPPORTED) void startLocal()
262
+ }
263
+ const handlePointerUp = (e: React.PointerEvent) => {
264
+ e.currentTarget.releasePointerCapture?.(e.pointerId)
265
+ if (engineRef.current === 'web-speech') stopWebSpeech()
266
+ else stopLocal()
267
+ }
268
+
269
+ // Clean up on unmount.
270
+ useEffect(() => () => {
271
+ try { recognitionRef.current?.abort() } catch { /* ignore */ }
272
+ try { recorderRef.current?.stop() } catch { /* ignore */ }
273
+ streamRef.current?.getTracks().forEach((t) => t.stop())
274
+ }, [])
275
+
276
+ // ─── Render ────────────────────────────────────────
277
+
278
+ const listening = phase === 'listening'
279
+ const busy = phase === 'transcribing'
280
+
281
+ const title = !ANY_SUPPORTED
282
+ ? 'Voice typing not supported in this build'
283
+ : listening
284
+ ? 'Listening… release to insert'
285
+ : busy
286
+ ? 'Transcribing…'
287
+ : engine === 'local'
288
+ ? 'Hold to talk (on-device) — speech is typed into the terminal'
289
+ : 'Hold to talk — speech is typed into the terminal'
290
+
291
+ let chip: { kind: 'listening' | 'busy' | 'notice' | 'error'; text: string } | null = null
292
+ if (error && phase === 'idle') chip = { kind: 'error', text: error }
293
+ else if (notice && phase === 'idle') chip = { kind: 'notice', text: notice }
294
+ else if (listening) chip = { kind: 'listening', text: interim || (engine === 'local' ? 'Recording…' : 'Listening…') }
295
+ else if (busy) chip = { kind: 'busy', text: modelPct !== null ? `Downloading voice model… ${modelPct}%` : 'Transcribing…' }
296
+
297
+ const floating = variant === 'floating'
298
+ const btnClass = floating
299
+ ? `voice-btn voice-btn-floating ${listening ? 'listening' : ''} ${busy ? 'busy' : ''}`
300
+ : `titlebar-btn titlebar-icon-btn voice-btn ${listening ? 'listening' : ''} ${busy ? 'busy' : ''}`
301
+
302
+ return (
303
+ <div className="voice-input-wrap">
304
+ <button
305
+ type="button"
306
+ className={btnClass}
307
+ disabled={!ANY_SUPPORTED}
308
+ onPointerDown={handlePointerDown}
309
+ onPointerUp={handlePointerUp}
310
+ onPointerCancel={handlePointerUp}
311
+ onContextMenu={(e) => e.preventDefault()}
312
+ title={title}
313
+ aria-label="Voice typing (hold to talk)"
314
+ >
315
+ <span className="voice-icon" aria-hidden="true">🎤</span>
316
+ <span className={`voice-dot ${listening ? 'on' : busy ? 'busy' : 'off'}`} />
317
+ </button>
318
+ {chip && <div className={`voice-chip ${chip.kind} ${floating ? 'voice-chip-floating' : ''}`}>{chip.text}</div>}
319
+ </div>
320
+ )
321
+ }