@phenx-inc/ctlsurf 0.5.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/electron-vite.config.ts +5 -0
- package/out/headless/index.mjs +2 -1
- package/out/headless/index.mjs.map +2 -2
- package/out/main/index.js +3 -0
- package/out/renderer/assets/{cssMode-D9-xaWSI.js → cssMode-eTXVdAkZ.js} +3 -3
- package/out/renderer/assets/{freemarker2-CoRAVxnv.js → freemarker2-B5BKaiK4.js} +1 -1
- package/out/renderer/assets/{handlebars-B0p9Wgkw.js → handlebars-BIdLd2wU.js} +1 -1
- package/out/renderer/assets/{html-D_XFJJtO.js → html-BXL4cnLS.js} +1 -1
- package/out/renderer/assets/{htmlMode-naWw6PWr.js → htmlMode-46N3XG2c.js} +3 -3
- package/out/renderer/assets/{index-ezC-iarf.css → index-Cf-RsxoC.css} +163 -0
- package/out/renderer/assets/{index-DBt_rov1.js → index-dRvutfbl.js} +572 -107
- package/out/renderer/assets/{javascript-DDLsFUr-.js → javascript-n_iZZzDX.js} +2 -2
- package/out/renderer/assets/{jsonMode-Ixhcm5I6.js → jsonMode-DXDczSNu.js} +3 -3
- package/out/renderer/assets/{liquid-BHgSYEHk.js → liquid-B1QweUh7.js} +1 -1
- package/out/renderer/assets/{lspLanguageFeatures-ClbEdD0U.js → lspLanguageFeatures-DqzMqkRk.js} +1 -1
- package/out/renderer/assets/{mdx-DMngMjHR.js → mdx-BCv8lm5e.js} +1 -1
- package/out/renderer/assets/ort-wasm-simd-threaded.asyncify-DMmc6YqF.wasm +0 -0
- package/out/renderer/assets/{python-D_czoeY2.js → python-BLNzYwDv.js} +1 -1
- package/out/renderer/assets/{razor-CLMDGvL7.js → razor-CvAww8bG.js} +1 -1
- package/out/renderer/assets/transformers.web-DtSCnG36.js +33668 -0
- package/out/renderer/assets/{tsMode-EIuSGG42.js → tsMode-C7m6Kr5E.js} +1 -1
- package/out/renderer/assets/{typescript-DQkV4kKA.js → typescript-DhPw4VVg.js} +1 -1
- package/out/renderer/assets/{xml-DJ0OOQTu.js → xml-B0WLFJ2U.js} +1 -1
- package/out/renderer/assets/{yaml-DxX26XLN.js → yaml-BWyn9Wd7.js} +1 -1
- package/out/renderer/index.html +2 -2
- package/package.json +2 -1
- package/src/main/index.ts +7 -0
- package/src/renderer/App.tsx +41 -1
- package/src/renderer/components/FloatingMic.tsx +128 -0
- package/src/renderer/components/TerminalPanel.tsx +6 -0
- package/src/renderer/components/VoiceInput.tsx +321 -0
- package/src/renderer/lib/localWhisper.ts +88 -0
- package/src/renderer/styles.css +163 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { c as createWebWorker, e as editor, U as Uri, a as MarkerTag, M as MarkerSeverity, l as languages, t as typescriptDefaults, R as Range } from "./index-
|
|
1
|
+
import { c as createWebWorker, e as editor, U as Uri, a as MarkerTag, M as MarkerSeverity, l as languages, t as typescriptDefaults, R as Range } from "./index-dRvutfbl.js";
|
|
2
2
|
class WorkerManager {
|
|
3
3
|
constructor(_modeId, _defaults) {
|
|
4
4
|
this._modeId = _modeId;
|
package/out/renderer/index.html
CHANGED
|
@@ -4,8 +4,8 @@
|
|
|
4
4
|
<meta charset="UTF-8" />
|
|
5
5
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
6
6
|
<title>ctlsurf-worker</title>
|
|
7
|
-
<script type="module" crossorigin src="./assets/index-
|
|
8
|
-
<link rel="stylesheet" crossorigin href="./assets/index-
|
|
7
|
+
<script type="module" crossorigin src="./assets/index-dRvutfbl.js"></script>
|
|
8
|
+
<link rel="stylesheet" crossorigin href="./assets/index-Cf-RsxoC.css">
|
|
9
9
|
</head>
|
|
10
10
|
<body>
|
|
11
11
|
<div id="root"></div>
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@phenx-inc/ctlsurf",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.7.0",
|
|
4
4
|
"description": "Agent-agnostic terminal and desktop app for ctlsurf — run Claude Code, Codex, or any coding agent with live session logging and remote control",
|
|
5
5
|
"main": "out/main/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -43,6 +43,7 @@
|
|
|
43
43
|
"node": ">=18"
|
|
44
44
|
},
|
|
45
45
|
"dependencies": {
|
|
46
|
+
"@huggingface/transformers": "^4.2.0",
|
|
46
47
|
"@monaco-editor/react": "^4.7.0",
|
|
47
48
|
"@xterm/addon-fit": "^0.10.0",
|
|
48
49
|
"@xterm/addon-serialize": "^0.14.0",
|
package/src/main/index.ts
CHANGED
|
@@ -132,6 +132,13 @@ function createWindow(): void {
|
|
|
132
132
|
}
|
|
133
133
|
})
|
|
134
134
|
|
|
135
|
+
// Voice typing needs microphone access. Approve permission requests (there
|
|
136
|
+
// was no handler before, so the renderer already had the default-permissive
|
|
137
|
+
// behavior — this just ensures the mic request resolves to "allow").
|
|
138
|
+
mainWindow.webContents.session.setPermissionRequestHandler((_wc, _permission, callback) => {
|
|
139
|
+
callback(true)
|
|
140
|
+
})
|
|
141
|
+
|
|
135
142
|
if (process.env.ELECTRON_RENDERER_URL) {
|
|
136
143
|
mainWindow.loadURL(process.env.ELECTRON_RENDERER_URL)
|
|
137
144
|
} else {
|
package/src/renderer/App.tsx
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { useState, useEffect, useCallback, useRef } from 'react'
|
|
2
|
-
import { TerminalPanel, destroyTerminal } from './components/TerminalPanel'
|
|
2
|
+
import { TerminalPanel, destroyTerminal, focusTerminal } from './components/TerminalPanel'
|
|
3
|
+
import { FloatingMic } from './components/FloatingMic'
|
|
3
4
|
import { CtlsurfPanel } from './components/CtlsurfPanel'
|
|
4
5
|
import { EditorPanel } from './components/EditorPanel'
|
|
5
6
|
import { AgentPicker } from './components/AgentPicker'
|
|
@@ -132,6 +133,14 @@ export default function App() {
|
|
|
132
133
|
const [activeTabId, setActiveTabId] = useState<string>(tabs[0].id)
|
|
133
134
|
const [trackingActive, setTrackingActive] = useState(false)
|
|
134
135
|
const [showTicketPanel, setShowTicketPanel] = useState(false)
|
|
136
|
+
// Draggable on-canvas push-to-talk mic; visibility persists across launches.
|
|
137
|
+
const [showFloatingMic, setShowFloatingMic] = useState<boolean>(() => {
|
|
138
|
+
try { return localStorage.getItem('ctlsurf.floatingMicVisible') !== 'false' } catch { return true }
|
|
139
|
+
})
|
|
140
|
+
const setFloatingMicVisible = useCallback((v: boolean) => {
|
|
141
|
+
setShowFloatingMic(v)
|
|
142
|
+
try { localStorage.setItem('ctlsurf.floatingMicVisible', String(v)) } catch { /* ignore */ }
|
|
143
|
+
}, [])
|
|
135
144
|
|
|
136
145
|
// Agent picker state: which tab is being configured (null = initial picker for first tab)
|
|
137
146
|
const [pickerTargetTabId, setPickerTargetTabId] = useState<string | null>(tabs[0].id)
|
|
@@ -206,6 +215,16 @@ export default function App() {
|
|
|
206
215
|
}
|
|
207
216
|
}, [trackingActive])
|
|
208
217
|
|
|
218
|
+
// Voice typing: inject the transcribed text into the active terminal as if it
|
|
219
|
+
// were typed, then send a carriage return to submit it (same as pressing Enter
|
|
220
|
+
// after typing), and refocus the terminal.
|
|
221
|
+
const handleVoiceTranscript = useCallback((text: string) => {
|
|
222
|
+
const trimmed = text.trim()
|
|
223
|
+
if (!trimmed) return
|
|
224
|
+
window.worker.writePty(activeTabId, trimmed + '\r')
|
|
225
|
+
focusTerminal(activeTabId)
|
|
226
|
+
}, [activeTabId])
|
|
227
|
+
|
|
209
228
|
const cwdRef = useRef<string | null>(null)
|
|
210
229
|
|
|
211
230
|
const handleSpawn = useCallback(async (tabId: string, agent: AgentConfig) => {
|
|
@@ -471,6 +490,20 @@ export default function App() {
|
|
|
471
490
|
</svg>
|
|
472
491
|
<span>Tickets</span>
|
|
473
492
|
</button>
|
|
493
|
+
<button
|
|
494
|
+
className={`titlebar-btn titlebar-icon-btn ${showFloatingMic ? 'active' : ''}`}
|
|
495
|
+
onClick={() => setFloatingMicVisible(!showFloatingMic)}
|
|
496
|
+
title={showFloatingMic ? 'Hide floating mic' : 'Show floating mic'}
|
|
497
|
+
aria-label="Toggle floating mic"
|
|
498
|
+
>
|
|
499
|
+
<svg viewBox="0 0 24 24" width="13" height="13" fill="none" stroke="currentColor"
|
|
500
|
+
strokeWidth="2" strokeLinecap="round" strokeLinejoin="round" aria-hidden="true">
|
|
501
|
+
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z" />
|
|
502
|
+
<path d="M19 10v2a7 7 0 0 1-14 0v-2" />
|
|
503
|
+
<line x1="12" y1="19" x2="12" y2="23" />
|
|
504
|
+
<line x1="8" y1="23" x2="16" y2="23" />
|
|
505
|
+
</svg>
|
|
506
|
+
</button>
|
|
474
507
|
<span className="titlebar-separator" />
|
|
475
508
|
{agents.map(a => {
|
|
476
509
|
const activeTab = tabs.find(t => t.id === activeTabId)
|
|
@@ -532,6 +565,13 @@ export default function App() {
|
|
|
532
565
|
}}
|
|
533
566
|
/>
|
|
534
567
|
)}
|
|
568
|
+
|
|
569
|
+
{showFloatingMic && (
|
|
570
|
+
<FloatingMic
|
|
571
|
+
onTranscript={handleVoiceTranscript}
|
|
572
|
+
onHide={() => setFloatingMicVisible(false)}
|
|
573
|
+
/>
|
|
574
|
+
)}
|
|
535
575
|
</div>
|
|
536
576
|
)
|
|
537
577
|
}
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
import { useCallback, useEffect, useRef, useState } from 'react'
|
|
2
|
+
import { VoiceInput } from './VoiceInput'
|
|
3
|
+
|
|
4
|
+
// A draggable, dismissable push-to-talk mic that floats over the panes. It wraps
|
|
5
|
+
// the same <VoiceInput> push-to-talk logic used in the titlebar; only the chrome
|
|
6
|
+
// (drag handle + hide button) and positioning live here.
|
|
7
|
+
|
|
8
|
+
/** localStorage key under which the last dragged position is persisted. */
const POS_KEY = 'ctlsurf.floatingMicPos'

/** Top-left corner of the floating mic; applied as CSS `left` / `top`. */
interface Pos {
  x: number
  y: number
}

/** Props accepted by the floating mic. */
interface FloatingMicProps {
  /** Forwarded to the wrapped voice input; receives the transcribed text. */
  onTranscript: (text: string) => void
  /** Invoked when the hide (×) button is clicked. */
  onHide: () => void
}

// Placement margins in pixels. The top and bottom values keep the button clear
// of the 38px titlebar and 24px status bar.
const EDGE = 20
const TOP_MIN = 46
const BOTTOM_GAP = 36
|
|
21
|
+
|
|
22
|
+
/**
 * Read the persisted mic position from localStorage.
 *
 * @returns the saved `{ x, y }`, or `null` when nothing is stored, storage is
 *   unavailable, the payload is not a JSON object, or either coordinate is not
 *   a finite number.
 */
function loadPos(): Pos | null {
  try {
    const raw = localStorage.getItem(POS_KEY)
    if (!raw) return null

    const parsed: unknown = JSON.parse(raw)
    if (typeof parsed !== 'object' || parsed === null) return null

    const { x, y } = parsed as { x?: unknown; y?: unknown }
    // `typeof === 'number'` alone is not enough: JSON such as `1e999` parses to
    // Infinity, which would produce an unusable CSS offset.
    if (
      typeof x === 'number' && Number.isFinite(x) &&
      typeof y === 'number' && Number.isFinite(y)
    ) {
      return { x, y }
    }
  } catch { /* storage unavailable or malformed JSON — fall through to null */ }
  return null
}
|
|
32
|
+
|
|
33
|
+
/**
 * Draggable, dismissable push-to-talk mic rendered over the panes.
 *
 * Wraps `<VoiceInput variant="floating">` and adds a drag handle plus a hide
 * button. The position is restored from localStorage on mount and saved again
 * whenever a drag ends.
 *
 * @param onTranscript forwarded unchanged to the wrapped VoiceInput.
 * @param onHide called when the hide (×) button is clicked.
 */
export function FloatingMic({ onTranscript, onHide }: FloatingMicProps) {
  const [pos, setPos] = useState<Pos | null>(loadPos)
  const elRef = useRef<HTMLDivElement>(null)
  // Pointer-to-element offset captured at drag start; null when not dragging.
  const dragRef = useRef<{ dx: number; dy: number } | null>(null)
  // Mirror of `pos` so event handlers can read the latest value without
  // performing side effects inside a state updater.
  const posRef = useRef<Pos | null>(pos)

  // Single place that moves the button: keeps state and the mirror in sync.
  const applyPos = useCallback((next: Pos) => {
    posRef.current = next
    setPos(next)
  }, [])

  // Keep the button fully inside the viewport (used on drag, mount, and resize).
  const clamp = useCallback((x: number, y: number): Pos => {
    const el = elRef.current
    const w = el?.offsetWidth ?? 64
    const h = el?.offsetHeight ?? 90
    return {
      x: Math.max(EDGE, Math.min(x, window.innerWidth - w - EDGE)),
      y: Math.max(TOP_MIN, Math.min(y, window.innerHeight - h - BOTTOM_GAP)),
    }
  }, [])

  // On mount: pull a restored position back into view (the window may be
  // smaller than when it was saved), or default to the bottom-right corner.
  useEffect(() => {
    const saved = posRef.current
    if (saved) {
      const fitted = clamp(saved.x, saved.y)
      if (fitted.x !== saved.x || fitted.y !== saved.y) applyPos(fitted)
      return
    }
    // Clamping the viewport's far corner yields the bottom-right slot.
    applyPos(clamp(window.innerWidth, window.innerHeight))
  }, [applyPos, clamp])

  // Keep it reachable if the window shrinks.
  useEffect(() => {
    const onResize = () => {
      const current = posRef.current
      if (current) applyPos(clamp(current.x, current.y))
    }
    window.addEventListener('resize', onResize)
    return () => window.removeEventListener('resize', onResize)
  }, [applyPos, clamp])

  const onHandleDown = useCallback((e: React.PointerEvent) => {
    const el = elRef.current
    if (!el) return
    e.preventDefault()
    const rect = el.getBoundingClientRect()
    dragRef.current = { dx: e.clientX - rect.left, dy: e.clientY - rect.top }
    // Capture so move/up keep arriving even when the pointer leaves the handle.
    e.currentTarget.setPointerCapture?.(e.pointerId)
  }, [])

  const onHandleMove = useCallback((e: React.PointerEvent) => {
    const d = dragRef.current
    if (!d) return
    applyPos(clamp(e.clientX - d.dx, e.clientY - d.dy))
  }, [applyPos, clamp])

  const onHandleUp = useCallback((e: React.PointerEvent) => {
    if (!dragRef.current) return
    dragRef.current = null
    e.currentTarget.releasePointerCapture?.(e.pointerId)
    // Persist where the drag ended; storage failures are non-fatal.
    const finalPos = posRef.current
    if (finalPos) {
      try { localStorage.setItem(POS_KEY, JSON.stringify(finalPos)) } catch { /* ignore */ }
    }
  }, [])

  // Render off-screen+hidden until the first position is computed (no flash).
  const style: React.CSSProperties = pos
    ? { left: pos.x, top: pos.y }
    : { left: -9999, top: -9999, visibility: 'hidden' }

  return (
    <div ref={elRef} className="floating-mic" style={style}>
      <div
        className="floating-mic-handle"
        onPointerDown={onHandleDown}
        onPointerMove={onHandleMove}
        onPointerUp={onHandleUp}
        onPointerCancel={onHandleUp}
        title="Drag to move"
        aria-label="Drag floating mic"
      >
        <span className="floating-mic-grip" aria-hidden="true">⠿</span>
        <button
          type="button"
          className="floating-mic-hide"
          // Don't let a click on the hide button start a drag.
          onPointerDown={(e) => e.stopPropagation()}
          onClick={onHide}
          title="Hide floating mic"
          aria-label="Hide floating mic"
        >
          ×
        </button>
      </div>
      <VoiceInput variant="floating" onTranscript={onTranscript} />
    </div>
  )
}
|
|
@@ -185,6 +185,12 @@ function getOrCreateTerminal(tabId: string, onExit: (tabId: string) => void): {
|
|
|
185
185
|
return { terminal, fitAddon }
|
|
186
186
|
}
|
|
187
187
|
|
|
188
|
+
/**
 * Give keyboard focus back to the terminal belonging to `tabId`, e.g. after
 * voice text has been written to that tab. Does nothing when no terminal is
 * registered for the tab.
 */
export function focusTerminal(tabId: string): void {
  const entry = _terminals.get(tabId)
  if (entry) entry.terminal.focus()
}
|
|
193
|
+
|
|
188
194
|
export function destroyTerminal(tabId: string): void {
|
|
189
195
|
const state = _terminals.get(tabId)
|
|
190
196
|
if (!state) return
|
|
@@ -0,0 +1,321 @@
|
|
|
1
|
+
import { useCallback, useEffect, useRef, useState } from 'react'
|
|
2
|
+
import { transcribeBlob, type ModelProgress } from '../lib/localWhisper'
|
|
3
|
+
|
|
4
|
+
// ─── Minimal Web Speech API typings ──────────────────
|
|
5
|
+
// webkitSpeechRecognition isn't in the standard DOM lib, so declare just the
|
|
6
|
+
// surface we use. This API is frequently unavailable inside Electron (Chromium
|
|
7
|
+
// ships without Google's speech backend); when it fails we fall back to a local
|
|
8
|
+
// Whisper model (see ../lib/localWhisper).
|
|
9
|
+
|
|
10
|
+
/** One recognition result; index 0 carries the transcript this file reads. */
interface SpeechRecognitionResult {
  isFinal: boolean
  0: { transcript: string }
}

/** Payload of `onresult`: an array-like list of results. */
interface SpeechRecognitionEvent {
  resultIndex: number
  results: {
    length: number
    [index: number]: SpeechRecognitionResult
  }
}

/** Payload of `onerror`; `error` is the engine's error code string. */
interface SpeechRecognitionErrorEvent {
  error: string
}

/** The subset of a recognition instance that this file touches. */
interface SpeechRecognitionLike {
  // Configuration
  lang: string
  continuous: boolean
  interimResults: boolean
  // Lifecycle
  start: () => void
  stop: () => void
  abort: () => void
  // Event handlers (null detaches)
  onresult: ((e: SpeechRecognitionEvent) => void) | null
  onerror: ((e: SpeechRecognitionErrorEvent) => void) | null
  onend: (() => void) | null
}

/** Zero-argument constructor producing a recognition instance. */
type SpeechRecognitionCtor = new () => SpeechRecognitionLike
|
|
31
|
+
|
|
32
|
+
/**
 * Look up the Web Speech recognition constructor.
 *
 * @returns the standard `SpeechRecognition` constructor when present, else the
 *   `webkit`-prefixed one, else `null` (including when there is no `window`
 *   at all, so module-load feature detection cannot throw).
 */
function getRecognitionCtor(): SpeechRecognitionCtor | null {
  // Guard like the other capability checks do for `navigator`: this runs at
  // module load, where a missing global would otherwise be a ReferenceError.
  if (typeof window === 'undefined') return null
  const w = window as unknown as {
    SpeechRecognition?: SpeechRecognitionCtor
    webkitSpeechRecognition?: SpeechRecognitionCtor
  }
  return w.SpeechRecognition ?? w.webkitSpeechRecognition ?? null
}
|
|
39
|
+
|
|
40
|
+
// ─── Capabilities & engine selection ─────────────────
|
|
41
|
+
|
|
42
|
+
/** Speech-to-text backend used for a push-to-talk session. */
type Engine = 'web-speech' | 'local'
/** State of the mic button: waiting, capturing audio, or transcribing. */
type Phase = 'idle' | 'listening' | 'transcribing'

/** localStorage key that records a switch to the on-device engine. */
const ENGINE_KEY = 'ctlsurf.voiceEngine'

// Feature detection, evaluated once when the module loads.
const WEB_SPEECH_SUPPORTED = Boolean(getRecognitionCtor())
// The local engine needs a recorder, an offline audio context, and mic capture.
const LOCAL_SUPPORTED =
  typeof MediaRecorder !== 'undefined' &&
  typeof OfflineAudioContext !== 'undefined' &&
  typeof navigator !== 'undefined' &&
  Boolean(navigator.mediaDevices?.getUserMedia)
const ANY_SUPPORTED = LOCAL_SUPPORTED || WEB_SPEECH_SUPPORTED
|
|
54
|
+
|
|
55
|
+
/**
 * Choose the engine to start with.
 *
 * The on-device engine wins when it is supported and either Web Speech is
 * missing or a previous session stored `'local'` under ENGINE_KEY. Otherwise
 * Web Speech is used when supported; `'local'` is the last-resort value.
 */
function loadInitialEngine(): Engine {
  if (LOCAL_SUPPORTED) {
    if (!WEB_SPEECH_SUPPORTED) return 'local'

    let remembered: string | null = null
    try {
      remembered = localStorage.getItem(ENGINE_KEY)
    } catch { /* ignore */ }
    if (remembered === 'local') return 'local'
  }

  if (WEB_SPEECH_SUPPORTED) return 'web-speech'
  return 'local'
}
|
|
62
|
+
|
|
63
|
+
// Error codes that mean the Web Speech engine itself cannot be reached, as
// opposed to a mic permission/hardware problem (which would break the local
// fallback just the same).
function isEngineUnavailable(code: string): boolean {
  switch (code) {
    case 'network':
    case 'service-not-allowed':
      return true
    default:
      return false
  }
}
|
|
68
|
+
|
|
69
|
+
/**
 * Turn a thrown microphone error into a short user-facing message, keyed on
 * the error's `name`. Anything unrecognised (including non-objects) maps to a
 * generic message.
 */
function describeMicError(err: unknown): string {
  const errorName = (err as { name?: string })?.name
  switch (errorName) {
    case 'NotAllowedError':
    case 'SecurityError':
      return 'Microphone access denied'
    case 'NotFoundError':
      return 'No microphone found'
    default:
      return 'Could not start microphone'
  }
}
|
|
75
|
+
|
|
76
|
+
interface VoiceInputProps {
  // Called once per push-to-talk session with the final transcribed text.
  onTranscript: (text: string) => void
  // 'titlebar' (default) renders the compact titlebar pill; 'floating' renders
  // a round FAB used by the draggable on-canvas mic (see FloatingMic).
  variant?: 'titlebar' | 'floating'
}

/**
 * Hold-to-talk microphone button.
 *
 * While the button is held, audio goes either to the Web Speech engine or to a
 * MediaRecorder whose blob is transcribed on-device after release. The final
 * text is delivered through `onTranscript`. If Web Speech reports that its
 * service is unreachable, the component switches to the on-device engine,
 * remembers that choice in localStorage, and asks the user to press again.
 *
 * @param onTranscript receives the final text of each session.
 * @param variant selects the button/chip styling; defaults to `'titlebar'`.
 */
export function VoiceInput({ onTranscript, variant = 'titlebar' }: VoiceInputProps) {
  const [engine, setEngine] = useState<Engine>(loadInitialEngine)
  const [phase, setPhase] = useState<Phase>('idle')
  const [interim, setInterim] = useState('')
  const [modelPct, setModelPct] = useState<number | null>(null)
  const [error, setError] = useState<string | null>(null)
  const [notice, setNotice] = useState<string | null>(null)

  // Web Speech refs
  const recognitionRef = useRef<SpeechRecognitionLike | null>(null)
  const finalRef = useRef('')
  // Local (Whisper) refs
  const streamRef = useRef<MediaStream | null>(null)
  const recorderRef = useRef<MediaRecorder | null>(null)
  const chunksRef = useRef<Blob[]>([])
  // Id of the current local press. Release, a newer press, and unmount all
  // bump it, so a getUserMedia call that resolves late can tell it has been
  // superseded. (A plain boolean would be reset by the next press, letting two
  // pending requests both start recording.)
  const gestureRef = useRef(0)

  const engineRef = useRef(engine)
  useEffect(() => { engineRef.current = engine }, [engine])

  const onTranscriptRef = useRef(onTranscript)
  useEffect(() => { onTranscriptRef.current = onTranscript }, [onTranscript])

  // Auto-dismiss transient chips.
  useEffect(() => {
    if (!error) return
    const t = setTimeout(() => setError(null), 4500)
    return () => clearTimeout(t)
  }, [error])
  useEffect(() => {
    if (!notice) return
    const t = setTimeout(() => setNotice(null), 5000)
    return () => clearTimeout(t)
  }, [notice])

  const switchToLocal = useCallback((reason: string) => {
    try { localStorage.setItem(ENGINE_KEY, 'local') } catch { /* ignore */ }
    setEngine('local')
    setNotice(reason)
  }, [])

  const stopStream = useCallback(() => {
    streamRef.current?.getTracks().forEach((t) => t.stop())
    streamRef.current = null
  }, [])

  // ─── Web Speech engine ─────────────────────────────

  const startWebSpeech = useCallback(() => {
    const Ctor = getRecognitionCtor()
    if (!Ctor || recognitionRef.current) return
    setError(null); setNotice(null); setInterim('')
    finalRef.current = ''

    const rec = new Ctor()
    rec.lang = navigator.language || 'en-US'
    rec.continuous = true
    rec.interimResults = true

    rec.onresult = (event) => {
      // Rebuild both strings from the whole result list on every event.
      let finalText = ''
      let interimText = ''
      for (let i = 0; i < event.results.length; i++) {
        const res = event.results[i]
        if (res.isFinal) finalText += res[0].transcript
        else interimText += res[0].transcript
      }
      finalRef.current = finalText
      setInterim(interimText)
    }

    rec.onerror = (event) => {
      if (isEngineUnavailable(event.error) && LOCAL_SUPPORTED) {
        // The streamed audio is gone; switch engines and ask for a retry.
        finalRef.current = ''
        switchToLocal('Voice service unavailable — switched to on-device. Press again.')
      } else if (event.error !== 'no-speech' && event.error !== 'aborted') {
        setError(event.error === 'not-allowed' ? 'Microphone access denied' : `Voice error: ${event.error}`)
      }
    }

    rec.onend = () => {
      const text = finalRef.current.trim()
      recognitionRef.current = null
      setPhase('idle')
      setInterim('')
      if (text) onTranscriptRef.current(text)
    }

    recognitionRef.current = rec
    try {
      rec.start()
      setPhase('listening')
    } catch (err) {
      recognitionRef.current = null
      setPhase('idle')
      setError('Could not start microphone')
      console.error('[voice] web speech start failed', err)
    }
  }, [switchToLocal])

  const stopWebSpeech = useCallback(() => {
    try { recognitionRef.current?.stop() } catch { /* already stopped */ }
  }, [])

  // ─── Local (Whisper) engine ────────────────────────

  const handleModelProgress = useCallback((p: ModelProgress) => {
    if (p.status === 'progress' && typeof p.progress === 'number') {
      setModelPct(Math.min(100, Math.round(p.progress)))
    }
  }, [])

  const runLocalTranscription = useCallback(async (rec: MediaRecorder) => {
    stopStream()
    const blob = new Blob(chunksRef.current, { type: rec.mimeType || 'audio/webm' })
    chunksRef.current = []
    recorderRef.current = null
    if (blob.size === 0) { setPhase('idle'); return }

    setPhase('transcribing')
    setInterim('')
    try {
      const text = await transcribeBlob(blob, handleModelProgress)
      if (text) onTranscriptRef.current(text)
    } catch (err) {
      setError('On-device transcription failed')
      console.error('[voice] local transcription failed', err)
    } finally {
      setPhase('idle')
      setModelPct(null)
    }
  }, [stopStream, handleModelProgress])

  const startLocal = useCallback(async () => {
    setError(null); setNotice(null); setInterim('')
    const gesture = ++gestureRef.current
    let stream: MediaStream | null = null
    try {
      stream = await navigator.mediaDevices.getUserMedia({ audio: true })
      // Released, superseded, or unmounted during the permission/await — don't
      // record anything. Phase is left alone: it was idle when this press
      // began, and only a newer, live press may have changed it since.
      if (gestureRef.current !== gesture) {
        stream.getTracks().forEach((t) => t.stop())
        return
      }
      streamRef.current = stream
      chunksRef.current = []
      const rec = new MediaRecorder(stream)
      rec.ondataavailable = (e) => { if (e.data.size) chunksRef.current.push(e.data) }
      rec.onstop = () => { void runLocalTranscription(rec) }
      recorderRef.current = rec
      rec.start()
      setPhase('listening')
    } catch (err) {
      // Release only what this call acquired, never another press's stream.
      if (stream) {
        stream.getTracks().forEach((t) => t.stop())
        if (streamRef.current === stream) {
          streamRef.current = null
          recorderRef.current = null
        }
      }
      if (gestureRef.current === gesture) setPhase('idle')
      setError(describeMicError(err))
      console.error('[voice] getUserMedia failed', err)
    }
  }, [runLocalTranscription])

  const stopLocal = useCallback(() => {
    // Invalidate any getUserMedia request that is still pending (quick tap).
    gestureRef.current++
    const rec = recorderRef.current
    if (rec && rec.state !== 'inactive') {
      try { rec.stop() } catch { /* ignore */ }
    }
  }, [])

  // ─── Push-to-talk gesture ──────────────────────────

  const handlePointerDown = (e: React.PointerEvent) => {
    if (!ANY_SUPPORTED || phase !== 'idle') return
    e.preventDefault()
    e.currentTarget.setPointerCapture?.(e.pointerId)
    if (engineRef.current === 'web-speech' && WEB_SPEECH_SUPPORTED) startWebSpeech()
    else if (LOCAL_SUPPORTED) void startLocal()
  }
  const handlePointerUp = (e: React.PointerEvent) => {
    e.currentTarget.releasePointerCapture?.(e.pointerId)
    // Stop both engines: the selected engine can change mid-press (see the
    // onerror fallback), and each stop is a no-op when its engine is idle.
    stopWebSpeech()
    stopLocal()
  }

  // Clean up on unmount: cancel anything pending and release the mic without
  // delivering a transcript to a component that no longer exists.
  useEffect(() => () => {
    gestureRef.current++

    const recognition = recognitionRef.current
    recognitionRef.current = null
    if (recognition) {
      // Detach first — abort() still fires onend, which would emit the text.
      recognition.onresult = null
      recognition.onerror = null
      recognition.onend = null
      try { recognition.abort() } catch { /* ignore */ }
    }

    const recorder = recorderRef.current
    recorderRef.current = null
    if (recorder) {
      // Detach first — stop() fires onstop, which would start a transcription.
      recorder.ondataavailable = null
      recorder.onstop = null
      try { if (recorder.state !== 'inactive') recorder.stop() } catch { /* ignore */ }
    }

    chunksRef.current = []
    streamRef.current?.getTracks().forEach((t) => t.stop())
    streamRef.current = null
  }, [])

  // ─── Render ────────────────────────────────────────

  const listening = phase === 'listening'
  const busy = phase === 'transcribing'

  const title = !ANY_SUPPORTED
    ? 'Voice typing not supported in this build'
    : listening
      ? 'Listening… release to insert'
      : busy
        ? 'Transcribing…'
        : engine === 'local'
          ? 'Hold to talk (on-device) — speech is typed into the terminal'
          : 'Hold to talk — speech is typed into the terminal'

  // At most one status chip is shown; errors and notices only while idle.
  let chip: { kind: 'listening' | 'busy' | 'notice' | 'error'; text: string } | null = null
  if (error && phase === 'idle') chip = { kind: 'error', text: error }
  else if (notice && phase === 'idle') chip = { kind: 'notice', text: notice }
  else if (listening) chip = { kind: 'listening', text: interim || (engine === 'local' ? 'Recording…' : 'Listening…') }
  else if (busy) chip = { kind: 'busy', text: modelPct !== null ? `Downloading voice model… ${modelPct}%` : 'Transcribing…' }

  const floating = variant === 'floating'
  const btnClass = floating
    ? `voice-btn voice-btn-floating ${listening ? 'listening' : ''} ${busy ? 'busy' : ''}`
    : `titlebar-btn titlebar-icon-btn voice-btn ${listening ? 'listening' : ''} ${busy ? 'busy' : ''}`

  return (
    <div className="voice-input-wrap">
      <button
        type="button"
        className={btnClass}
        disabled={!ANY_SUPPORTED}
        onPointerDown={handlePointerDown}
        onPointerUp={handlePointerUp}
        onPointerCancel={handlePointerUp}
        onContextMenu={(e) => e.preventDefault()}
        title={title}
        aria-label="Voice typing (hold to talk)"
      >
        <span className="voice-icon" aria-hidden="true">🎤</span>
        <span className={`voice-dot ${listening ? 'on' : busy ? 'busy' : 'off'}`} />
      </button>
      {chip && <div className={`voice-chip ${chip.kind} ${floating ? 'voice-chip-floating' : ''}`}>{chip.text}</div>}
    </div>
  )
}
|