@phenx-inc/ctlsurf 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/electron-vite.config.ts +5 -0
- package/out/headless/index.mjs +2 -1
- package/out/headless/index.mjs.map +2 -2
- package/out/main/index.js +38 -6
- package/out/preload/index.js +3 -0
- package/out/renderer/assets/{cssMode-DkmdBgO7.js → cssMode-DbMmcl1h.js} +3 -3
- package/out/renderer/assets/{freemarker2-CI-gkP-3.js → freemarker2-CvaHiy92.js} +1 -1
- package/out/renderer/assets/{handlebars-D5tEqanR.js → handlebars-D58lUIOu.js} +1 -1
- package/out/renderer/assets/{html-fH93EYfn.js → html-D1h1aJbM.js} +1 -1
- package/out/renderer/assets/{htmlMode-CRicxcwK.js → htmlMode-BdkAp9qr.js} +3 -3
- package/out/renderer/assets/{index-BOOvUI7u.js → index-B60JU1yI.js} +461 -111
- package/out/renderer/assets/{index-ezC-iarf.css → index-DJFYmHjz.css} +89 -0
- package/out/renderer/assets/{javascript-D1Baz4fV.js → javascript-CXqZcnvb.js} +2 -2
- package/out/renderer/assets/{jsonMode-Bquqf3QN.js → jsonMode-BuVr-eSl.js} +3 -3
- package/out/renderer/assets/{liquid-ByOcPjBF.js → liquid-LKu0Wd0B.js} +1 -1
- package/out/renderer/assets/{lspLanguageFeatures-BxPLl0yy.js → lspLanguageFeatures-Cjr_4HGs.js} +1 -1
- package/out/renderer/assets/{mdx-yuNgx0rM.js → mdx-Bl84ILla.js} +1 -1
- package/out/renderer/assets/ort-wasm-simd-threaded.asyncify-DMmc6YqF.wasm +0 -0
- package/out/renderer/assets/{python-2OakgLlA.js → python-0sFd9G1k.js} +1 -1
- package/out/renderer/assets/{razor-DnIVMSwa.js → razor-Cqcu1rLJ.js} +1 -1
- package/out/renderer/assets/transformers.web-DtSCnG36.js +33668 -0
- package/out/renderer/assets/{tsMode-CRIrHuii.js → tsMode-CYd3NUkW.js} +1 -1
- package/out/renderer/assets/{typescript-DJ3C8Yly.js → typescript-rkc9lhpi.js} +1 -1
- package/out/renderer/assets/{xml-CalvD5_C.js → xml-EsHEUps1.js} +1 -1
- package/out/renderer/assets/{yaml-Cgs8pdVp.js → yaml-B9-nQ_s2.js} +1 -1
- package/out/renderer/index.html +2 -2
- package/package.json +2 -1
- package/src/main/index.ts +49 -2
- package/src/preload/index.ts +4 -0
- package/src/renderer/App.tsx +34 -2
- package/src/renderer/components/CtlsurfPanel.tsx +19 -3
- package/src/renderer/components/TerminalPanel.tsx +32 -1
- package/src/renderer/components/VoiceInput.tsx +313 -0
- package/src/renderer/lib/localWhisper.ts +88 -0
- package/src/renderer/styles.css +89 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { c as createWebWorker, e as editor, U as Uri, a as MarkerTag, M as MarkerSeverity, l as languages, t as typescriptDefaults, R as Range } from "./index-BOOvUI7u.js";
|
|
1
|
+
import { c as createWebWorker, e as editor, U as Uri, a as MarkerTag, M as MarkerSeverity, l as languages, t as typescriptDefaults, R as Range } from "./index-B60JU1yI.js";
|
|
2
2
|
class WorkerManager {
|
|
3
3
|
constructor(_modeId, _defaults) {
|
|
4
4
|
this._modeId = _modeId;
|
package/out/renderer/index.html
CHANGED
|
@@ -4,8 +4,8 @@
|
|
|
4
4
|
<meta charset="UTF-8" />
|
|
5
5
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
6
6
|
<title>ctlsurf-worker</title>
|
|
7
|
-
<script type="module" crossorigin src="./assets/index-BOOvUI7u.js"></script>
|
|
8
|
-
<link rel="stylesheet" crossorigin href="./assets/index-ezC-iarf.css">
|
|
7
|
+
<script type="module" crossorigin src="./assets/index-B60JU1yI.js"></script>
|
|
8
|
+
<link rel="stylesheet" crossorigin href="./assets/index-DJFYmHjz.css">
|
|
9
9
|
</head>
|
|
10
10
|
<body>
|
|
11
11
|
<div id="root"></div>
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@phenx-inc/ctlsurf",
|
|
3
|
-
"version": "0.5.1",
|
|
3
|
+
"version": "0.6.0",
|
|
4
4
|
"description": "Agent-agnostic terminal and desktop app for ctlsurf — run Claude Code, Codex, or any coding agent with live session logging and remote control",
|
|
5
5
|
"main": "out/main/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -43,6 +43,7 @@
|
|
|
43
43
|
"node": ">=18"
|
|
44
44
|
},
|
|
45
45
|
"dependencies": {
|
|
46
|
+
"@huggingface/transformers": "^4.2.0",
|
|
46
47
|
"@monaco-editor/react": "^4.7.0",
|
|
47
48
|
"@xterm/addon-fit": "^0.10.0",
|
|
48
49
|
"@xterm/addon-serialize": "^0.14.0",
|
package/src/main/index.ts
CHANGED
|
@@ -132,6 +132,13 @@ function createWindow(): void {
|
|
|
132
132
|
}
|
|
133
133
|
})
|
|
134
134
|
|
|
135
|
+
// Voice typing needs microphone access. Approve permission requests (there
|
|
136
|
+
// was no handler before, so the renderer already had the default-permissive
|
|
137
|
+
// behavior — this just ensures the mic request resolves to "allow").
|
|
138
|
+
mainWindow.webContents.session.setPermissionRequestHandler((_wc, _permission, callback) => {
|
|
139
|
+
callback(true)
|
|
140
|
+
})
|
|
141
|
+
|
|
135
142
|
if (process.env.ELECTRON_RENDERER_URL) {
|
|
136
143
|
mainWindow.loadURL(process.env.ELECTRON_RENDERER_URL)
|
|
137
144
|
} else {
|
|
@@ -300,12 +307,18 @@ ipcMain.handle('worker:createProject', async () => {
|
|
|
300
307
|
}
|
|
301
308
|
})
|
|
302
309
|
|
|
303
|
-
|
|
310
|
+
// Resolve the ctlsurf frontend URL (the origin the embedded webview loads from)
|
|
311
|
+
// from the active profile, applying the dev localhost:8000 → :88 rewrite.
|
|
312
|
+
function getCtlsurfFrontendUrl(): string {
|
|
304
313
|
const profile = orchestrator.getActiveProfile()
|
|
305
314
|
const baseUrl = profile.baseUrl || process.env.CTLSURF_BASE_URL || 'https://app.ctlsurf.com'
|
|
306
|
-
|
|
315
|
+
return baseUrl.includes('localhost:8000')
|
|
307
316
|
? baseUrl.replace(':8000', ':88')
|
|
308
317
|
: baseUrl
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
ipcMain.handle('worker:getWebviewInfo', async () => {
|
|
321
|
+
const frontendUrl = getCtlsurfFrontendUrl()
|
|
309
322
|
|
|
310
323
|
if (!orchestrator.ctlsurfApi.getApiKey()) {
|
|
311
324
|
return { frontendUrl: `${frontendUrl}?embed=1`, authenticated: false }
|
|
@@ -348,6 +361,40 @@ ipcMain.handle('worker:getWebviewInfo', async () => {
|
|
|
348
361
|
}
|
|
349
362
|
})
|
|
350
363
|
|
|
364
|
+
// Given a URL the agent printed in the terminal, return an authenticated embed
|
|
365
|
+
// URL if it points at the ctlsurf frontend, else null (so the caller opens it
|
|
366
|
+
// in the default browser instead).
|
|
367
|
+
ipcMain.handle('worker:getEmbedUrl', async (_event, targetUrl: string) => {
|
|
368
|
+
try {
|
|
369
|
+
if (!targetUrl) return null
|
|
370
|
+
const frontendUrl = getCtlsurfFrontendUrl()
|
|
371
|
+
const profile = orchestrator.getActiveProfile()
|
|
372
|
+
const rawBaseUrl = profile.baseUrl || process.env.CTLSURF_BASE_URL || 'https://app.ctlsurf.com'
|
|
373
|
+
|
|
374
|
+
const target = new URL(targetUrl)
|
|
375
|
+
const front = new URL(frontendUrl)
|
|
376
|
+
|
|
377
|
+
// Treat both the frontend origin and the raw (dev :8000) base as ctlsurf.
|
|
378
|
+
const origins = new Set<string>([front.origin])
|
|
379
|
+
try { origins.add(new URL(rawBaseUrl).origin) } catch { /* ignore */ }
|
|
380
|
+
if (!origins.has(target.origin)) return null
|
|
381
|
+
|
|
382
|
+
// Rebuild on the frontend origin (handles the dev :8000 → :88 case) so the
|
|
383
|
+
// webview loads from the same origin its auth code is valid for.
|
|
384
|
+
const out = new URL(target.pathname + target.search + target.hash, front.origin)
|
|
385
|
+
out.searchParams.set('embed', '1')
|
|
386
|
+
|
|
387
|
+
if (orchestrator.ctlsurfApi.getApiKey()) {
|
|
388
|
+
const { code } = await orchestrator.ctlsurfApi.getAuthCode()
|
|
389
|
+
out.searchParams.set('_code', code)
|
|
390
|
+
}
|
|
391
|
+
return out.toString()
|
|
392
|
+
} catch (err: any) {
|
|
393
|
+
log('[worker] getEmbedUrl failed:', err?.message)
|
|
394
|
+
return null
|
|
395
|
+
}
|
|
396
|
+
})
|
|
397
|
+
|
|
351
398
|
// ─── Profile IPC ──────────────────────────────────
|
|
352
399
|
|
|
353
400
|
ipcMain.handle('profiles:list', () => orchestrator.listProfiles())
|
package/src/preload/index.ts
CHANGED
|
@@ -129,6 +129,10 @@ const api = {
|
|
|
129
129
|
pageUrl?: string;
|
|
130
130
|
authenticated: boolean;
|
|
131
131
|
}> => ipcRenderer.invoke('worker:getWebviewInfo'),
|
|
132
|
+
// Resolve a terminal-printed URL to an authenticated ctlsurf embed URL, or
|
|
133
|
+
// null if it isn't a ctlsurf link (caller opens those externally).
|
|
134
|
+
getEmbedUrl: (targetUrl: string): Promise<string | null> =>
|
|
135
|
+
ipcRenderer.invoke('worker:getEmbedUrl', targetUrl),
|
|
132
136
|
|
|
133
137
|
onWorkerStatus: (callback: (status: string) => void) => {
|
|
134
138
|
const listener = (_event: Electron.IpcRendererEvent, status: string) => callback(status)
|
package/src/renderer/App.tsx
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { useState, useEffect, useCallback, useRef } from 'react'
|
|
2
|
-
import { TerminalPanel, destroyTerminal } from './components/TerminalPanel'
|
|
2
|
+
import { TerminalPanel, destroyTerminal, focusTerminal } from './components/TerminalPanel'
|
|
3
|
+
import { VoiceInput } from './components/VoiceInput'
|
|
3
4
|
import { CtlsurfPanel } from './components/CtlsurfPanel'
|
|
4
5
|
import { EditorPanel } from './components/EditorPanel'
|
|
5
6
|
import { AgentPicker } from './components/AgentPicker'
|
|
@@ -59,6 +60,7 @@ declare global {
|
|
|
59
60
|
getWebviewInfo: () => Promise<{
|
|
60
61
|
frontendUrl: string; pageUrl?: string; authenticated: boolean;
|
|
61
62
|
}>
|
|
63
|
+
getEmbedUrl: (targetUrl: string) => Promise<string | null>
|
|
62
64
|
getWorkerStatus: () => Promise<string>
|
|
63
65
|
getWorkerId: () => Promise<string | null>
|
|
64
66
|
onWorkerStatus: (callback: (status: string) => void) => () => void
|
|
@@ -117,6 +119,11 @@ export default function App() {
|
|
|
117
119
|
const [cwd, setCwd] = useState<string | null>(null)
|
|
118
120
|
const [projectName, setProjectName] = useState<string | null>(null)
|
|
119
121
|
const [updateInfo, setUpdateInfo] = useState<UpdateInfo | null>(null)
|
|
122
|
+
// A ctlsurf link clicked in the terminal: the authenticated URL to load in
|
|
123
|
+
// the ctlsurf panel. `n` bumps on every request so re-clicking the same URL
|
|
124
|
+
// still triggers a navigation.
|
|
125
|
+
const [ctlsurfNav, setCtlsurfNav] = useState<{ url: string; n: number } | null>(null)
|
|
126
|
+
const ctlsurfNavCounter = useRef(0)
|
|
120
127
|
|
|
121
128
|
// Multi-tab state
|
|
122
129
|
const [tabs, setTabs] = useState<TabInfo[]>(() => {
|
|
@@ -200,6 +207,16 @@ export default function App() {
|
|
|
200
207
|
}
|
|
201
208
|
}, [trackingActive])
|
|
202
209
|
|
|
210
|
+
// Voice typing: inject the transcribed text into the active terminal exactly
|
|
211
|
+
// as if it were typed (no auto-submit), then refocus so the user can press
|
|
212
|
+
// Enter to send it.
|
|
213
|
+
const handleVoiceTranscript = useCallback((text: string) => {
|
|
214
|
+
const trimmed = text.trim()
|
|
215
|
+
if (!trimmed) return
|
|
216
|
+
window.worker.writePty(activeTabId, trimmed)
|
|
217
|
+
focusTerminal(activeTabId)
|
|
218
|
+
}, [activeTabId])
|
|
219
|
+
|
|
203
220
|
const cwdRef = useRef<string | null>(null)
|
|
204
221
|
|
|
205
222
|
const handleSpawn = useCallback(async (tabId: string, agent: AgentConfig) => {
|
|
@@ -349,6 +366,20 @@ export default function App() {
|
|
|
349
366
|
return () => window.removeEventListener('keydown', handleKeyDown)
|
|
350
367
|
}, [togglePane])
|
|
351
368
|
|
|
369
|
+
// A ctlsurf link was clicked in the terminal — reveal the ctlsurf pane if
|
|
370
|
+
// hidden and hand the authenticated URL to the panel to load.
|
|
371
|
+
useEffect(() => {
|
|
372
|
+
const onOpen = (e: Event) => {
|
|
373
|
+
const url = (e as CustomEvent).detail?.url as string | undefined
|
|
374
|
+
if (!url) return
|
|
375
|
+
if (!findPaneIds(layout).includes('ctlsurf')) togglePane('ctlsurf')
|
|
376
|
+
ctlsurfNavCounter.current += 1
|
|
377
|
+
setCtlsurfNav({ url, n: ctlsurfNavCounter.current })
|
|
378
|
+
}
|
|
379
|
+
window.addEventListener('ctlsurf-open-url', onOpen)
|
|
380
|
+
return () => window.removeEventListener('ctlsurf-open-url', onOpen)
|
|
381
|
+
}, [layout, togglePane])
|
|
382
|
+
|
|
352
383
|
// Build pane contents (always rendered, layout controls visibility)
|
|
353
384
|
const panes: PaneContent[] = [
|
|
354
385
|
{ id: 'editor', label: 'Editor', content: <EditorPanel cwd={cwd} /> },
|
|
@@ -409,7 +440,7 @@ export default function App() {
|
|
|
409
440
|
</div>
|
|
410
441
|
),
|
|
411
442
|
},
|
|
412
|
-
{ id: 'ctlsurf', label: 'ctlsurf', content: <CtlsurfPanel /> },
|
|
443
|
+
{ id: 'ctlsurf', label: 'ctlsurf', content: <CtlsurfPanel navigate={ctlsurfNav} /> },
|
|
413
444
|
]
|
|
414
445
|
|
|
415
446
|
return (
|
|
@@ -451,6 +482,7 @@ export default function App() {
|
|
|
451
482
|
</svg>
|
|
452
483
|
<span>Tickets</span>
|
|
453
484
|
</button>
|
|
485
|
+
<VoiceInput onTranscript={handleVoiceTranscript} />
|
|
454
486
|
<span className="titlebar-separator" />
|
|
455
487
|
{agents.map(a => {
|
|
456
488
|
const activeTab = tabs.find(t => t.id === activeTabId)
|
|
@@ -1,9 +1,15 @@
|
|
|
1
1
|
import { useRef, useEffect, useState } from 'react'
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
interface CtlsurfPanelProps {
|
|
4
|
+
// An authenticated ctlsurf URL to load (e.g. from a terminal link click).
|
|
5
|
+
// `n` bumps per request so repeat navigations to the same URL still apply.
|
|
6
|
+
navigate?: { url: string; n: number } | null
|
|
7
|
+
}
|
|
8
|
+
|
|
9
|
+
export function CtlsurfPanel({ navigate }: CtlsurfPanelProps) {
|
|
4
10
|
const webviewRef = useRef<HTMLWebViewElement>(null)
|
|
5
11
|
const [url, setUrl] = useState<string | null>(null)
|
|
6
|
-
const [key, setKey] = useState(0) // force remount on cwd change
|
|
12
|
+
const [key, setKey] = useState(0) // force remount on cwd change / navigation
|
|
7
13
|
|
|
8
14
|
const loadUrl = async () => {
|
|
9
15
|
try {
|
|
@@ -15,7 +21,17 @@ export function CtlsurfPanel() {
|
|
|
15
21
|
}
|
|
16
22
|
}
|
|
17
23
|
|
|
18
|
-
|
|
24
|
+
// Default-load the project page on mount, unless a navigation is already
|
|
25
|
+
// queued (e.g. the pane was just revealed to show a clicked link).
|
|
26
|
+
useEffect(() => { if (!navigate?.url) loadUrl() }, [])
|
|
27
|
+
|
|
28
|
+
// Navigate to a specific authenticated URL when requested.
|
|
29
|
+
useEffect(() => {
|
|
30
|
+
if (navigate?.url) {
|
|
31
|
+
setUrl(navigate.url)
|
|
32
|
+
setKey(k => k + 1)
|
|
33
|
+
}
|
|
34
|
+
}, [navigate?.n])
|
|
19
35
|
|
|
20
36
|
// Reload webview when cwd changes
|
|
21
37
|
useEffect(() => {
|
|
@@ -22,6 +22,31 @@ function copySelectionAsEmailTable(terminal: Terminal): void {
|
|
|
22
22
|
)
|
|
23
23
|
}
|
|
24
24
|
|
|
25
|
+
// Open a URL in the system browser, mirroring WebLinksAddon's safe default
|
|
26
|
+
// (clear the opener so the new page can't reach back into this window).
|
|
27
|
+
function openExternal(uri: string): void {
|
|
28
|
+
const win = window.open()
|
|
29
|
+
if (win) {
|
|
30
|
+
try { (win as { opener: unknown }).opener = null } catch { /* ignore */ }
|
|
31
|
+
win.location.href = uri
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
// Custom handler for links clicked in the terminal. ctlsurf page links are
|
|
36
|
+
// routed into the embedded (authenticated) ctlsurf panel via a window event;
|
|
37
|
+
// everything else opens in the default browser as before.
|
|
38
|
+
function handleTerminalLink(_event: MouseEvent, uri: string): void {
|
|
39
|
+
window.worker.getEmbedUrl(uri)
|
|
40
|
+
.then((embedUrl) => {
|
|
41
|
+
if (embedUrl) {
|
|
42
|
+
window.dispatchEvent(new CustomEvent('ctlsurf-open-url', { detail: { url: embedUrl } }))
|
|
43
|
+
} else {
|
|
44
|
+
openExternal(uri)
|
|
45
|
+
}
|
|
46
|
+
})
|
|
47
|
+
.catch(() => openExternal(uri))
|
|
48
|
+
}
|
|
49
|
+
|
|
25
50
|
interface AgentConfig {
|
|
26
51
|
id: string
|
|
27
52
|
name: string
|
|
@@ -105,7 +130,7 @@ function getOrCreateTerminal(tabId: string, onExit: (tabId: string) => void): {
|
|
|
105
130
|
|
|
106
131
|
const fitAddon = new FitAddon()
|
|
107
132
|
terminal.loadAddon(fitAddon)
|
|
108
|
-
terminal.loadAddon(new WebLinksAddon())
|
|
133
|
+
terminal.loadAddon(new WebLinksAddon(handleTerminalLink))
|
|
109
134
|
|
|
110
135
|
// ⌘/Ctrl+Shift+E → convert the current selection to an email table. Handled
|
|
111
136
|
// here (not as a DOM listener) so it works even while a mouse-tracking TUI
|
|
@@ -160,6 +185,12 @@ function getOrCreateTerminal(tabId: string, onExit: (tabId: string) => void): {
|
|
|
160
185
|
return { terminal, fitAddon }
|
|
161
186
|
}
|
|
162
187
|
|
|
188
|
+
// Return keyboard focus to a tab's terminal (e.g. after inserting voice text
|
|
189
|
+
// so the user can immediately press Enter to submit).
|
|
190
|
+
export function focusTerminal(tabId: string): void {
|
|
191
|
+
_terminals.get(tabId)?.terminal.focus()
|
|
192
|
+
}
|
|
193
|
+
|
|
163
194
|
export function destroyTerminal(tabId: string): void {
|
|
164
195
|
const state = _terminals.get(tabId)
|
|
165
196
|
if (!state) return
|
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
import { useCallback, useEffect, useRef, useState } from 'react'
|
|
2
|
+
import { transcribeBlob, type ModelProgress } from '../lib/localWhisper'
|
|
3
|
+
|
|
4
|
+
// ─── Minimal Web Speech API typings ──────────────────
|
|
5
|
+
// webkitSpeechRecognition isn't in the standard DOM lib, so declare just the
|
|
6
|
+
// surface we use. This API is frequently unavailable inside Electron (Chromium
|
|
7
|
+
// ships without Google's speech backend); when it fails we fall back to a local
|
|
8
|
+
// Whisper model (see ../lib/localWhisper).
|
|
9
|
+
|
|
10
|
+
interface SpeechRecognitionResult {
|
|
11
|
+
isFinal: boolean
|
|
12
|
+
0: { transcript: string }
|
|
13
|
+
}
|
|
14
|
+
interface SpeechRecognitionEvent {
|
|
15
|
+
resultIndex: number
|
|
16
|
+
results: { length: number;[index: number]: SpeechRecognitionResult }
|
|
17
|
+
}
|
|
18
|
+
interface SpeechRecognitionErrorEvent { error: string }
|
|
19
|
+
interface SpeechRecognitionLike {
|
|
20
|
+
lang: string
|
|
21
|
+
continuous: boolean
|
|
22
|
+
interimResults: boolean
|
|
23
|
+
start: () => void
|
|
24
|
+
stop: () => void
|
|
25
|
+
abort: () => void
|
|
26
|
+
onresult: ((e: SpeechRecognitionEvent) => void) | null
|
|
27
|
+
onerror: ((e: SpeechRecognitionErrorEvent) => void) | null
|
|
28
|
+
onend: (() => void) | null
|
|
29
|
+
}
|
|
30
|
+
type SpeechRecognitionCtor = new () => SpeechRecognitionLike
|
|
31
|
+
|
|
32
|
+
function getRecognitionCtor(): SpeechRecognitionCtor | null {
|
|
33
|
+
const w = window as unknown as {
|
|
34
|
+
SpeechRecognition?: SpeechRecognitionCtor
|
|
35
|
+
webkitSpeechRecognition?: SpeechRecognitionCtor
|
|
36
|
+
}
|
|
37
|
+
return w.SpeechRecognition || w.webkitSpeechRecognition || null
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
// ─── Capabilities & engine selection ─────────────────
|
|
41
|
+
|
|
42
|
+
type Engine = 'web-speech' | 'local'
|
|
43
|
+
type Phase = 'idle' | 'listening' | 'transcribing'
|
|
44
|
+
|
|
45
|
+
const ENGINE_KEY = 'ctlsurf.voiceEngine'
|
|
46
|
+
|
|
47
|
+
const WEB_SPEECH_SUPPORTED = getRecognitionCtor() !== null
|
|
48
|
+
const LOCAL_SUPPORTED =
|
|
49
|
+
typeof navigator !== 'undefined' &&
|
|
50
|
+
!!navigator.mediaDevices?.getUserMedia &&
|
|
51
|
+
typeof MediaRecorder !== 'undefined' &&
|
|
52
|
+
typeof OfflineAudioContext !== 'undefined'
|
|
53
|
+
const ANY_SUPPORTED = WEB_SPEECH_SUPPORTED || LOCAL_SUPPORTED
|
|
54
|
+
|
|
55
|
+
function loadInitialEngine(): Engine {
|
|
56
|
+
if (!WEB_SPEECH_SUPPORTED && LOCAL_SUPPORTED) return 'local'
|
|
57
|
+
try {
|
|
58
|
+
if (localStorage.getItem(ENGINE_KEY) === 'local' && LOCAL_SUPPORTED) return 'local'
|
|
59
|
+
} catch { /* ignore */ }
|
|
60
|
+
return WEB_SPEECH_SUPPORTED ? 'web-speech' : 'local'
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
// Web Speech errors that mean the engine itself is unreachable (vs. a mic
|
|
64
|
+
// permission/hardware problem, which would also break the local fallback).
|
|
65
|
+
function isEngineUnavailable(code: string): boolean {
|
|
66
|
+
return code === 'network' || code === 'service-not-allowed'
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
function describeMicError(err: unknown): string {
|
|
70
|
+
const name = (err as { name?: string })?.name
|
|
71
|
+
if (name === 'NotAllowedError' || name === 'SecurityError') return 'Microphone access denied'
|
|
72
|
+
if (name === 'NotFoundError') return 'No microphone found'
|
|
73
|
+
return 'Could not start microphone'
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
interface VoiceInputProps {
|
|
77
|
+
// Called once per push-to-talk session with the final transcribed text.
|
|
78
|
+
onTranscript: (text: string) => void
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
export function VoiceInput({ onTranscript }: VoiceInputProps) {
|
|
82
|
+
const [engine, setEngine] = useState<Engine>(loadInitialEngine)
|
|
83
|
+
const [phase, setPhase] = useState<Phase>('idle')
|
|
84
|
+
const [interim, setInterim] = useState('')
|
|
85
|
+
const [modelPct, setModelPct] = useState<number | null>(null)
|
|
86
|
+
const [error, setError] = useState<string | null>(null)
|
|
87
|
+
const [notice, setNotice] = useState<string | null>(null)
|
|
88
|
+
|
|
89
|
+
// Web Speech refs
|
|
90
|
+
const recognitionRef = useRef<SpeechRecognitionLike | null>(null)
|
|
91
|
+
const finalRef = useRef('')
|
|
92
|
+
// Local (Whisper) refs
|
|
93
|
+
const streamRef = useRef<MediaStream | null>(null)
|
|
94
|
+
const recorderRef = useRef<MediaRecorder | null>(null)
|
|
95
|
+
const chunksRef = useRef<Blob[]>([])
|
|
96
|
+
// Set true when the user releases before getUserMedia resolves (quick tap).
|
|
97
|
+
const cancelGestureRef = useRef(false)
|
|
98
|
+
|
|
99
|
+
const engineRef = useRef(engine)
|
|
100
|
+
useEffect(() => { engineRef.current = engine }, [engine])
|
|
101
|
+
|
|
102
|
+
const onTranscriptRef = useRef(onTranscript)
|
|
103
|
+
useEffect(() => { onTranscriptRef.current = onTranscript }, [onTranscript])
|
|
104
|
+
|
|
105
|
+
// Auto-dismiss transient chips.
|
|
106
|
+
useEffect(() => {
|
|
107
|
+
if (!error) return
|
|
108
|
+
const t = setTimeout(() => setError(null), 4500)
|
|
109
|
+
return () => clearTimeout(t)
|
|
110
|
+
}, [error])
|
|
111
|
+
useEffect(() => {
|
|
112
|
+
if (!notice) return
|
|
113
|
+
const t = setTimeout(() => setNotice(null), 5000)
|
|
114
|
+
return () => clearTimeout(t)
|
|
115
|
+
}, [notice])
|
|
116
|
+
|
|
117
|
+
const switchToLocal = useCallback((reason: string) => {
|
|
118
|
+
try { localStorage.setItem(ENGINE_KEY, 'local') } catch { /* ignore */ }
|
|
119
|
+
setEngine('local')
|
|
120
|
+
setNotice(reason)
|
|
121
|
+
}, [])
|
|
122
|
+
|
|
123
|
+
const stopStream = useCallback(() => {
|
|
124
|
+
streamRef.current?.getTracks().forEach((t) => t.stop())
|
|
125
|
+
streamRef.current = null
|
|
126
|
+
}, [])
|
|
127
|
+
|
|
128
|
+
// ─── Web Speech engine ─────────────────────────────
|
|
129
|
+
|
|
130
|
+
const startWebSpeech = useCallback(() => {
|
|
131
|
+
const Ctor = getRecognitionCtor()
|
|
132
|
+
if (!Ctor || recognitionRef.current) return
|
|
133
|
+
setError(null); setNotice(null); setInterim('')
|
|
134
|
+
finalRef.current = ''
|
|
135
|
+
|
|
136
|
+
const rec = new Ctor()
|
|
137
|
+
rec.lang = navigator.language || 'en-US'
|
|
138
|
+
rec.continuous = true
|
|
139
|
+
rec.interimResults = true
|
|
140
|
+
|
|
141
|
+
rec.onresult = (event) => {
|
|
142
|
+
let finalText = ''
|
|
143
|
+
let interimText = ''
|
|
144
|
+
for (let i = 0; i < event.results.length; i++) {
|
|
145
|
+
const res = event.results[i]
|
|
146
|
+
if (res.isFinal) finalText += res[0].transcript
|
|
147
|
+
else interimText += res[0].transcript
|
|
148
|
+
}
|
|
149
|
+
finalRef.current = finalText
|
|
150
|
+
setInterim(interimText)
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
rec.onerror = (event) => {
|
|
154
|
+
if (isEngineUnavailable(event.error) && LOCAL_SUPPORTED) {
|
|
155
|
+
// The streamed audio is gone; switch engines and ask for a retry.
|
|
156
|
+
finalRef.current = ''
|
|
157
|
+
switchToLocal('Voice service unavailable — switched to on-device. Press again.')
|
|
158
|
+
} else if (event.error !== 'no-speech' && event.error !== 'aborted') {
|
|
159
|
+
setError(event.error === 'not-allowed' ? 'Microphone access denied' : `Voice error: ${event.error}`)
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
rec.onend = () => {
|
|
164
|
+
const text = finalRef.current.trim()
|
|
165
|
+
recognitionRef.current = null
|
|
166
|
+
setPhase('idle')
|
|
167
|
+
setInterim('')
|
|
168
|
+
if (text) onTranscriptRef.current(text)
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
recognitionRef.current = rec
|
|
172
|
+
try {
|
|
173
|
+
rec.start()
|
|
174
|
+
setPhase('listening')
|
|
175
|
+
} catch (err) {
|
|
176
|
+
recognitionRef.current = null
|
|
177
|
+
setPhase('idle')
|
|
178
|
+
setError('Could not start microphone')
|
|
179
|
+
console.error('[voice] web speech start failed', err)
|
|
180
|
+
}
|
|
181
|
+
}, [switchToLocal])
|
|
182
|
+
|
|
183
|
+
const stopWebSpeech = useCallback(() => {
|
|
184
|
+
try { recognitionRef.current?.stop() } catch { /* already stopped */ }
|
|
185
|
+
}, [])
|
|
186
|
+
|
|
187
|
+
// ─── Local (Whisper) engine ────────────────────────
|
|
188
|
+
|
|
189
|
+
const handleModelProgress = useCallback((p: ModelProgress) => {
|
|
190
|
+
if (p.status === 'progress' && typeof p.progress === 'number') {
|
|
191
|
+
setModelPct(Math.min(100, Math.round(p.progress)))
|
|
192
|
+
}
|
|
193
|
+
}, [])
|
|
194
|
+
|
|
195
|
+
const runLocalTranscription = useCallback(async (rec: MediaRecorder) => {
|
|
196
|
+
stopStream()
|
|
197
|
+
const blob = new Blob(chunksRef.current, { type: rec.mimeType || 'audio/webm' })
|
|
198
|
+
chunksRef.current = []
|
|
199
|
+
recorderRef.current = null
|
|
200
|
+
if (blob.size === 0) { setPhase('idle'); return }
|
|
201
|
+
|
|
202
|
+
setPhase('transcribing')
|
|
203
|
+
setInterim('')
|
|
204
|
+
try {
|
|
205
|
+
const text = await transcribeBlob(blob, handleModelProgress)
|
|
206
|
+
if (text) onTranscriptRef.current(text)
|
|
207
|
+
} catch (err) {
|
|
208
|
+
setError('On-device transcription failed')
|
|
209
|
+
console.error('[voice] local transcription failed', err)
|
|
210
|
+
} finally {
|
|
211
|
+
setPhase('idle')
|
|
212
|
+
setModelPct(null)
|
|
213
|
+
}
|
|
214
|
+
}, [stopStream, handleModelProgress])
|
|
215
|
+
|
|
216
|
+
const startLocal = useCallback(async () => {
|
|
217
|
+
setError(null); setNotice(null); setInterim('')
|
|
218
|
+
cancelGestureRef.current = false
|
|
219
|
+
try {
|
|
220
|
+
const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
|
|
221
|
+
// Released during the permission/await — don't record anything.
|
|
222
|
+
if (cancelGestureRef.current) {
|
|
223
|
+
stream.getTracks().forEach((t) => t.stop())
|
|
224
|
+
setPhase('idle')
|
|
225
|
+
return
|
|
226
|
+
}
|
|
227
|
+
streamRef.current = stream
|
|
228
|
+
chunksRef.current = []
|
|
229
|
+
const rec = new MediaRecorder(stream)
|
|
230
|
+
rec.ondataavailable = (e) => { if (e.data.size) chunksRef.current.push(e.data) }
|
|
231
|
+
rec.onstop = () => { void runLocalTranscription(rec) }
|
|
232
|
+
recorderRef.current = rec
|
|
233
|
+
rec.start()
|
|
234
|
+
setPhase('listening')
|
|
235
|
+
} catch (err) {
|
|
236
|
+
stopStream()
|
|
237
|
+
setPhase('idle')
|
|
238
|
+
setError(describeMicError(err))
|
|
239
|
+
console.error('[voice] getUserMedia failed', err)
|
|
240
|
+
}
|
|
241
|
+
}, [runLocalTranscription, stopStream])
|
|
242
|
+
|
|
243
|
+
const stopLocal = useCallback(() => {
|
|
244
|
+
cancelGestureRef.current = true
|
|
245
|
+
const rec = recorderRef.current
|
|
246
|
+
if (rec && rec.state !== 'inactive') {
|
|
247
|
+
try { rec.stop() } catch { /* ignore */ }
|
|
248
|
+
}
|
|
249
|
+
}, [])
|
|
250
|
+
|
|
251
|
+
// ─── Push-to-talk gesture ──────────────────────────
|
|
252
|
+
|
|
253
|
+
const handlePointerDown = (e: React.PointerEvent) => {
|
|
254
|
+
if (!ANY_SUPPORTED || phase !== 'idle') return
|
|
255
|
+
e.preventDefault()
|
|
256
|
+
e.currentTarget.setPointerCapture?.(e.pointerId)
|
|
257
|
+
if (engineRef.current === 'web-speech' && WEB_SPEECH_SUPPORTED) startWebSpeech()
|
|
258
|
+
else if (LOCAL_SUPPORTED) void startLocal()
|
|
259
|
+
}
|
|
260
|
+
const handlePointerUp = (e: React.PointerEvent) => {
|
|
261
|
+
e.currentTarget.releasePointerCapture?.(e.pointerId)
|
|
262
|
+
if (engineRef.current === 'web-speech') stopWebSpeech()
|
|
263
|
+
else stopLocal()
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
// Clean up on unmount.
|
|
267
|
+
useEffect(() => () => {
|
|
268
|
+
try { recognitionRef.current?.abort() } catch { /* ignore */ }
|
|
269
|
+
try { recorderRef.current?.stop() } catch { /* ignore */ }
|
|
270
|
+
streamRef.current?.getTracks().forEach((t) => t.stop())
|
|
271
|
+
}, [])
|
|
272
|
+
|
|
273
|
+
// ─── Render ────────────────────────────────────────
|
|
274
|
+
|
|
275
|
+
const listening = phase === 'listening'
|
|
276
|
+
const busy = phase === 'transcribing'
|
|
277
|
+
|
|
278
|
+
const title = !ANY_SUPPORTED
|
|
279
|
+
? 'Voice typing not supported in this build'
|
|
280
|
+
: listening
|
|
281
|
+
? 'Listening… release to insert'
|
|
282
|
+
: busy
|
|
283
|
+
? 'Transcribing…'
|
|
284
|
+
: engine === 'local'
|
|
285
|
+
? 'Hold to talk (on-device) — speech is typed into the terminal'
|
|
286
|
+
: 'Hold to talk — speech is typed into the terminal'
|
|
287
|
+
|
|
288
|
+
let chip: { kind: 'listening' | 'busy' | 'notice' | 'error'; text: string } | null = null
|
|
289
|
+
if (error && phase === 'idle') chip = { kind: 'error', text: error }
|
|
290
|
+
else if (notice && phase === 'idle') chip = { kind: 'notice', text: notice }
|
|
291
|
+
else if (listening) chip = { kind: 'listening', text: interim || (engine === 'local' ? 'Recording…' : 'Listening…') }
|
|
292
|
+
else if (busy) chip = { kind: 'busy', text: modelPct !== null ? `Downloading voice model… ${modelPct}%` : 'Transcribing…' }
|
|
293
|
+
|
|
294
|
+
return (
|
|
295
|
+
<div className="voice-input-wrap">
|
|
296
|
+
<button
|
|
297
|
+
type="button"
|
|
298
|
+
className={`titlebar-btn titlebar-icon-btn voice-btn ${listening ? 'listening' : ''} ${busy ? 'busy' : ''}`}
|
|
299
|
+
disabled={!ANY_SUPPORTED}
|
|
300
|
+
onPointerDown={handlePointerDown}
|
|
301
|
+
onPointerUp={handlePointerUp}
|
|
302
|
+
onPointerCancel={handlePointerUp}
|
|
303
|
+
onContextMenu={(e) => e.preventDefault()}
|
|
304
|
+
title={title}
|
|
305
|
+
aria-label="Voice typing (hold to talk)"
|
|
306
|
+
>
|
|
307
|
+
<span className="voice-icon" aria-hidden="true">🎤</span>
|
|
308
|
+
<span className={`voice-dot ${listening ? 'on' : busy ? 'busy' : 'off'}`} />
|
|
309
|
+
</button>
|
|
310
|
+
{chip && <div className={`voice-chip ${chip.kind}`}>{chip.text}</div>}
|
|
311
|
+
</div>
|
|
312
|
+
)
|
|
313
|
+
}
|