@1presence/speech 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +129 -0
- package/package.json +40 -0
- package/public/index.html +483 -0
- package/public/styles.css +452 -0
- package/server.mjs +268 -0
package/README.md
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# @1presence/speech
|
|
2
|
+
|
|
3
|
+
**Free, private speech-to-text for Mac. No cloud. No subscription. No monthly fee.**
|
|
4
|
+
|
|
5
|
+
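Dictate into any app on your Mac — terminal, editor, notes, chat, anything — using the speech recognition that is already built into Chrome or Edge. This tool adds no cloud service, account, or API key of its own. Note, however, that Chrome and Edge typically perform Web Speech API recognition with a server-based engine, so your audio may be sent to the browser vendor's speech service.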
|
|
6
|
+
|
|
7
|
+
One command to start:
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
npx @1presence/speech
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Your browser opens automatically. Start speaking.
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## Why this exists
|
|
18
|
+
|
|
19
|
+
macOS has excellent voice input, but routing speech into a terminal or a specific app is harder than it should be. This tool fills that gap: a clean browser UI where you dictate, review your words, and send them — live, as you speak — into wherever your cursor is sitting.
|
|
20
|
+
|
|
21
|
+
It works with your browser's built-in Web Speech API (Chrome and Edge on Mac have the best recognition quality). No API keys. No account. No sending audio to a third party. Just open it and talk.
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Requirements
|
|
26
|
+
|
|
27
|
+
- Node.js 18 or later
|
|
28
|
+
- Chrome or Edge (Web Speech API has the widest support there)
|
|
29
|
+
- macOS (keystroke injection uses AppleScript; the browser UI works on any OS, but the send-to-app feature is Mac-only)
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## Usage
|
|
34
|
+
|
|
35
|
+
### One-off, no install needed
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
npx @1presence/speech
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
The browser opens at `http://127.0.0.1:8787` automatically. Allow microphone access when Chrome asks. Start speaking — words appear in the transcript as you go.
|
|
42
|
+
|
|
43
|
+
### Installed globally
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
npm install -g @1presence/speech
|
|
47
|
+
1presence-speech
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### In a project
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
npm install @1presence/speech
|
|
54
|
+
npx 1presence-speech
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
## Sending text to apps
|
|
60
|
+
|
|
61
|
+
Three delivery modes — pick from the dropdown:
|
|
62
|
+
|
|
63
|
+
| Mode | How it works | Best for |
|
|
64
|
+
|------|-------------|----------|
|
|
65
|
+
| **Focused app typed keystrokes** | AppleScript `keystroke` — types character by character into the focused window | Most apps, most of the time |
|
|
66
|
+
| **Frontmost app via paste** | Copies to clipboard and sends Cmd+V | Apps that handle paste better than keystroke |
|
|
67
|
+
| **iTerm active session** | iTerm's native `write text` API | iTerm users |
|
|
68
|
+
|
|
69
|
+
**Click "Send to app"** to send the full transcript at any time. Or leave **Live word mode** enabled (the default) to stream words into the focused app as you speak — no need to click between sentences.
|
|
70
|
+
|
|
71
|
+
---
|
|
72
|
+
|
|
73
|
+
## macOS permissions
|
|
74
|
+
|
|
75
|
+
The first time you send text, macOS will block it. Here is how to fix that in under a minute:
|
|
76
|
+
|
|
77
|
+
1. Open **System Settings > Privacy & Security > Accessibility**
|
|
78
|
+
2. Find the app you used to run the command — Terminal, iTerm, Warp, VS Code, Cursor — and enable it
|
|
79
|
+
3. If it is not listed, click **+** and add it from `/Applications`
|
|
80
|
+
4. Fully quit and reopen that app, then run `npx @1presence/speech` again
|
|
81
|
+
|
|
82
|
+
If macOS still blocks input after enabling the app, reset the Accessibility permissions database:
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
tccutil reset Accessibility
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Then repeat the steps above.
|
|
89
|
+
|
|
90
|
+
For **iTerm mode**, also open **System Settings > Privacy & Security > Automation** and allow your terminal to control iTerm.
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
## Options
|
|
95
|
+
|
|
96
|
+
| Variable | Default | Description |
|
|
97
|
+
|---|---|---|
|
|
98
|
+
| `PORT` | `8787` | Port the local server listens on |
|
|
99
|
+
| `HOST` | `127.0.0.1` | Host to bind — loopback only by default |
|
|
100
|
+
|
|
101
|
+
---
|
|
102
|
+
|
|
103
|
+
## How it works
|
|
104
|
+
|
|
105
|
+
A small Node.js HTTP server (zero dependencies) serves the browser UI and exposes a `POST /paste` endpoint. The browser page uses the Web Speech API for recognition — the engine built into Chrome or Edge. These browsers typically run recognition on a server-based engine, so audio may be sent to the browser vendor's speech service; this tool adds no cloud service of its own. When words are ready, the page posts them to the local server, which uses AppleScript (`osascript`) to deliver them to the focused app. The page sends the recognised text only to that local server.
|
|
106
|
+
|
|
107
|
+
Live word mode sends words as they are recognised, before Chrome finalises the transcript, so they appear in your app immediately. A phrase-ordering gate in the browser ensures words always arrive in the correct sequence even when Chrome is processing multiple speech segments in parallel.
|
|
108
|
+
|
|
109
|
+
---
|
|
110
|
+
|
|
111
|
+
## Part of 1Presence
|
|
112
|
+
|
|
113
|
+
This tool is part of [1Presence](https://1presence.com) — an AI agent that lives in your pocket. It reads your notes and email, remembers your conversations across sessions, connects to the tools you already use, and works from your phone, tablet, or any browser without needing an app install.
|
|
114
|
+
|
|
115
|
+
Speech Terminal is one piece of the productivity toolkit we built for ourselves and decided to share. If you want an AI that truly knows your context — not just the last few messages — [1Presence](https://1presence.com) is worth a look.
|
|
116
|
+
|
|
117
|
+
---
|
|
118
|
+
|
|
119
|
+
## Troubleshooting
|
|
120
|
+
|
|
121
|
+
**"Unsupported" error** — Open the page in Chrome or Edge. Safari and Firefox do not support the Web Speech API.
|
|
122
|
+
|
|
123
|
+
**Error 1002** — macOS blocked keystroke injection. Grant Accessibility permission to the terminal app running the server, fully quit and reopen it, then try again.
|
|
124
|
+
|
|
125
|
+
**Speech stops after a few seconds** — Chrome may have revoked microphone access. Click the lock icon in the address bar and set Microphone to Allow.
|
|
126
|
+
|
|
127
|
+
**iTerm does not appear in Automation settings** — Click **Test in 3s** to trigger the macOS permission prompt, then check **System Settings > Privacy & Security > Automation**.
|
|
128
|
+
|
|
129
|
+
**Words arrive out of order or duplicated** — Make sure you are on the latest version: `npx @1presence/speech@latest`. Version 1.0.0 fixed a phrase-ordering bug that could scramble live-word output during fast continuous speech.
|
package/package.json
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@1presence/speech",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Free speech-to-text for Mac — dictate into any app using your browser's built-in voice recognition, no subscription required",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"bin": {
|
|
7
|
+
"1presence-speech": "server.mjs"
|
|
8
|
+
},
|
|
9
|
+
"files": [
|
|
10
|
+
"server.mjs",
|
|
11
|
+
"public",
|
|
12
|
+
"README.md"
|
|
13
|
+
],
|
|
14
|
+
"scripts": {
|
|
15
|
+
"start": "node server.mjs"
|
|
16
|
+
},
|
|
17
|
+
"engines": {
|
|
18
|
+
"node": ">=18"
|
|
19
|
+
},
|
|
20
|
+
"keywords": [
|
|
21
|
+
"speech-to-text",
|
|
22
|
+
"dictation",
|
|
23
|
+
"voice",
|
|
24
|
+
"macos",
|
|
25
|
+
"accessibility",
|
|
26
|
+
"web-speech-api",
|
|
27
|
+
"free",
|
|
28
|
+
"terminal",
|
|
29
|
+
"productivity"
|
|
30
|
+
],
|
|
31
|
+
"license": "MIT",
|
|
32
|
+
"homepage": "https://1presence.com",
|
|
33
|
+
"repository": {
|
|
34
|
+
"type": "git",
|
|
35
|
+
"url": "https://github.com/jonathankata/presence"
|
|
36
|
+
},
|
|
37
|
+
"publishConfig": {
|
|
38
|
+
"access": "public"
|
|
39
|
+
}
|
|
40
|
+
}
|
|
@@ -0,0 +1,483 @@
|
|
|
1
|
+
<!doctype html>
|
|
2
|
+
<html lang="en">
|
|
3
|
+
<head>
|
|
4
|
+
<meta charset="utf-8" />
|
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
|
6
|
+
<title>Speech Terminal</title>
|
|
7
|
+
<link rel="stylesheet" href="/styles.css" />
|
|
8
|
+
</head>
|
|
9
|
+
<body>
|
|
10
|
+
<main class="shell">
|
|
11
|
+
<section class="panel">
|
|
12
|
+
<div class="topbar">
|
|
13
|
+
<div>
|
|
14
|
+
<h1>Speech Terminal</h1>
|
|
15
|
+
<p>Dictate in Chrome, review the text, then send it into the focused app.</p>
|
|
16
|
+
</div>
|
|
17
|
+
<div class="status" id="status">Idle</div>
|
|
18
|
+
</div>
|
|
19
|
+
|
|
20
|
+
<div class="controls">
|
|
21
|
+
<button id="start" type="button">Start</button>
|
|
22
|
+
<button id="stop" type="button" disabled>Stop</button>
|
|
23
|
+
<button id="send" type="button">Send to app</button>
|
|
24
|
+
<button id="test" type="button" class="secondary">Test in 3s</button>
|
|
25
|
+
<button id="diagnosticsButton" type="button" class="secondary">Diagnostics</button>
|
|
26
|
+
<button id="clear" type="button" class="secondary">Clear</button>
|
|
27
|
+
</div>
|
|
28
|
+
|
|
29
|
+
<label class="field">
|
|
30
|
+
<span>Destination</span>
|
|
31
|
+
<select id="destination">
|
|
32
|
+
<option value="type">Focused app typed keystrokes</option>
|
|
33
|
+
<option value="iterm">iTerm active session</option>
|
|
34
|
+
<option value="frontmost">Frontmost app via paste</option>
|
|
35
|
+
</select>
|
|
36
|
+
</label>
|
|
37
|
+
|
|
38
|
+
<label class="toggle">
|
|
39
|
+
<input id="liveWords" type="checkbox" checked />
|
|
40
|
+
<span>Live word mode: send words as they are recognized</span>
|
|
41
|
+
</label>
|
|
42
|
+
|
|
43
|
+
<textarea id="transcript" spellcheck="true" placeholder="Your dictated text will appear here."></textarea>
|
|
44
|
+
|
|
45
|
+
<div class="hint" id="hint">
|
|
46
|
+
Web Speech API works best in Chrome or Edge. The default destination types into whichever app has focus after macOS Accessibility permission is granted.
|
|
47
|
+
</div>
|
|
48
|
+
|
|
49
|
+
<section class="setup" aria-labelledby="setup-title">
|
|
50
|
+
<h2 id="setup-title">macOS Setup</h2>
|
|
51
|
+
<ol>
|
|
52
|
+
<li>Start this bridge from Terminal, iTerm, Warp, or the terminal app you normally use: <code>cd speech && npm start</code>.</li>
|
|
53
|
+
<li>Open this page in Chrome or Edge at <code>http://127.0.0.1:8787</code>.</li>
|
|
54
|
+
<li>When the browser asks for microphone access, allow it. The page tries to start listening automatically; if Chrome blocks auto-start, click <strong>Start</strong>.</li>
|
|
55
|
+
<li>Leave <strong>Destination</strong> set to <strong>Focused app typed keystrokes</strong> for the broadest app support.</li>
|
|
56
|
+
<li>Click <strong>Test in 3s</strong>, focus any app text input within three seconds, and wait for the test text. If macOS asks for Accessibility access, allow it.</li>
|
|
57
|
+
<li>Open <strong>System Settings -> Privacy & Security -> Accessibility</strong>.</li>
|
|
58
|
+
<li>Enable the app that is running <code>npm start</code>: Terminal, iTerm, Warp, Visual Studio Code, Cursor, or whichever app launched the server. If it is missing, click <strong>+</strong> and add it from <code>/Applications</code>.</li>
|
|
59
|
+
<li>If that app is already enabled but keystrokes are still blocked, toggle it off and on, or remove it and add it again.</li>
|
|
60
|
+
<li>For iTerm, also check <strong>System Settings -> Privacy & Security -> Automation</strong> and allow the app running <code>npm start</code> to control iTerm.</li>
|
|
61
|
+
<li>Fully quit and reopen the app running <code>npm start</code>, then restart <code>npm start</code> after granting permission.</li>
|
|
62
|
+
<li>For manual mode, dictate here, edit the transcript, focus the target app, then click <strong>Send to app</strong>.</li>
|
|
63
|
+
<li>For terminal mode, focus the target app, then speak. Speech is sent automatically without returning to this page.</li>
|
|
64
|
+
<li>Short pauses create a new line in the transcript. Longer pauses add a full stop, start the next phrase with a capital letter, and show a blank line.</li>
|
|
65
|
+
<li><strong>Live word mode</strong> is enabled by default. It sends words earlier and optimistically while you speak. Turn it off to send only after the browser marks speech as settled.</li>
|
|
66
|
+
</ol>
|
|
67
|
+
|
|
68
|
+
<div class="callout">
|
|
69
|
+
<strong>If you see error 1002:</strong>
|
|
70
|
+
macOS has not allowed the server app to control keystrokes yet. Grant Accessibility permission to the app running this server, not to Chrome. Then fully quit and reopen that app, restart <code>npm start</code>, refresh this page, and try <strong>Test in 3s</strong> again.
|
|
71
|
+
</div>
|
|
72
|
+
|
|
73
|
+
<div class="callout">
|
|
74
|
+
<strong>If Accessibility still blocks input:</strong>
|
|
75
|
+
run <code>tccutil reset Accessibility</code>, then repeat the Accessibility setup. This resets the macOS approval database for synthetic keystrokes.
|
|
76
|
+
</div>
|
|
77
|
+
|
|
78
|
+
<div class="callout">
|
|
79
|
+
<strong>If iTerm does not appear in Automation:</strong>
|
|
80
|
+
switch Destination to <strong>iTerm active session</strong>, click <strong>Test in 3s</strong> first to trigger the macOS prompt, and allow the request if macOS asks. If it still does not appear, run <code>tccutil reset AppleEvents</code>, restart this server, then click <strong>Test in 3s</strong> again. macOS may list the controller as Terminal, iTerm, Warp, Visual Studio Code, Cursor, or osascript depending on how the server was started.
|
|
81
|
+
</div>
|
|
82
|
+
|
|
83
|
+
<pre id="diagnostics" class="diagnostics" hidden></pre>
|
|
84
|
+
</section>
|
|
85
|
+
</section>
|
|
86
|
+
</main>
|
|
87
|
+
|
|
88
|
+
<script>
|
|
89
|
+
// Recognizer constructor; falls back to the webkit-prefixed name.
const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;

// Element handles, looked up once by id.
const byId = (id) => document.getElementById(id);
const statusEl = byId("status");
const transcriptEl = byId("transcript");
const startButton = byId("start");
const stopButton = byId("stop");
const sendButton = byId("send");
const testButton = byId("test");
const diagnosticsButton = byId("diagnosticsButton");
const diagnosticsEl = byId("diagnostics");
const clearButton = byId("clear");
const destinationEl = byId("destination");
const liveWordsEl = byId("liveWords");

// Pause thresholds (milliseconds) used by getPauseType.
const SHORT_PAUSE_MS = 1000;
const LONG_PAUSE_MS = 2400;

// Per-result bookkeeping, keyed by the result index from the recognizer.
const resultState = new Map();

// Mutable session state.
let recognition = null; // recognizer instance, created on first start
let listening = false; // mirrors the recognizer's start/end events
let finalText = ""; // finalized transcript text
let hasFinalSpeech = false; // true once any finalized text exists
let lastFinalAt = 0; // Date.now() of the last finalized chunk
let sendQueue = Promise.resolve(); // serializes automatic sends
|
|
111
|
+
|
|
112
|
+
// Shows `text` in the status element and records `tone` in its data-tone
// attribute (empty string when no tone is given).
function setStatus(text, tone = "") {
  const badge = statusEl;
  badge.textContent = text;
  badge.dataset.tone = tone;
}
|
|
116
|
+
|
|
117
|
+
// Records the listening flag and syncs the Start/Stop buttons with it:
// Start is hidden and disabled while listening, Stop is disabled otherwise.
function setListening(nextListening) {
  listening = nextListening;

  const active = listening;
  startButton.hidden = active;
  startButton.disabled = active;
  stopButton.disabled = !active;
}
|
|
123
|
+
|
|
124
|
+
// Appends a finalized chunk to the transcript: applies the pending display
// separator for this result, adds the formatted text, and stamps the time
// so the next result can measure the pause. Blank chunks are ignored.
function appendFinal(text, state) {
  const cleaned = normalizeSpeechText(text);
  if (cleaned === "") return;

  applyDisplaySeparator(state);
  finalText += formatChunkText(cleaned, state);
  hasFinalSpeech = true;
  lastFinalAt = Date.now();
}
|
|
133
|
+
|
|
134
|
+
// Posts text to the local server's /paste endpoint together with the
// currently selected destination.
//
// text       - string to deliver.
// pressEnter - forwarded verbatim in the request body.
//
// Rejects with an Error when the server reports failure (payload.ok falsy)
// or when the response body is not JSON at all.
async function sendToTerminal(text, pressEnter) {
  const response = await fetch("/paste", {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify({
      text,
      pressEnter,
      destination: destinationEl.value,
    }),
  });

  let payload = null;
  try {
    payload = await response.json();
  } catch (error) {
    // A non-JSON body (for example a plain-text error page) would otherwise
    // surface as an opaque SyntaxError in the status badge.
    throw new Error(`Paste failed (HTTP ${response.status}).`);
  }

  if (!payload || !payload.ok) {
    throw new Error((payload && payload.error) || "Paste failed.");
  }
}
|
|
150
|
+
|
|
151
|
+
// Chains an automatic send onto sendQueue so sends run one after another.
// On failure the error message is shown and an active recognition session
// is stopped. Empty text is ignored.
function queueAutoSend(text) {
  if (!text) return;

  const deliver = async () => {
    setStatus("Sending", "active");
    await sendToTerminal(text, false);
    setStatus("Listening", "active");
  };

  const onFailure = (error) => {
    setStatus(error.message, "error");
    if (recognition && listening) recognition.stop();
  };

  sendQueue = sendQueue.then(deliver).catch(onFailure);
}
|
|
165
|
+
|
|
166
|
+
// Trims the text and collapses every run of whitespace to a single space.
function normalizeSpeechText(text) {
  const trimmed = text.trim();
  return trimmed.replace(/\s+/g, " ");
}
|
|
169
|
+
|
|
170
|
+
// Upper-cases the first letter found in the text and leaves the rest alone.
// Matches any Unicode letter: the recognizer runs with navigator.language,
// and an ASCII-only match would turn "école" into "éCole".
function capitalizeFirstWord(text) {
  return text.replace(/\p{L}/u, (letter) => letter.toUpperCase());
}
|
|
173
|
+
|
|
174
|
+
// True when the trimmed text ends with a sentence terminator, optionally
// followed by one closing quote or bracket. Besides ASCII . ! ? this also
// accepts the ellipsis, the CJK full stop, full-width ! and ?, and curly
// closing quotes, so no extra "." is appended after them.
function endsWithSentencePunctuation(text) {
  return /[.!?\u2026\u3002\uFF01\uFF1F]["'\u201D\u2019)\]]?$/.test(text.trim());
}
|
|
177
|
+
|
|
178
|
+
// Classifies the time since the last finalized chunk as "long", "short" or
// "none". Returns "none" when nothing has been finalized yet.
function getPauseType() {
  const hasBaseline = hasFinalSpeech && lastFinalAt;
  if (!hasBaseline) return "none";

  const elapsed = Date.now() - lastFinalAt;
  return elapsed >= LONG_PAUSE_MS ? "long" : elapsed >= SHORT_PAUSE_MS ? "short" : "none";
}
|
|
186
|
+
|
|
187
|
+
// Creates a fresh bookkeeping record for one recognition result. The pause
// type is captured at creation time.
function createResultState() {
  const pauseType = getPauseType();
  return {
    sentText: "",
    finalizedText: "",
    pauseType,
    appPrefixSent: false,
    displaySeparatorApplied: false,
  };
}
|
|
196
|
+
|
|
197
|
+
// Splits text into words on single spaces after normalization.
// Blank input yields an empty array.
function splitSpeechWords(text) {
  const cleaned = normalizeSpeechText(text);
  if (!cleaned) return [];
  return cleaned.split(" ");
}
|
|
201
|
+
|
|
202
|
+
// Counts how many leading entries of the two arrays are strictly equal.
function commonPrefixLength(leftWords, rightWords) {
  const limit = Math.min(leftWords.length, rightWords.length);
  for (let position = 0; position < limit; position += 1) {
    if (leftWords[position] !== rightWords[position]) return position;
  }
  return limit;
}
|
|
209
|
+
|
|
210
|
+
// Returns the bookkeeping record for a result index, creating and storing
// one on first use.
function getResultState(resultIndex) {
  if (resultState.has(resultIndex)) return resultState.get(resultIndex);

  const fresh = createResultState();
  resultState.set(resultIndex, fresh);
  return fresh;
}
|
|
216
|
+
|
|
217
|
+
// Replaces the record for a result index with a brand-new one and returns it.
function resetResultState(resultIndex) {
  return resultState.set(resultIndex, createResultState()).get(resultIndex);
}
|
|
222
|
+
|
|
223
|
+
// Normalizes a chunk and capitalizes it when it opens the transcript or
// follows a long pause. Blank chunks yield "".
function formatChunkText(text, state) {
  const cleaned = normalizeSpeechText(text);
  if (!cleaned) return "";

  const startsSentence = !hasFinalSpeech || state.pauseType === "long";
  if (startsSentence) return capitalizeFirstWord(cleaned);
  return cleaned;
}
|
|
228
|
+
|
|
229
|
+
// Picks the text placed between the existing transcript and the next chunk:
// nothing before any finalized speech, a blank line (preceded by "." unless
// sourceText already ends a sentence) after a long pause, a newline after a
// short pause, and a single space otherwise.
function getDisplaySeparator(state, sourceText = finalText) {
  if (!hasFinalSpeech) return "";

  switch (state.pauseType) {
    case "long": {
      const stop = endsWithSentencePunctuation(sourceText) ? "" : ".";
      return `${stop}\n\n`;
    }
    case "short":
      return "\n";
    default:
      return " ";
  }
}
|
|
237
|
+
|
|
238
|
+
// Appends this result's display separator to the transcript at most once
// per result state.
function applyDisplaySeparator(state) {
  if (!state.displaySeparatorApplied) {
    const joiner = getDisplaySeparator(state);
    if (joiner) {
      finalText = finalText.trimEnd() + joiner;
    }
    state.displaySeparatorApplied = true;
  }
}
|
|
248
|
+
|
|
249
|
+
// Text sent to the target app ahead of a result's first words. Only a
// result that follows a long pause gets one: " " when the transcript
// already ends a sentence, ". " otherwise.
function getAppPrefix(state) {
  const needsPrefix = hasFinalSpeech && !state.appPrefixSent && state.pauseType === "long";
  if (!needsPrefix) return "";

  return endsWithSentencePunctuation(finalText) ? " " : ". ";
}
|
|
254
|
+
|
|
255
|
+
// Normalise a word for comparison: lowercase + strip leading/trailing
// punctuation. Keeps apostrophes so contractions ("it's") match correctly.
// This lets "sure" match "sure," and "Testing" match "testing".
// Letters, combining marks and digits from any script are kept. An
// ASCII-only class would reduce every Cyrillic or CJK word to "" and make
// all of them compare equal.
function wordKey(w) {
  return w.toLowerCase().replace(/^[^\p{L}\p{M}\p{N}']+|[^\p{L}\p{M}\p{N}']+$/gu, "");
}
|
|
261
|
+
|
|
262
|
+
// Works out which words of `next` still need sending, given that `previous`
// was already sent. Words are compared by wordKey.
//  - `previous` is a word-wise prefix of `next`: returns the extra words.
//  - otherwise, with allowCorrection: returns everything from the first
//    differing word onward.
//  - otherwise: returns "".
function getSpeechDelta(previous, next, allowCorrection) {
  const nextWords = splitSpeechWords(next);
  if (nextWords.length === 0) return "";

  const prevKeys = splitSpeechWords(previous).map(wordKey);
  const nextKeys = nextWords.map(wordKey);
  const shared = commonPrefixLength(prevKeys, nextKeys);

  // The shared prefix covers all of `previous` exactly when it is a prefix.
  const isPureAppend = shared === prevKeys.length;
  if (!isPureAppend && !allowCorrection) return "";

  return nextWords.slice(shared).join(" ");
}
|
|
283
|
+
|
|
284
|
+
// Returns the part of an interim transcript that is safe to send: every
// word except the last one. With a single word or blank input nothing is
// ready, so "" is returned and the function always yields a string.
function getReadyInterimText(text) {
  const normalized = normalizeSpeechText(text);
  const boundary = normalized.lastIndexOf(" ");

  return boundary === -1 ? "" : normalized.slice(0, boundary);
}
|
|
291
|
+
|
|
292
|
+
// Sends whatever part of a recognition result has not been sent yet.
//
// resultIndex       - index of the result in the recognizer's result list.
// text              - current transcript of that result.
// onlyCompleteWords - when true, the last word is held back.
// isFinal           - whether the result is finalized; final results may
//                     also re-send from the first differing word.
function queueResultDelta(resultIndex, text, onlyCompleteWords, isFinal) {
  let entry = getResultState(resultIndex);

  // A new interim result on an already finalized index starts over.
  if (!isFinal && entry.finalizedText) {
    entry = resetResultState(resultIndex);
  }

  const rawCandidate = onlyCompleteWords ? getReadyInterimText(text) : normalizeSpeechText(text);
  const formatted = formatChunkText(rawCandidate || "", entry);

  const nothingToSend = !formatted;
  const repeatedFinal = isFinal && entry.finalizedText === formatted;
  if (nothingToSend || repeatedFinal) return;

  const pending = getSpeechDelta(entry.sentText, formatted, isFinal);
  if (pending) {
    const lead = getAppPrefix(entry);
    entry.appPrefixSent = true;
    queueAutoSend(`${lead}${pending} `);
  }

  entry.sentText = formatted;
  if (isFinal) entry.finalizedText = formatted;
}
|
|
318
|
+
|
|
319
|
+
// Builds the speech recognizer and wires its event handlers.
// Returns null, after flagging the page as unsupported, when the browser
// exposes no SpeechRecognition constructor.
function createRecognition() {
  if (!SpeechRecognition) {
    setStatus("Unsupported", "error");
    startButton.disabled = true;
    return null;
  }

  const rec = new SpeechRecognition();
  rec.continuous = true;
  rec.interimResults = true;
  rec.lang = navigator.language || "en-US";

  rec.onstart = () => {
    setListening(true);
    resultState.clear();
    setStatus("Listening", "active");
  };

  rec.onend = () => {
    setListening(false);
    // `end` also fires after an `error` event, and after queueAutoSend stops
    // recognition because a send failed. Keep that error message visible
    // instead of replacing it with "Idle".
    if (statusEl.dataset.tone !== "error") {
      setStatus("Idle");
    }
  };

  rec.onerror = (event) => {
    setStatus(event.error || "Speech error", "error");
    setListening(false);
  };

  rec.onresult = (event) => {
    // Start from the finalized transcript; interim text is layered on top
    // for display only.
    let displayText = finalText;

    for (let i = event.resultIndex; i < event.results.length; i += 1) {
      const result = event.results[i];
      const text = result[0].transcript;
      let state = getResultState(i);
      if (!result.isFinal && state.finalizedText) {
        state = resetResultState(i);
      }

      if (result.isFinal) {
        // Send before appending: queueResultDelta reads finalText and
        // hasFinalSpeech as they were before this chunk was added.
        queueResultDelta(i, text, false, true);
        appendFinal(text, state);
        displayText = finalText;
      } else {
        const formattedInterimText = formatChunkText(text, state);
        if (formattedInterimText) {
          displayText = `${displayText.trimEnd()}${getDisplaySeparator(state, displayText)}${formattedInterimText}`;
        }
        if (liveWordsEl.checked) {
          // Gate: only live-word result i if all earlier results have
          // finalised. Chrome can fire interim events for result N+1
          // before result N finalises; without this guard their words
          // land in the send queue out of order.
          const prevState = i > 0 ? resultState.get(i - 1) : null;
          if (!prevState || prevState.finalizedText) {
            queueResultDelta(i, text, true, false);
          }
        }
      }
    }

    transcriptEl.value = displayText;
  };

  return rec;
}
|
|
385
|
+
|
|
386
|
+
// Starts listening, creating the recognizer on first use. Does nothing when
// recognition is unavailable or already running. The transcript state is
// seeded from whatever the textarea currently holds.
function startRecognition() {
  if (!recognition) {
    recognition = createRecognition();
  }
  if (!recognition || listening) return;

  const existing = transcriptEl.value;
  finalText = existing ? existing.trim() : "";
  hasFinalSpeech = Boolean(finalText);
  lastFinalAt = hasFinalSpeech ? Date.now() : 0;

  try {
    recognition.start();
  } catch (error) {
    setListening(false);
    setStatus(error.message || "Click Start", "error");
  }
}
|
|
400
|
+
|
|
401
|
+
// Start: begin (or resume) listening.
startButton.addEventListener("click", () => startRecognition());

// Stop: only meaningful while a session is running.
stopButton.addEventListener("click", () => {
  if (!recognition || !listening) return;
  recognition.stop();
});

// Clear: wipe the transcript and all per-result bookkeeping, then put the
// cursor back in the textarea.
clearButton.addEventListener("click", () => {
  resultState.clear();
  finalText = "";
  hasFinalSpeech = false;
  lastFinalAt = 0;
  transcriptEl.value = "";
  transcriptEl.focus();
});
|
|
417
|
+
|
|
418
|
+
// Send to app: deliver the whole (trimmed) transcript in one request. The
// button is disabled for the duration of the request.
async function handleSendClick() {
  const text = transcriptEl.value.trim();
  if (text === "") {
    setStatus("No text", "error");
    return;
  }

  sendButton.disabled = true;
  setStatus("Sending", "active");

  try {
    await sendToTerminal(text, false);
    setStatus("Sent", "success");
  } catch (error) {
    setStatus(error.message, "error");
  } finally {
    sendButton.disabled = false;
  }
}

sendButton.addEventListener("click", handleSendClick);
|
|
437
|
+
|
|
438
|
+
// Test in 3s: wait three seconds so the user can focus the target app, then
// send a fixed test phrase.
async function handleTestClick() {
  testButton.disabled = true;
  setStatus("Focus target", "active");

  try {
    await new Promise((resolve) => setTimeout(resolve, 3000));
    setStatus("Testing", "active");
    await sendToTerminal("speech terminal test ", false);
    setStatus("Test sent", "success");
  } catch (error) {
    setStatus(error.message, "error");
  } finally {
    testButton.disabled = false;
  }
}

testButton.addEventListener("click", handleTestClick);
|
|
453
|
+
|
|
454
|
+
// Diagnostics: ask the local server for its diagnostics report and show it
// as pretty-printed JSON in the diagnostics panel.
async function handleDiagnosticsClick() {
  diagnosticsButton.disabled = true;
  setStatus("Checking", "active");

  try {
    const response = await fetch("/diagnostics", { method: "POST" });
    const payload = await response.json();
    if (!payload.ok) {
      throw new Error(payload.error || "Diagnostics failed.");
    }

    diagnosticsEl.hidden = false;
    diagnosticsEl.textContent = JSON.stringify(payload.diagnostics, null, 2);
    setStatus("Checked", "success");
  } catch (error) {
    setStatus(error.message, "error");
  } finally {
    diagnosticsButton.disabled = false;
  }
}

diagnosticsButton.addEventListener("click", handleDiagnosticsClick);
|
|
471
|
+
|
|
472
|
+
// Initial UI state. setListening(false) enables the Start button, so it
// must run before the unsupported-browser check. In the other order it
// would undo the disable below until the load handler runs.
setListening(false);

if (!SpeechRecognition) {
  setStatus("Unsupported", "error");
  startButton.disabled = true;
}

// Try to start listening as soon as the page has loaded.
window.addEventListener("load", () => {
  startRecognition();
});
|
|
481
|
+
</script>
|
|
482
|
+
</body>
|
|
483
|
+
</html>
|
|
@@ -0,0 +1,452 @@
|
|
|
1
|
+
@import url("https://fonts.googleapis.com/css2?family=Gilda+Display&family=Alegreya:ital,wght@0,400;0,500;0,700;1,400;1,500&family=Jost:wght@300;400;500&display=swap");
|
|
2
|
+
|
|
3
|
+
:root {
|
|
4
|
+
color-scheme: light;
|
|
5
|
+
|
|
6
|
+
--font-display: "Gilda Display", Georgia, serif;
|
|
7
|
+
--font-body: "Alegreya", Georgia, serif;
|
|
8
|
+
--font-ui: "Jost", system-ui, sans-serif;
|
|
9
|
+
--font-mono: "JetBrains Mono", ui-monospace, SFMono-Regular, Menlo, monospace;
|
|
10
|
+
|
|
11
|
+
--surface: oklch(0.955 0.018 82);
|
|
12
|
+
--surface-raised: oklch(0.910 0.026 76);
|
|
13
|
+
--surface-overlay: oklch(0.930 0.020 80);
|
|
14
|
+
--surface-hover: oklch(0.895 0.030 74);
|
|
15
|
+
--surface-code: oklch(0.900 0.022 78);
|
|
16
|
+
|
|
17
|
+
--text-primary: oklch(0.220 0.030 50);
|
|
18
|
+
--text-secondary: oklch(0.450 0.038 55);
|
|
19
|
+
--text-tertiary: oklch(0.620 0.022 62);
|
|
20
|
+
--text-faint: oklch(0.680 0.014 65);
|
|
21
|
+
|
|
22
|
+
--accent: oklch(0.420 0.082 52);
|
|
23
|
+
--accent-hover: oklch(0.360 0.088 50);
|
|
24
|
+
--text-on-accent: oklch(0.960 0.010 82);
|
|
25
|
+
--accent-soft: oklch(0.920 0.030 56);
|
|
26
|
+
--accent-soft-2: oklch(0.880 0.038 54);
|
|
27
|
+
|
|
28
|
+
--green: oklch(0.45 0.075 145);
|
|
29
|
+
--green-soft: oklch(0.92 0.035 145);
|
|
30
|
+
--amber: oklch(0.55 0.090 75);
|
|
31
|
+
--amber-soft: oklch(0.93 0.045 80);
|
|
32
|
+
--red: oklch(0.50 0.110 28);
|
|
33
|
+
--red-soft: oklch(0.93 0.040 30);
|
|
34
|
+
|
|
35
|
+
--border-subtle: oklch(0.880 0.016 78);
|
|
36
|
+
--border: oklch(0.820 0.020 72);
|
|
37
|
+
--border-strong: oklch(0.720 0.028 65);
|
|
38
|
+
|
|
39
|
+
--space-1: 4px;
|
|
40
|
+
--space-2: 8px;
|
|
41
|
+
--space-3: 12px;
|
|
42
|
+
--space-4: 16px;
|
|
43
|
+
--space-5: 20px;
|
|
44
|
+
--space-6: 24px;
|
|
45
|
+
--space-8: 32px;
|
|
46
|
+
--space-12: 48px;
|
|
47
|
+
|
|
48
|
+
--radius-sm: 6px;
|
|
49
|
+
--radius-md: 8px;
|
|
50
|
+
--radius-full: 9999px;
|
|
51
|
+
|
|
52
|
+
--ease-out-quart: cubic-bezier(0.25, 1, 0.5, 1);
|
|
53
|
+
--shadow-sm: 0 1px 4px oklch(0.22 0.030 50 / 0.07);
|
|
54
|
+
--shadow-md: 0 4px 20px oklch(0.22 0.030 50 / 0.09);
|
|
55
|
+
--shadow-lg: 0 8px 40px oklch(0.22 0.030 50 / 0.12);
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
*,
|
|
59
|
+
*::before,
|
|
60
|
+
*::after {
|
|
61
|
+
box-sizing: border-box;
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
html {
|
|
65
|
+
min-height: 100%;
|
|
66
|
+
background: var(--surface);
|
|
67
|
+
font-size: 16px;
|
|
68
|
+
-webkit-text-size-adjust: 100%;
|
|
69
|
+
text-size-adjust: 100%;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
body {
|
|
73
|
+
margin: 0;
|
|
74
|
+
min-height: 100vh;
|
|
75
|
+
background:
|
|
76
|
+
radial-gradient(circle at 18% 0%, oklch(0.990 0.020 88 / 0.78), transparent 34rem),
|
|
77
|
+
linear-gradient(180deg, var(--surface), oklch(0.938 0.022 78));
|
|
78
|
+
color: var(--text-primary);
|
|
79
|
+
font-family: var(--font-ui);
|
|
80
|
+
-webkit-font-smoothing: antialiased;
|
|
81
|
+
-moz-osx-font-smoothing: grayscale;
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
button,
|
|
85
|
+
select,
|
|
86
|
+
textarea {
|
|
87
|
+
font: inherit;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
button,
|
|
91
|
+
select,
|
|
92
|
+
input,
|
|
93
|
+
textarea {
|
|
94
|
+
color: inherit;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
/*
 * Suppress the default focus ring only when the browser itself decides the
 * focus should not be indicated. Browsers that do not understand
 * :focus-visible drop this whole rule and therefore keep their default
 * outline, so keyboard users never lose the focus indicator.
 */
:focus:not(:focus-visible) {
  outline: none;
}

/* Accent-coloured ring for focus the browser considers worth showing. */
:focus-visible {
  outline: 2px solid var(--accent);
  outline-offset: 2px;
}
|
|
105
|
+
|
|
106
|
+
.shell {
|
|
107
|
+
min-height: 100vh;
|
|
108
|
+
display: grid;
|
|
109
|
+
place-items: start center;
|
|
110
|
+
padding: clamp(var(--space-4), 5vw, var(--space-12));
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
.panel {
|
|
114
|
+
width: min(920px, 100%);
|
|
115
|
+
border: 1px solid var(--border-subtle);
|
|
116
|
+
border-radius: var(--radius-md);
|
|
117
|
+
background: color-mix(in oklch, var(--surface) 82%, var(--surface-overlay));
|
|
118
|
+
padding: clamp(var(--space-5), 4vw, var(--space-8));
|
|
119
|
+
box-shadow: var(--shadow-lg);
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
.topbar {
|
|
123
|
+
display: flex;
|
|
124
|
+
align-items: flex-start;
|
|
125
|
+
justify-content: space-between;
|
|
126
|
+
gap: var(--space-6);
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
h1,
|
|
130
|
+
h2,
|
|
131
|
+
p {
|
|
132
|
+
margin: 0;
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
h1 {
|
|
136
|
+
font-family: var(--font-display);
|
|
137
|
+
font-size: clamp(2.15rem, 8vw, 4.75rem);
|
|
138
|
+
font-weight: 400;
|
|
139
|
+
line-height: 0.96;
|
|
140
|
+
letter-spacing: 0;
|
|
141
|
+
color: var(--text-primary);
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
p {
|
|
145
|
+
max-width: 58ch;
|
|
146
|
+
margin-top: var(--space-3);
|
|
147
|
+
color: var(--text-secondary);
|
|
148
|
+
font-family: var(--font-body);
|
|
149
|
+
font-size: 1.16rem;
|
|
150
|
+
line-height: 1.55;
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
.status {
|
|
154
|
+
flex: 0 0 auto;
|
|
155
|
+
min-width: 112px;
|
|
156
|
+
border: 1px solid var(--border);
|
|
157
|
+
border-radius: var(--radius-full);
|
|
158
|
+
padding: 7px 12px;
|
|
159
|
+
text-align: center;
|
|
160
|
+
color: var(--text-secondary);
|
|
161
|
+
background: var(--surface-overlay);
|
|
162
|
+
font-size: 0.8125rem;
|
|
163
|
+
font-weight: 500;
|
|
164
|
+
box-shadow: var(--shadow-sm);
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
.status[data-tone="active"] {
|
|
168
|
+
border-color: var(--accent-soft-2);
|
|
169
|
+
background: var(--accent-soft);
|
|
170
|
+
color: var(--accent);
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
.status[data-tone="success"] {
|
|
174
|
+
border-color: color-mix(in oklch, var(--green), var(--border) 58%);
|
|
175
|
+
background: var(--green-soft);
|
|
176
|
+
color: var(--green);
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
.status[data-tone="error"] {
|
|
180
|
+
border-color: color-mix(in oklch, var(--red), var(--border) 58%);
|
|
181
|
+
background: var(--red-soft);
|
|
182
|
+
color: var(--red);
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
.controls {
|
|
186
|
+
display: flex;
|
|
187
|
+
flex-wrap: wrap;
|
|
188
|
+
gap: var(--space-2);
|
|
189
|
+
margin-top: var(--space-8);
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
button {
|
|
193
|
+
min-height: 44px;
|
|
194
|
+
border: 1px solid transparent;
|
|
195
|
+
border-radius: var(--radius-md);
|
|
196
|
+
padding: 0 var(--space-4);
|
|
197
|
+
background: var(--accent);
|
|
198
|
+
color: var(--text-on-accent);
|
|
199
|
+
cursor: pointer;
|
|
200
|
+
font-weight: 500;
|
|
201
|
+
transition:
|
|
202
|
+
background 160ms var(--ease-out-quart),
|
|
203
|
+
border-color 160ms var(--ease-out-quart),
|
|
204
|
+
transform 160ms var(--ease-out-quart),
|
|
205
|
+
opacity 160ms var(--ease-out-quart);
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
button:hover:not(:disabled) {
|
|
209
|
+
background: var(--accent-hover);
|
|
210
|
+
transform: translateY(-1px);
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
button:active:not(:disabled) {
|
|
214
|
+
transform: translateY(0);
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
button:disabled {
|
|
218
|
+
cursor: not-allowed;
|
|
219
|
+
opacity: 0.48;
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
button[hidden] {
|
|
223
|
+
display: none;
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
button.secondary {
|
|
227
|
+
border-color: var(--border);
|
|
228
|
+
background: var(--surface-overlay);
|
|
229
|
+
color: var(--text-secondary);
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
button.secondary:hover:not(:disabled) {
|
|
233
|
+
border-color: var(--border-strong);
|
|
234
|
+
background: var(--surface-hover);
|
|
235
|
+
color: var(--text-primary);
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
.field {
|
|
239
|
+
display: flex;
|
|
240
|
+
align-items: center;
|
|
241
|
+
gap: var(--space-3);
|
|
242
|
+
margin-top: var(--space-5);
|
|
243
|
+
color: var(--text-secondary);
|
|
244
|
+
font-size: 0.9375rem;
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
.field span {
|
|
248
|
+
font-weight: 500;
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
.field select {
|
|
252
|
+
min-height: 40px;
|
|
253
|
+
border: 1px solid var(--border);
|
|
254
|
+
border-radius: var(--radius-md);
|
|
255
|
+
padding: 0 var(--space-3);
|
|
256
|
+
background: var(--surface-overlay);
|
|
257
|
+
color: var(--text-primary);
|
|
258
|
+
box-shadow: var(--shadow-sm);
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
.toggle {
|
|
262
|
+
display: inline-flex;
|
|
263
|
+
align-items: center;
|
|
264
|
+
gap: var(--space-2);
|
|
265
|
+
margin-top: var(--space-4);
|
|
266
|
+
margin-right: var(--space-5);
|
|
267
|
+
color: var(--text-secondary);
|
|
268
|
+
font-size: 0.9375rem;
|
|
269
|
+
line-height: 1.4;
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
.toggle input {
|
|
273
|
+
width: 18px;
|
|
274
|
+
height: 18px;
|
|
275
|
+
accent-color: var(--accent);
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
textarea {
|
|
279
|
+
display: block;
|
|
280
|
+
width: 100%;
|
|
281
|
+
min-height: 280px;
|
|
282
|
+
margin-top: var(--space-5);
|
|
283
|
+
resize: vertical;
|
|
284
|
+
border: 1px solid var(--border);
|
|
285
|
+
border-radius: var(--radius-md);
|
|
286
|
+
padding: var(--space-5);
|
|
287
|
+
background: color-mix(in oklch, var(--surface) 78%, var(--surface-overlay));
|
|
288
|
+
color: var(--text-primary);
|
|
289
|
+
font-family: var(--font-body);
|
|
290
|
+
font-size: 1.125rem;
|
|
291
|
+
line-height: 1.62;
|
|
292
|
+
box-shadow: inset 0 1px 0 oklch(0.985 0.010 82 / 0.42);
|
|
293
|
+
transition:
|
|
294
|
+
border-color 160ms var(--ease-out-quart),
|
|
295
|
+
background 160ms var(--ease-out-quart),
|
|
296
|
+
box-shadow 160ms var(--ease-out-quart);
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
textarea::placeholder {
|
|
300
|
+
color: var(--text-faint);
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
textarea:focus {
|
|
304
|
+
border-color: var(--border-strong);
|
|
305
|
+
background: var(--surface);
|
|
306
|
+
box-shadow:
|
|
307
|
+
inset 0 1px 0 oklch(0.985 0.010 82 / 0.48),
|
|
308
|
+
0 0 0 3px color-mix(in oklch, var(--accent-soft), transparent 32%);
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
.hint {
|
|
312
|
+
max-width: 66ch;
|
|
313
|
+
margin-top: var(--space-4);
|
|
314
|
+
color: var(--text-tertiary);
|
|
315
|
+
font-family: var(--font-body);
|
|
316
|
+
font-size: 1rem;
|
|
317
|
+
line-height: 1.5;
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
.setup {
|
|
321
|
+
margin-top: var(--space-8);
|
|
322
|
+
border-top: 1px solid var(--border-subtle);
|
|
323
|
+
padding-top: var(--space-6);
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
.setup h2 {
|
|
327
|
+
margin: 0;
|
|
328
|
+
font-family: var(--font-display);
|
|
329
|
+
font-size: 1.75rem;
|
|
330
|
+
font-weight: 400;
|
|
331
|
+
line-height: 1.1;
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
.setup ol {
|
|
335
|
+
max-width: 76ch;
|
|
336
|
+
margin: var(--space-4) 0 0;
|
|
337
|
+
padding-left: 1.35rem;
|
|
338
|
+
color: var(--text-secondary);
|
|
339
|
+
font-family: var(--font-body);
|
|
340
|
+
font-size: 1.06rem;
|
|
341
|
+
line-height: 1.58;
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
.setup li + li {
|
|
345
|
+
margin-top: var(--space-2);
|
|
346
|
+
}
|
|
347
|
+
|
|
348
|
+
strong {
|
|
349
|
+
color: var(--text-primary);
|
|
350
|
+
font-weight: 500;
|
|
351
|
+
}
|
|
352
|
+
|
|
353
|
+
code {
|
|
354
|
+
border: 1px solid var(--border-subtle);
|
|
355
|
+
border-radius: var(--radius-sm);
|
|
356
|
+
padding: 1px 6px;
|
|
357
|
+
background: var(--surface-code);
|
|
358
|
+
color: var(--text-primary);
|
|
359
|
+
font-family: var(--font-mono);
|
|
360
|
+
font-size: 0.86em;
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
.callout {
|
|
364
|
+
max-width: 76ch;
|
|
365
|
+
margin-top: var(--space-4);
|
|
366
|
+
border: 1px solid var(--border-subtle);
|
|
367
|
+
border-radius: var(--radius-md);
|
|
368
|
+
padding: var(--space-4);
|
|
369
|
+
background: var(--surface-overlay);
|
|
370
|
+
color: var(--text-secondary);
|
|
371
|
+
font-family: var(--font-body);
|
|
372
|
+
font-size: 1.02rem;
|
|
373
|
+
line-height: 1.52;
|
|
374
|
+
}
|
|
375
|
+
|
|
376
|
+
.callout strong {
|
|
377
|
+
color: var(--accent);
|
|
378
|
+
}
|
|
379
|
+
|
|
380
|
+
.diagnostics {
|
|
381
|
+
max-width: 76ch;
|
|
382
|
+
margin: var(--space-4) 0 0;
|
|
383
|
+
border: 1px solid var(--border-subtle);
|
|
384
|
+
border-radius: var(--radius-md);
|
|
385
|
+
padding: var(--space-4);
|
|
386
|
+
overflow-x: auto;
|
|
387
|
+
background: var(--surface-code);
|
|
388
|
+
color: var(--text-secondary);
|
|
389
|
+
font-family: var(--font-mono);
|
|
390
|
+
font-size: 0.8125rem;
|
|
391
|
+
line-height: 1.5;
|
|
392
|
+
}
|
|
393
|
+
|
|
394
|
+
@media (max-width: 640px) {
|
|
395
|
+
.shell {
|
|
396
|
+
padding: var(--space-3);
|
|
397
|
+
place-items: stretch;
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
.panel {
|
|
401
|
+
padding: var(--space-5);
|
|
402
|
+
}
|
|
403
|
+
|
|
404
|
+
.topbar {
|
|
405
|
+
display: block;
|
|
406
|
+
}
|
|
407
|
+
|
|
408
|
+
.status {
|
|
409
|
+
width: fit-content;
|
|
410
|
+
margin-top: var(--space-4);
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
.controls {
|
|
414
|
+
margin-top: var(--space-6);
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
.controls button {
|
|
418
|
+
flex: 1 1 148px;
|
|
419
|
+
}
|
|
420
|
+
|
|
421
|
+
.field {
|
|
422
|
+
align-items: stretch;
|
|
423
|
+
flex-direction: column;
|
|
424
|
+
gap: var(--space-2);
|
|
425
|
+
}
|
|
426
|
+
|
|
427
|
+
.field select {
|
|
428
|
+
width: 100%;
|
|
429
|
+
}
|
|
430
|
+
|
|
431
|
+
.toggle {
|
|
432
|
+
width: 100%;
|
|
433
|
+
align-items: flex-start;
|
|
434
|
+
margin-right: 0;
|
|
435
|
+
}
|
|
436
|
+
|
|
437
|
+
textarea {
|
|
438
|
+
min-height: 220px;
|
|
439
|
+
padding: var(--space-4);
|
|
440
|
+
}
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
@media (prefers-reduced-motion: reduce) {
|
|
444
|
+
*,
|
|
445
|
+
*::before,
|
|
446
|
+
*::after {
|
|
447
|
+
animation-duration: 0.01ms !important;
|
|
448
|
+
animation-iteration-count: 1 !important;
|
|
449
|
+
scroll-behavior: auto !important;
|
|
450
|
+
transition-duration: 0.01ms !important;
|
|
451
|
+
}
|
|
452
|
+
}
|
package/server.mjs
ADDED
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { exec, execFile } from "child_process";
|
|
3
|
+
import { createServer } from "http";
|
|
4
|
+
import { promises as fs } from "fs";
|
|
5
|
+
import { extname, join, normalize } from "path";
|
|
6
|
+
import { fileURLToPath } from "url";
|
|
7
|
+
|
|
8
|
+
const __dirname = fileURLToPath(new URL(".", import.meta.url));
|
|
9
|
+
const publicDir = join(__dirname, "public");
|
|
10
|
+
const host = process.env.HOST || "127.0.0.1";
|
|
11
|
+
const port = Number(process.env.PORT || 8787);
|
|
12
|
+
|
|
13
|
+
const contentTypes = {
|
|
14
|
+
".html": "text/html; charset=utf-8",
|
|
15
|
+
".css": "text/css; charset=utf-8",
|
|
16
|
+
".js": "text/javascript; charset=utf-8",
|
|
17
|
+
".json": "application/json; charset=utf-8",
|
|
18
|
+
};
|
|
19
|
+
|
|
20
|
+
/**
 * Sends `payload` as a JSON response and ends the response.
 *
 * @param {import("http").ServerResponse} res - Response to write to.
 * @param {number} status - HTTP status code.
 * @param {unknown} payload - Value serialised with JSON.stringify.
 */
function sendJson(res, status, payload) {
  // Responses are marked no-store so they are never cached.
  const headers = {
    "content-type": "application/json; charset=utf-8",
    "cache-control": "no-store",
  };

  res.writeHead(status, headers);
  res.end(JSON.stringify(payload));
}
|
|
27
|
+
|
|
28
|
+
/**
 * Collects a request body into a UTF-8 string, refusing oversized bodies.
 *
 * @param {AsyncIterable<Buffer>|Iterable<Buffer>} req - Incoming request stream.
 * @returns {Promise<string>} The decoded body ("" when nothing was sent).
 * @throws {Error} "Request body is too large." once more than 256,000 bytes arrive.
 */
async function readRequestBody(req) {
  const maxBytes = 256_000;
  const parts = [];
  let received = 0;

  for await (const part of req) {
    received += part.length;
    // Stop as soon as the limit is crossed instead of buffering the rest.
    if (received > maxBytes) {
      throw new Error("Request body is too large.");
    }
    parts.push(part);
  }

  const whole = Buffer.concat(parts);
  return whole.toString("utf8");
}
|
|
42
|
+
|
|
43
|
+
/**
 * Pastes `text` into the focused app by putting it on the clipboard and
 * sending Cmd+V through System Events (via `osascript`).
 *
 * Note that the clipboard is overwritten with `text` and not restored.
 *
 * @param {string} text - Text to paste.
 * @param {boolean} pressEnter - When truthy, key code 36 is sent after the paste.
 * @returns {Promise<string>} Resolves with osascript's stdout.
 */
function pasteIntoFocusedApp(text, pressEnter) {
  const appleScript = `
    on run argv
      set the clipboard to item 1 of argv
      tell application "System Events"
        keystroke "v" using command down
        if item 2 of argv is "true" then
          key code 36
        end if
      end tell
    end run
  `;
  // The script receives the text and the Enter flag as argv items 1 and 2.
  const osascriptArgs = ["-e", appleScript, text, String(Boolean(pressEnter))];
  const accessibilityHelp =
    "macOS blocked keystrokes. Open System Settings -> Privacy & Security -> Accessibility, then enable the terminal app running this server. Restart the server after granting permission.";

  return new Promise((resolve, reject) => {
    execFile("osascript", osascriptArgs, (error, stdout, stderr) => {
      if (!error) {
        resolve(stdout);
        return;
      }

      // Prefer osascript's own stderr; fall back to the spawn error message.
      const detail = stderr.trim() || error.message;
      const blockedByAccessibility =
        detail.includes("not allowed to send keystrokes") || detail.includes("(1002)");

      reject(new Error(blockedByAccessibility ? accessibilityHelp : detail));
    });
  });
}
|
|
75
|
+
|
|
76
|
+
/**
 * Types `text` into the focused app as keystrokes through System Events
 * (via `osascript`), leaving the clipboard untouched.
 *
 * @param {string} text - Text to type.
 * @param {boolean} pressEnter - When truthy, key code 36 is sent after the text.
 * @returns {Promise<string>} Resolves with osascript's stdout.
 */
function typeIntoFocusedApp(text, pressEnter) {
  const script = `
    on run argv
      tell application "System Events"
        keystroke item 1 of argv
        if item 2 of argv is "true" then
          key code 36
        end if
      end tell
    end run
  `;

  return new Promise((resolve, reject) => {
    execFile("osascript", ["-e", script, text, String(Boolean(pressEnter))], (error, stdout, stderr) => {
      if (error) {
        // Prefer osascript's own stderr; fall back to the spawn error message.
        const message = stderr.trim() || error.message;
        if (message.includes("not allowed to send keystrokes") || message.includes("(1002)")) {
          // The server is normally launched with `npx`, so the advice says
          // "restart the server" (matching the paste helper), not "npm start".
          reject(
            new Error(
              "macOS blocked typed keystrokes. Open System Settings -> Privacy & Security -> Accessibility, then enable the terminal app running this server. Restart the server after granting permission."
            )
          );
          return;
        }
        reject(new Error(message));
        return;
      }
      resolve(stdout);
    });
  });
}
|
|
107
|
+
|
|
108
|
+
/**
 * Writes `text` into the current session of iTerm's current window using
 * iTerm's own AppleScript interface. A window is created first when none exist.
 *
 * @param {string} text - Text to write (sent with `newline false`).
 * @param {boolean} pressEnter - When truthy, an empty `write text ""` follows
 *   (presumably to submit the line).
 * @returns {Promise<string>} Resolves with osascript's stdout.
 */
function writeToITerm(text, pressEnter) {
  const appleScript = `
    on run argv
      tell application id "com.googlecode.iterm2"
        if (count of windows) is 0 then
          create window with default profile
        end if
        tell current session of current window
          write text item 1 of argv newline false
          if item 2 of argv is "true" then
            write text ""
          end if
        end tell
      end tell
    end run
  `;
  const osascriptArgs = ["-e", appleScript, text, String(Boolean(pressEnter))];
  const automationHelp =
    "macOS blocked Apple Events to iTerm. Open System Settings -> Privacy & Security -> Automation and allow your terminal app to control iTerm. You may need to restart the server.";

  return new Promise((resolve, reject) => {
    execFile("osascript", osascriptArgs, (error, stdout, stderr) => {
      if (!error) {
        resolve(stdout);
        return;
      }

      // Prefer osascript's own stderr; fall back to the spawn error message.
      const detail = stderr.trim() || error.message;
      const blockedByAutomation =
        detail.includes("not allowed to send Apple events") || detail.includes("Not authorized");

      reject(new Error(blockedByAutomation ? automationHelp : detail));
    });
  });
}
|
|
144
|
+
|
|
145
|
+
/**
 * Runs an AppleScript snippet with `osascript -e` and returns its trimmed output.
 *
 * @param {string} script - AppleScript source.
 * @param {string[]} [args=[]] - Extra arguments appended after the script.
 * @returns {Promise<string>} Trimmed stdout; rejects with stderr (or the spawn
 *   error message when stderr is empty).
 */
function runAppleScript(script, args = []) {
  return new Promise((resolve, reject) => {
    // Built inside the executor so a bad `args` value rejects instead of throwing.
    const argv = ["-e", script, ...args];

    execFile("osascript", argv, (error, stdout, stderr) => {
      if (!error) {
        resolve(stdout.trim());
        return;
      }
      reject(new Error(stderr.trim() || error.message));
    });
  });
}
|
|
156
|
+
|
|
157
|
+
/**
 * Gathers environment details for the diagnostics panel.
 *
 * Each AppleScript probe is best-effort: a failing probe is reported as
 * `unavailable: <message>` instead of failing the whole call.
 *
 * @returns {Promise<{platform: string, node: string, iTermBundleId: string,
 *   frontmostApp: string, controllingProcess: string}>}
 */
async function getDiagnostics() {
  // Runs one probe and converts a failure into a descriptive string.
  const probe = async (script) => {
    try {
      return await runAppleScript(script);
    } catch (failure) {
      return `unavailable: ${failure.message}`;
    }
  };

  // Probes run one after the other, iTerm first.
  const iTermBundleId = await probe('id of application "iTerm"');
  const frontmostApp = await probe(
    'tell application "System Events" to get name of first application process whose frontmost is true'
  );

  return {
    platform: process.platform,
    node: process.version,
    iTermBundleId,
    frontmostApp,
    controllingProcess: "osascript via node",
  };
}
|
|
182
|
+
|
|
183
|
+
/**
 * Routes dictated text to the requested destination.
 *
 * @param {{destination?: string, text?: string, pressEnter?: boolean}|null} payload
 *   Parsed request body. `destination` is "iterm", "type", or anything else
 *   (including missing) for clipboard paste into the focused app.
 * @returns {Promise<void>}
 * @throws {Error} "No text to paste." when the payload carries no text.
 */
async function sendToDestination(payload) {
  // A JSON body of `null` parses fine but has no properties; treat it as an
  // empty payload so the caller gets "No text to paste." instead of a TypeError.
  const { destination, text: rawText, pressEnter } = payload ?? {};
  const text = String(rawText || "");

  if (!text) {
    throw new Error("No text to paste.");
  }

  const shouldPressEnter = Boolean(pressEnter);

  switch (destination || "frontmost") {
    case "iterm":
      await writeToITerm(text, shouldPressEnter);
      return;
    case "type":
      await typeIntoFocusedApp(text, shouldPressEnter);
      return;
    default:
      // "frontmost" and any unrecognised destination fall back to clipboard paste.
      await pasteIntoFocusedApp(text, shouldPressEnter);
  }
}
|
|
203
|
+
|
|
204
|
+
/**
 * Serves a file from the bundled `public` directory.
 *
 * "/" maps to "/index.html". Responds 400 for request targets that cannot be
 * parsed or decoded, 403 when the resolved path is outside `publicDir`, and
 * 404 when the file cannot be read. HEAD requests get headers only.
 *
 * @param {import("http").IncomingMessage} req - Incoming GET/HEAD request.
 * @param {import("http").ServerResponse} res - Response to write to.
 * @returns {Promise<void>}
 */
async function serveStatic(req, res) {
  let relativePath;

  try {
    const url = new URL(req.url, `http://${req.headers.host}`);
    const pathname = url.pathname === "/" ? "/index.html" : url.pathname;
    // decodeURIComponent throws URIError on malformed escapes such as "/%E0%A4%A",
    // and new URL throws on unparsable targets. Both are client mistakes, so
    // answer 400 here rather than letting them surface as a 500.
    relativePath = normalize(decodeURIComponent(pathname)).replace(/^(\.\.[/\\])+/, "");
  } catch {
    res.writeHead(400);
    res.end("Bad request");
    return;
  }

  const filePath = join(publicDir, relativePath);

  // Second line of defence against path traversal.
  if (!filePath.startsWith(publicDir)) {
    res.writeHead(403);
    res.end("Forbidden");
    return;
  }

  try {
    const content = await fs.readFile(filePath);
    res.writeHead(200, {
      "content-type": contentTypes[extname(filePath)] || "application/octet-stream",
      "cache-control": "no-store",
    });
    res.end(req.method === "HEAD" ? undefined : content);
  } catch {
    // Missing files, directories and unreadable paths are all reported as 404.
    res.writeHead(404);
    res.end("Not found");
  }
}
|
|
228
|
+
|
|
229
|
+
/**
 * Returns true unless the request carries an Origin header that does not
 * match its Host header.
 *
 * Browsers attach Origin to cross-site POSTs, so this stops an arbitrary web
 * page from driving /paste (which injects keystrokes into the focused app).
 * Clients that send no Origin (curl, scripts) are allowed. This does not
 * defend against DNS rebinding, where Origin and Host agree.
 *
 * @param {import("http").IncomingMessage} req
 * @returns {boolean}
 */
function isSameOriginRequest(req) {
  const { origin, host: requestHost } = req.headers;
  if (origin === undefined) {
    return true;
  }

  try {
    return new URL(origin).host.toLowerCase() === String(requestHost).toLowerCase();
  } catch {
    // Opaque ("null") or unparsable origins are never trusted.
    return false;
  }
}

/**
 * HTTP entry point: static files for GET/HEAD, POST /paste to send text to
 * the chosen destination, POST /diagnostics for environment details.
 * Everything else is answered with a JSON 404.
 */
const server = createServer(async (req, res) => {
  try {
    if (req.method === "GET" || req.method === "HEAD") {
      await serveStatic(req, res);
      return;
    }

    // Only the page served by this server may reach the state-changing routes.
    if (!isSameOriginRequest(req)) {
      sendJson(res, 403, { ok: false, error: "Cross-origin requests are not allowed." });
      return;
    }

    if (req.method === "POST" && req.url === "/paste") {
      const body = await readRequestBody(req);

      let payload;
      try {
        payload = JSON.parse(body || "{}");
      } catch {
        // A malformed body is the client's fault, not a server error.
        sendJson(res, 400, { ok: false, error: "Request body is not valid JSON." });
        return;
      }

      await sendToDestination(payload);
      sendJson(res, 200, { ok: true });
      return;
    }

    if (req.method === "POST" && req.url === "/diagnostics") {
      sendJson(res, 200, { ok: true, diagnostics: await getDiagnostics() });
      return;
    }

    sendJson(res, 404, { ok: false, error: "Not found." });
  } catch (error) {
    // writeHead throws once headers are out, which would become an unhandled
    // rejection in this async handler; just finish the response instead.
    if (res.headersSent) {
      res.end();
      return;
    }
    sendJson(res, 500, { ok: false, error: error.message });
  }
});
|
|
254
|
+
|
|
255
|
+
// Without an 'error' listener a busy port (for example a second copy of this
// tool already running) crashes the process with a raw stack trace.
// Report it readably and exit non-zero instead.
server.on("error", (error) => {
  if (error.code === "EADDRINUSE") {
    console.error(`Port ${port} is already in use on ${host}. Stop the other process or set PORT to a free port.`);
  } else {
    console.error(`Server error: ${error.message}`);
  }
  process.exit(1);
});

// Start listening, print the URL, and try to open it in the default browser.
server.listen(port, host, () => {
  const url = `http://${host}:${port}`;
  console.log(`Speech Terminal → ${url}`);
  console.log("Web Speech API requires Chrome or Edge. Focus your target app before speaking.");

  // Each platform has its own "open this URL" command.
  const openCmd =
    process.platform === "win32" ? `start "" "${url}"` :
    process.platform === "darwin" ? `open "${url}"` :
    `xdg-open "${url}"`;

  exec(openCmd, (err) => {
    // Opening the browser is best-effort; on failure tell the user what to open.
    if (err) console.log(`Could not open browser automatically. Open ${url} in Chrome or Edge.`);
  });
});
|