compact-agent 1.5.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/accessibility.d.ts +50 -0
- package/dist/accessibility.js +0 -0
- package/dist/accessibility.js.map +1 -0
- package/dist/audio.d.ts +50 -0
- package/dist/audio.js +382 -0
- package/dist/audio.js.map +1 -0
- package/dist/config.js +48 -1
- package/dist/config.js.map +1 -1
- package/dist/index.js +473 -5
- package/dist/index.js.map +1 -1
- package/dist/query.js +116 -3
- package/dist/query.js.map +1 -1
- package/dist/status.d.ts +49 -0
- package/dist/status.js +82 -0
- package/dist/status.js.map +1 -0
- package/dist/theme.d.ts +3 -0
- package/dist/theme.js +51 -29
- package/dist/theme.js.map +1 -1
- package/dist/types.d.ts +34 -0
- package/dist/types.js.map +1 -1
- package/dist/voice.d.ts +79 -0
- package/dist/voice.js +351 -0
- package/dist/voice.js.map +1 -0
- package/package.json +10 -3
package/dist/index.js
CHANGED
|
@@ -20,7 +20,7 @@ import { buildCommitPrompt, buildPRPrompt, printDiff, printLog } from './git-wor
|
|
|
20
20
|
import { buildReviewPrompt, buildTDDPrompt, buildSecurityReviewPrompt, runAudit, printAuditReport, buildPlanPrompt, buildE2EPrompt, buildBuildFixPrompt, buildEvalPrompt } from './evaluation.js';
|
|
21
21
|
import { printRules } from './rules.js';
|
|
22
22
|
import { buildOrchestrationPrompt } from './orchestration.js';
|
|
23
|
-
import { printBanner as printThemedBanner,
|
|
23
|
+
import { printBanner as printThemedBanner, theme, sym, formatDuration, installScreenReaderDispatch, uninstallScreenReaderDispatch } from './theme.js';
|
|
24
24
|
import { saveExport } from './export.js';
|
|
25
25
|
// New feature modules
|
|
26
26
|
import { buildVerifyPrompt, saveCheckpoint, listCheckpoints } from './verification.js';
|
|
@@ -55,6 +55,10 @@ import { installEcc, getEccCommandPrompt, loadEccState, eccResourcesAvailable, }
|
|
|
55
55
|
import { buildWalkthroughPrompt } from './walkthrough.js';
|
|
56
56
|
// Stitch (Google's AI UI/UX design tool) — /stitch, /stitch-config, /stitch-tools
|
|
57
57
|
import { buildStitchPrompt, buildStitchToolsPrompt, saveStitchConfig, printStitchStatus, stitchConfigured } from './stitch.js';
|
|
58
|
+
// Voice / accessibility — built-in dictation (Whisper) + readout (ElevenLabs)
|
|
59
|
+
import { printVoiceStatus, isVoiceEnabled, getTtsConfig, getSttConfig, getAccessibilityConfig, speak, dictateOnce, } from './voice.js';
|
|
60
|
+
import { isFfmpegAvailable, audioCue, startRecording } from './audio.js';
|
|
61
|
+
import { applyScreenReader, summarize } from './accessibility.js';
|
|
58
62
|
/**
|
|
59
63
|
* Unified prompt resolver — prefers the bundled ECC prompt for a given
|
|
60
64
|
* intent and falls back to the built-in builder when ECC isn't installed.
|
|
@@ -253,6 +257,18 @@ export function handleSlashCommand(input, config, messages, session, mode) {
|
|
|
253
257
|
// database-migration, add-language-rules) auto-inject when you describe
|
|
254
258
|
// matching work — no slash command needed. Status line below confirms
|
|
255
259
|
// it's enabled.
|
|
260
|
+
console.log(h('\n ── Voice & accessibility ──'));
|
|
261
|
+
console.log(d(' ') + c('/voice') + d(' — show voice config & status (off by default)'));
|
|
262
|
+
console.log(d(' ') + c('/voice on|off') + d(' — master switch for dictation + readout'));
|
|
263
|
+
console.log(d(' ') + c('/voice config') + d(' — quick setup walkthrough'));
|
|
264
|
+
console.log(d(' ') + c('/voice key stt <key>') + d(' — OpenAI key for Whisper dictation'));
|
|
265
|
+
console.log(d(' ') + c('/voice key tts <key>') + d(' — ElevenLabs key for assistant readout'));
|
|
266
|
+
console.log(d(' ') + c('/voice test') + d(' — play a short test utterance'));
|
|
267
|
+
console.log(d(' ') + c('/voice echo|skip-code|speed') + d(' — fine-tune behavior'));
|
|
268
|
+
console.log(d(' ') + c('/dictate [s]') + d(' — one-shot record + transcribe (default 30s)'));
|
|
269
|
+
console.log(d(' ') + c('/accessibility') + d(' — toggle screen-reader mode, audio cues, destructive-confirm'));
|
|
270
|
+
console.log(d(' Playback hotkeys (right block): INS dictate · HOME pause · PGUP replay · DEL skip · END speed+ · PGDN speed–'));
|
|
271
|
+
console.log(d(' Status hotkeys (F-row): F1 what now · F2 where am I · F3 read full · F4 read summary'));
|
|
256
272
|
console.log(h('\n ── Stitch (Google AI UI/UX design) ──'));
|
|
257
273
|
console.log(d(' Use ') + c('/mode design') + d(' or ') + c('/design <task>') + d(' for UI work — the agent uses Stitch automatically.'));
|
|
258
274
|
console.log(d(' ') + c('/stitch') + d(' — show config status'));
|
|
@@ -334,6 +350,15 @@ export function handleSlashCommand(input, config, messages, session, mode) {
|
|
|
334
350
|
mode.current = args;
|
|
335
351
|
const m = MODES[mode.current];
|
|
336
352
|
console.log(chalk.green(` Mode: ${m.label} — ${m.description}`));
|
|
353
|
+
// Accessibility: speak the mode-switch when configured. Doesn't
|
|
354
|
+
// block — fire-and-forget. Errors swallowed (voice should never
|
|
355
|
+
// break the REPL).
|
|
356
|
+
if (isVoiceEnabled(config) && getAccessibilityConfig(config).announceModeSwitches) {
|
|
357
|
+
const tts = getTtsConfig(config);
|
|
358
|
+
if (tts.apiKey) {
|
|
359
|
+
speak(`Mode switched to ${m.label}`, config, { voiceId: tts.assistantVoiceId }).catch(() => { });
|
|
360
|
+
}
|
|
361
|
+
}
|
|
337
362
|
}
|
|
338
363
|
else if (args) {
|
|
339
364
|
console.log(chalk.yellow(` Unknown mode: ${args}`));
|
|
@@ -1074,6 +1099,206 @@ export function handleSlashCommand(input, config, messages, session, mode) {
|
|
|
1074
1099
|
console.log(chalk.dim(' Restart the REPL for the tool to appear in /tools.'));
|
|
1075
1100
|
return { handled: true };
|
|
1076
1101
|
}
|
|
1102
|
+
// ── Voice / accessibility ────────────────────────
|
|
1103
|
+
// /voice — show current voice config + status
|
|
1104
|
+
// /voice on | off — master switch
|
|
1105
|
+
// /voice config — print the quick-setup walkthrough (keys are set via /voice key)
|
|
1106
|
+
// /voice test — synth a short test utterance to verify TTS
|
|
1107
|
+
// /voice key stt <KEY> — set OpenAI key for Whisper STT only
|
|
1108
|
+
// /voice key tts <KEY> — set ElevenLabs key for TTS only
|
|
1109
|
+
// /voice echo on | off — toggle TTS-echo of user input
|
|
1110
|
+
// /voice skip-code on|off — toggle stripping code blocks from TTS
|
|
1111
|
+
// /voice speed <n> — set TTS speed, 0.25..4.0
|
|
1112
|
+
case '/voice': {
|
|
1113
|
+
const parts = args.trim().split(/\s+/).filter(Boolean);
|
|
1114
|
+
const sub = (parts[0] || '').toLowerCase();
|
|
1115
|
+
if (!sub) {
|
|
1116
|
+
printVoiceStatus(config);
|
|
1117
|
+
return { handled: true };
|
|
1118
|
+
}
|
|
1119
|
+
if (sub === 'on' || sub === 'off') {
|
|
1120
|
+
config.voice = config.voice || {};
|
|
1121
|
+
config.voice.enabled = sub === 'on';
|
|
1122
|
+
saveConfig(config);
|
|
1123
|
+
console.log(chalk.green(` Voice: ${sub === 'on' ? 'ON' : 'OFF'}`));
|
|
1124
|
+
if (sub === 'on') {
|
|
1125
|
+
if (!getTtsConfig(config).apiKey) {
|
|
1126
|
+
console.log(chalk.yellow(' ⚠ No ElevenLabs key set. Run /voice key tts <KEY> to enable readout.'));
|
|
1127
|
+
}
|
|
1128
|
+
if (!getSttConfig(config).apiKey) {
|
|
1129
|
+
console.log(chalk.yellow(' ⚠ No OpenAI key for Whisper. Run /voice key stt <KEY> to enable dictation.'));
|
|
1130
|
+
}
|
|
1131
|
+
isFfmpegAvailable().then((ok) => {
|
|
1132
|
+
if (!ok)
|
|
1133
|
+
console.log(chalk.yellow(' ⚠ ffmpeg not found on PATH. Install ffmpeg: https://ffmpeg.org/'));
|
|
1134
|
+
});
|
|
1135
|
+
}
|
|
1136
|
+
return { handled: true };
|
|
1137
|
+
}
|
|
1138
|
+
if (sub === 'config') {
|
|
1139
|
+
// No interactive prompting here — just print the setup steps. Keys are
|
|
1140
|
+
// stored with `/voice key stt ...` and `/voice key tts ...`.
|
|
1141
|
+
console.log(chalk.cyan('\n /voice config — quick setup'));
|
|
1142
|
+
console.log(chalk.dim(' 1. Get an OpenAI key for Whisper STT: https://platform.openai.com/api-keys'));
|
|
1143
|
+
console.log(chalk.dim(' 2. Get an ElevenLabs key for TTS: https://elevenlabs.io/app/settings/api-keys'));
|
|
1144
|
+
console.log(chalk.dim(' 3. Run: /voice key stt <openai-key>'));
|
|
1145
|
+
console.log(chalk.dim(' /voice key tts <elevenlabs-key>'));
|
|
1146
|
+
console.log(chalk.dim(' /voice on'));
|
|
1147
|
+
console.log(chalk.dim(' 4. Press F1 to dictate, hear assistant readout automatically.'));
|
|
1148
|
+
console.log();
|
|
1149
|
+
return { handled: true };
|
|
1150
|
+
}
|
|
1151
|
+
if (sub === 'key') {
|
|
1152
|
+
const target = (parts[1] || '').toLowerCase();
|
|
1153
|
+
const key = parts.slice(2).join(' ').trim();
|
|
1154
|
+
if ((target !== 'stt' && target !== 'tts') || !key) {
|
|
1155
|
+
console.log(chalk.yellow(' Usage: /voice key stt <openai-key> | /voice key tts <elevenlabs-key>'));
|
|
1156
|
+
return { handled: true };
|
|
1157
|
+
}
|
|
1158
|
+
config.voice = config.voice || {};
|
|
1159
|
+
if (target === 'stt') {
|
|
1160
|
+
config.voice.stt = { ...(config.voice.stt || {}), apiKey: key };
|
|
1161
|
+
console.log(chalk.green(` STT key saved (***${key.slice(-4)}).`));
|
|
1162
|
+
}
|
|
1163
|
+
else {
|
|
1164
|
+
config.voice.tts = { ...(config.voice.tts || {}), apiKey: key };
|
|
1165
|
+
console.log(chalk.green(` TTS key saved (***${key.slice(-4)}).`));
|
|
1166
|
+
}
|
|
1167
|
+
saveConfig(config);
|
|
1168
|
+
return { handled: true };
|
|
1169
|
+
}
|
|
1170
|
+
if (sub === 'test') {
|
|
1171
|
+
const tts = getTtsConfig(config);
|
|
1172
|
+
if (!tts.apiKey) {
|
|
1173
|
+
console.log(chalk.yellow(' No TTS key. Run /voice key tts <elevenlabs-key> first.'));
|
|
1174
|
+
return { handled: true };
|
|
1175
|
+
}
|
|
1176
|
+
console.log(chalk.dim(' Synthesizing test utterance…'));
|
|
1177
|
+
speak('Voice readout is working. This is the assistant voice.', config, { voiceId: tts.assistantVoiceId })
|
|
1178
|
+
.then((ok) => console.log(ok ? chalk.green(' ✓ Played.') : chalk.yellow(' ✗ Playback failed — check ffmpeg.')));
|
|
1179
|
+
return { handled: true };
|
|
1180
|
+
}
|
|
1181
|
+
if (sub === 'echo') {
|
|
1182
|
+
const v = (parts[1] || '').toLowerCase();
|
|
1183
|
+
if (v !== 'on' && v !== 'off') {
|
|
1184
|
+
console.log(chalk.yellow(' Usage: /voice echo on|off'));
|
|
1185
|
+
return { handled: true };
|
|
1186
|
+
}
|
|
1187
|
+
config.voice = config.voice || {};
|
|
1188
|
+
config.voice.tts = { ...(config.voice.tts || {}), echoUser: v === 'on' };
|
|
1189
|
+
saveConfig(config);
|
|
1190
|
+
console.log(chalk.green(` User-echo: ${v.toUpperCase()}`));
|
|
1191
|
+
return { handled: true };
|
|
1192
|
+
}
|
|
1193
|
+
if (sub === 'skip-code') {
|
|
1194
|
+
const v = (parts[1] || '').toLowerCase();
|
|
1195
|
+
if (v !== 'on' && v !== 'off') {
|
|
1196
|
+
console.log(chalk.yellow(' Usage: /voice skip-code on|off'));
|
|
1197
|
+
return { handled: true };
|
|
1198
|
+
}
|
|
1199
|
+
config.voice = config.voice || {};
|
|
1200
|
+
config.voice.tts = { ...(config.voice.tts || {}), skipCode: v === 'on' };
|
|
1201
|
+
saveConfig(config);
|
|
1202
|
+
console.log(chalk.green(` Skip-code: ${v.toUpperCase()}`));
|
|
1203
|
+
return { handled: true };
|
|
1204
|
+
}
|
|
1205
|
+
if (sub === 'speed') {
|
|
1206
|
+
const n = parseFloat(parts[1] || '');
|
|
1207
|
+
if (isNaN(n) || n < 0.25 || n > 4.0) {
|
|
1208
|
+
console.log(chalk.yellow(' Usage: /voice speed <0.25..4.0>'));
|
|
1209
|
+
return { handled: true };
|
|
1210
|
+
}
|
|
1211
|
+
config.voice = config.voice || {};
|
|
1212
|
+
config.voice.tts = { ...(config.voice.tts || {}), speed: n };
|
|
1213
|
+
saveConfig(config);
|
|
1214
|
+
console.log(chalk.green(` TTS speed: ${n}x`));
|
|
1215
|
+
return { handled: true };
|
|
1216
|
+
}
|
|
1217
|
+
console.log(chalk.yellow(` Unknown /voice subcommand: ${sub}`));
|
|
1218
|
+
console.log(chalk.dim(' Try: on, off, config, test, key, echo, skip-code, speed'));
|
|
1219
|
+
return { handled: true };
|
|
1220
|
+
}
|
|
1221
|
+
// /dictate — one-shot push-to-talk WITHOUT the INS hotkey, useful when a
|
|
1222
|
+
// user is testing the pipeline or running under a terminal that does not
|
|
1223
|
+
// deliver that key. Records up to [s] seconds (default 30), transcribes, injects as next prompt.
|
|
1224
|
+
case '/dictate': {
|
|
1225
|
+
const maxSec = parseInt(args, 10) || 30;
|
|
1226
|
+
console.log(chalk.dim(` /dictate — recording up to ${maxSec}s…`));
|
|
1227
|
+
// Return a sentinel prompt so this handler stays synchronous; the REPL
|
|
1228
|
+
// loop detects "__DICTATE__<seconds>" and awaits dictateOnce() itself.
|
|
1229
|
+
return { handled: true, injectPrompt: '__DICTATE__' + maxSec };
|
|
1230
|
+
}
|
|
1231
|
+
// /accessibility — show or toggle the accessibility sub-block
|
|
1232
|
+
// /accessibility — print status
|
|
1233
|
+
// /accessibility screen-reader on|off
|
|
1234
|
+
// /accessibility cues on|off
|
|
1235
|
+
// /accessibility announce-errors on|off
|
|
1236
|
+
// /accessibility announce-modes on|off
|
|
1237
|
+
// /accessibility confirm-destructive on|off
|
|
1238
|
+
// /accessibility long-resp <words>
|
|
1239
|
+
case '/accessibility':
|
|
1240
|
+
case '/a11y': {
|
|
1241
|
+
const parts = args.trim().split(/\s+/).filter(Boolean);
|
|
1242
|
+
const sub = (parts[0] || '').toLowerCase();
|
|
1243
|
+
const v = (parts[1] || '').toLowerCase();
|
|
1244
|
+
if (!sub) {
|
|
1245
|
+
printVoiceStatus(config);
|
|
1246
|
+
return { handled: true };
|
|
1247
|
+
}
|
|
1248
|
+
const setBool = (field, label) => {
|
|
1249
|
+
if (v !== 'on' && v !== 'off') {
|
|
1250
|
+
console.log(chalk.yellow(` Usage: /accessibility ${sub} on|off`));
|
|
1251
|
+
return;
|
|
1252
|
+
}
|
|
1253
|
+
config.voice = config.voice || {};
|
|
1254
|
+
config.voice.accessibility = { ...(config.voice.accessibility || {}), [field]: v === 'on' };
|
|
1255
|
+
saveConfig(config);
|
|
1256
|
+
// Screen-reader mode is special: install/uninstall the stdout filter
|
|
1257
|
+
// immediately so the toggle takes effect for the very next log line.
|
|
1258
|
+
if (field === 'screenReader') {
|
|
1259
|
+
if (v === 'on')
|
|
1260
|
+
installScreenReaderDispatch(applyScreenReader);
|
|
1261
|
+
else
|
|
1262
|
+
uninstallScreenReaderDispatch();
|
|
1263
|
+
}
|
|
1264
|
+
console.log(chalk.green(` ${label}: ${v.toUpperCase()}`));
|
|
1265
|
+
};
|
|
1266
|
+
if (sub === 'screen-reader' || sub === 'screenreader' || sub === 'sr') {
|
|
1267
|
+
setBool('screenReader', 'Screen-reader mode');
|
|
1268
|
+
return { handled: true };
|
|
1269
|
+
}
|
|
1270
|
+
if (sub === 'cues' || sub === 'audio-cues') {
|
|
1271
|
+
setBool('audioCues', 'Audio cues');
|
|
1272
|
+
return { handled: true };
|
|
1273
|
+
}
|
|
1274
|
+
if (sub === 'announce-errors' || sub === 'errors') {
|
|
1275
|
+
setBool('announceErrors', 'Announce errors');
|
|
1276
|
+
return { handled: true };
|
|
1277
|
+
}
|
|
1278
|
+
if (sub === 'announce-modes' || sub === 'modes') {
|
|
1279
|
+
setBool('announceModeSwitches', 'Announce mode switches');
|
|
1280
|
+
return { handled: true };
|
|
1281
|
+
}
|
|
1282
|
+
if (sub === 'confirm-destructive' || sub === 'destructive') {
|
|
1283
|
+
setBool('askBeforeDestructive', 'Ask before destructive');
|
|
1284
|
+
return { handled: true };
|
|
1285
|
+
}
|
|
1286
|
+
if (sub === 'long-resp' || sub === 'threshold') {
|
|
1287
|
+
const n = parseInt(parts[1] || '', 10);
|
|
1288
|
+
if (!n || n < 50) {
|
|
1289
|
+
console.log(chalk.yellow(' Usage: /accessibility long-resp <words≥50>'));
|
|
1290
|
+
return { handled: true };
|
|
1291
|
+
}
|
|
1292
|
+
config.voice = config.voice || {};
|
|
1293
|
+
config.voice.accessibility = { ...(config.voice.accessibility || {}), longResponseThreshold: n };
|
|
1294
|
+
saveConfig(config);
|
|
1295
|
+
console.log(chalk.green(` Long-response threshold: ${n} words`));
|
|
1296
|
+
return { handled: true };
|
|
1297
|
+
}
|
|
1298
|
+
console.log(chalk.yellow(` Unknown /accessibility subcommand: ${sub}`));
|
|
1299
|
+
console.log(chalk.dim(' Try: screen-reader, cues, announce-errors, announce-modes, confirm-destructive, long-resp'));
|
|
1300
|
+
return { handled: true };
|
|
1301
|
+
}
|
|
1077
1302
|
// ── ECC (everything-claude-code) — no slash commands ───
|
|
1078
1303
|
// ECC is bundled, free, auto-installed on first launch, and used
|
|
1079
1304
|
// automatically: built-in commands (/tdd /review /security-review /plan
|
|
@@ -1128,6 +1353,12 @@ async function main() {
|
|
|
1128
1353
|
else {
|
|
1129
1354
|
config = loadConfig();
|
|
1130
1355
|
}
|
|
1356
|
+
// Install the screen-reader output filter if the user's config has it on.
|
|
1357
|
+
// Done as early as possible so every subsequent console.log (banner, hooks,
|
|
1358
|
+
// ECC install report, etc.) gets the filter applied uniformly.
|
|
1359
|
+
if (config.voice?.accessibility?.screenReader) {
|
|
1360
|
+
installScreenReaderDispatch(applyScreenReader);
|
|
1361
|
+
}
|
|
1131
1362
|
// Create session
|
|
1132
1363
|
const mode = { current: 'dev' };
|
|
1133
1364
|
const session = createSession(process.cwd(), config.model, config.provider, mode.current);
|
|
@@ -1141,8 +1372,8 @@ async function main() {
|
|
|
1141
1372
|
// Show startup display based on theme setting
|
|
1142
1373
|
const themeMode = config.theme || 'full';
|
|
1143
1374
|
if (themeMode === 'full') {
|
|
1144
|
-
// Full mode: splash
|
|
1145
|
-
|
|
1375
|
+
// Full mode: banner. ASCII splash removed per user request — both `full`
|
|
1376
|
+
// and `compact` themes now render the same banner block.
|
|
1146
1377
|
printThemedBanner(config.provider, config.model, mode.current, config.permissionMode, session.id, ALL_TOOLS.map((t) => t.name));
|
|
1147
1378
|
}
|
|
1148
1379
|
else if (themeMode === 'compact') {
|
|
@@ -1155,12 +1386,234 @@ async function main() {
|
|
|
1155
1386
|
console.log('');
|
|
1156
1387
|
}
|
|
1157
1388
|
let autoRoute = false;
|
|
1389
|
+
// ── Hotkey listener (right-side key block + F-row) ───────
|
|
1390
|
+
// Voice / accessibility hotkeys.
|
|
1391
|
+
//
|
|
1392
|
+
// The six right-side block keys handle PLAYBACK + DICTATION. They're
|
|
1393
|
+
// mapped here (and not to F-keys) because the right-side block is
|
|
1394
|
+
// tactile-locatable without sight — INS/HOME/PGUP form a tight triangle
|
|
1395
|
+
// above the arrow keys with raised nibs on many keyboards, and DEL/END/
|
|
1396
|
+
// PGDN sit directly below them. F-keys are repurposed for STATUS read-
|
|
1397
|
+
// outs because they're row-aligned (countable by touch).
|
|
1398
|
+
//
|
|
1399
|
+
// Playback / dictation block (right side):
|
|
1400
|
+
// INS push-to-talk dictation (toggle: first press starts, second stops)
|
|
1401
|
+
// HOME pause current TTS playback (implemented by aborting the playback controller)
|
|
1402
|
+
// PGUP replay last spoken chunk
|
|
1403
|
+
// DEL skip the current chunk
|
|
1404
|
+
// END speed up TTS (× 1.25, capped at 2.0)
|
|
1405
|
+
// PGDN slow down TTS (× 0.8, floored at 0.5)
|
|
1406
|
+
//
|
|
1407
|
+
// Status announcements (F-row, for "what's happening?" while waiting):
|
|
1408
|
+
// F1 current activity + elapsed ("calling claude-sonnet-4, 8s")
|
|
1409
|
+
// F2 where am I — model/provider/mode/permissions
|
|
1410
|
+
// F3 re-speak full last response (bypasses summary)
|
|
1411
|
+
// F4 re-speak summary of last response
|
|
1412
|
+
//
|
|
1413
|
+
// All keys are no-ops when voice is off, so installing the listener
|
|
1414
|
+
// unconditionally is safe and lets the user enable voice mid-session
|
|
1415
|
+
// without restarting.
|
|
1416
|
+
let dictateController = null;
|
|
1417
|
+
let dictateActive = false;
|
|
1418
|
+
// Track aborts + last-spoken text so query.ts can hand them off here.
|
|
1419
|
+
globalThis.__voicePlaybackCtl = null;
|
|
1420
|
+
globalThis.__voiceLastChunk = null;
|
|
1421
|
+
globalThis.__voiceLastFullResponse = null;
|
|
1422
|
+
// emitKeypressEvents lives on the callback-flavor 'node:readline' module
|
|
1423
|
+
// (the promises variant doesn't expose it). Some platforms / terminals
|
|
1424
|
+
// don't deliver the right escape sequences for INS/HOME/etc — failure
|
|
1425
|
+
// here is a silent no-op; users can fall back to /dictate.
|
|
1426
|
+
try {
|
|
1427
|
+
const readlineCb = await import('node:readline');
|
|
1428
|
+
const { describeStatus, describeLocation } = await import('./status.js');
|
|
1429
|
+
readlineCb.emitKeypressEvents(stdin);
|
|
1430
|
+
// Set of keys we intercept. Anything not in this set falls through to
|
|
1431
|
+
// readline so normal typing isn't affected.
|
|
1432
|
+
const INTERCEPT = new Set([
|
|
1433
|
+
'insert', 'home', 'pageup', 'delete', 'end', 'pagedown', // playback
|
|
1434
|
+
'f1', 'f2', 'f3', 'f4', // status
|
|
1435
|
+
]);
|
|
1436
|
+
stdin.on('keypress', (_str, key) => {
|
|
1437
|
+
if (!key)
|
|
1438
|
+
return;
|
|
1439
|
+
const name = String(key.name || '').toLowerCase();
|
|
1440
|
+
if (!INTERCEPT.has(name))
|
|
1441
|
+
return;
|
|
1442
|
+
if (!isVoiceEnabled(config))
|
|
1443
|
+
return;
|
|
1444
|
+
const a = getAccessibilityConfig(config);
|
|
1445
|
+
const tts = getTtsConfig(config);
|
|
1446
|
+
// ── INS: push-to-talk dictation toggle ──────────────
|
|
1447
|
+
if (name === 'insert') {
|
|
1448
|
+
if (dictateActive) {
|
|
1449
|
+
dictateActive = false;
|
|
1450
|
+
const ctl = dictateController;
|
|
1451
|
+
dictateController = null;
|
|
1452
|
+
if (!ctl)
|
|
1453
|
+
return;
|
|
1454
|
+
(async () => {
|
|
1455
|
+
if (a.audioCues)
|
|
1456
|
+
await audioCue('recording-stop');
|
|
1457
|
+
const buf = await ctl.stop();
|
|
1458
|
+
if (!buf) {
|
|
1459
|
+
console.log(chalk.dim(' [INS] no audio captured.'));
|
|
1460
|
+
return;
|
|
1461
|
+
}
|
|
1462
|
+
if (a.audioCues)
|
|
1463
|
+
await audioCue('processing');
|
|
1464
|
+
const { transcribeAudio } = await import('./voice.js');
|
|
1465
|
+
const { setStatus } = await import('./status.js');
|
|
1466
|
+
setStatus({ state: 'transcribing' });
|
|
1467
|
+
const transcript = await transcribeAudio(buf, config, 'wav');
|
|
1468
|
+
setStatus({ state: 'idle' });
|
|
1469
|
+
if (!transcript) {
|
|
1470
|
+
console.log(chalk.dim(' [INS] transcription failed.'));
|
|
1471
|
+
if (a.audioCues)
|
|
1472
|
+
await audioCue('error');
|
|
1473
|
+
return;
|
|
1474
|
+
}
|
|
1475
|
+
if (a.audioCues)
|
|
1476
|
+
await audioCue('done');
|
|
1477
|
+
const stt = getSttConfig(config);
|
|
1478
|
+
stdin.write(transcript);
|
|
1479
|
+
if (stt.autoSubmit)
|
|
1480
|
+
stdin.write('\n');
|
|
1481
|
+
})();
|
|
1482
|
+
}
|
|
1483
|
+
else {
|
|
1484
|
+
(async () => {
|
|
1485
|
+
if (!(await isFfmpegAvailable())) {
|
|
1486
|
+
console.log(chalk.yellow(' [INS] ffmpeg not on PATH. Install ffmpeg to dictate.'));
|
|
1487
|
+
return;
|
|
1488
|
+
}
|
|
1489
|
+
const ctl = await startRecording(60);
|
|
1490
|
+
if (!ctl) {
|
|
1491
|
+
console.log(chalk.yellow(' [INS] could not start mic capture.'));
|
|
1492
|
+
return;
|
|
1493
|
+
}
|
|
1494
|
+
dictateController = ctl;
|
|
1495
|
+
dictateActive = true;
|
|
1496
|
+
const { setStatus } = await import('./status.js');
|
|
1497
|
+
setStatus({ state: 'recording' });
|
|
1498
|
+
if (a.audioCues)
|
|
1499
|
+
await audioCue('recording-start');
|
|
1500
|
+
console.log(chalk.dim(' [INS] recording — press INS again to stop.'));
|
|
1501
|
+
})();
|
|
1502
|
+
}
|
|
1503
|
+
return;
|
|
1504
|
+
}
|
|
1505
|
+
// ── HOME: pause TTS ─────────────────────────────────
|
|
1506
|
+
if (name === 'home') {
|
|
1507
|
+
const g = globalThis;
|
|
1508
|
+
if (g.__voicePlaybackCtl && !g.__voicePlaybackCtl.signal.aborted) {
|
|
1509
|
+
g.__voicePlaybackCtl.abort();
|
|
1510
|
+
console.log(chalk.dim(' [HOME] TTS paused.'));
|
|
1511
|
+
}
|
|
1512
|
+
return;
|
|
1513
|
+
}
|
|
1514
|
+
// ── PGUP: replay last chunk ─────────────────────────
|
|
1515
|
+
if (name === 'pageup') {
|
|
1516
|
+
const g = globalThis;
|
|
1517
|
+
const chunk = g.__voiceLastChunk;
|
|
1518
|
+
if (!chunk) {
|
|
1519
|
+
console.log(chalk.dim(' [PGUP] nothing to replay.'));
|
|
1520
|
+
return;
|
|
1521
|
+
}
|
|
1522
|
+
if (!tts.apiKey)
|
|
1523
|
+
return;
|
|
1524
|
+
(async () => { await speak(chunk, config, { voiceId: tts.assistantVoiceId }); })();
|
|
1525
|
+
return;
|
|
1526
|
+
}
|
|
1527
|
+
// ── DEL: skip current chunk ─────────────────────────
|
|
1528
|
+
if (name === 'delete') {
|
|
1529
|
+
const g = globalThis;
|
|
1530
|
+
if (g.__voicePlaybackCtl)
|
|
1531
|
+
g.__voicePlaybackCtl.abort();
|
|
1532
|
+
console.log(chalk.dim(' [DEL] TTS skipped.'));
|
|
1533
|
+
return;
|
|
1534
|
+
}
|
|
1535
|
+
// ── END / PGDN: TTS speed ± ───────────────────────
|
|
1536
|
+
if (name === 'end' || name === 'pagedown') {
|
|
1537
|
+
config.voice = config.voice || {};
|
|
1538
|
+
const ttsCfg = config.voice.tts = { ...(config.voice.tts || {}) };
|
|
1539
|
+
const cur = ttsCfg.speed ?? 1.0;
|
|
1540
|
+
const next = name === 'end' ? Math.min(2.0, cur * 1.25) : Math.max(0.5, cur * 0.8);
|
|
1541
|
+
ttsCfg.speed = Math.round(next * 100) / 100;
|
|
1542
|
+
saveConfig(config);
|
|
1543
|
+
console.log(chalk.dim(` [${name === 'end' ? 'END' : 'PGDN'}] TTS speed: ${ttsCfg.speed}x`));
|
|
1544
|
+
return;
|
|
1545
|
+
}
|
|
1546
|
+
// ── F1: "what's happening?" — current activity + elapsed ───
|
|
1547
|
+
if (name === 'f1') {
|
|
1548
|
+
const msg = describeStatus();
|
|
1549
|
+
console.log(chalk.dim(` [F1] ${msg}`));
|
|
1550
|
+
if (tts.apiKey) {
|
|
1551
|
+
speak(msg, config, { voiceId: tts.assistantVoiceId }).catch(() => { });
|
|
1552
|
+
}
|
|
1553
|
+
return;
|
|
1554
|
+
}
|
|
1555
|
+
// ── F2: "where am I?" — model/provider/mode/permissions ────
|
|
1556
|
+
if (name === 'f2') {
|
|
1557
|
+
const msg = describeLocation();
|
|
1558
|
+
console.log(chalk.dim(` [F2] ${msg}`));
|
|
1559
|
+
if (tts.apiKey) {
|
|
1560
|
+
speak(msg, config, { voiceId: tts.assistantVoiceId }).catch(() => { });
|
|
1561
|
+
}
|
|
1562
|
+
return;
|
|
1563
|
+
}
|
|
1564
|
+
// ── F3: re-speak FULL last response ────────────────
|
|
1565
|
+
if (name === 'f3') {
|
|
1566
|
+
const g = globalThis;
|
|
1567
|
+
const text = g.__voiceLastFullResponse;
|
|
1568
|
+
if (!text) {
|
|
1569
|
+
console.log(chalk.dim(' [F3] nothing to read.'));
|
|
1570
|
+
return;
|
|
1571
|
+
}
|
|
1572
|
+
if (!tts.apiKey)
|
|
1573
|
+
return;
|
|
1574
|
+
(async () => {
|
|
1575
|
+
const { speakAssistantResponse } = await import('./voice.js');
|
|
1576
|
+
const ctl = new AbortController();
|
|
1577
|
+
globalThis.__voicePlaybackCtl = ctl;
|
|
1578
|
+
await speakAssistantResponse(text, config, ctl.signal);
|
|
1579
|
+
})();
|
|
1580
|
+
return;
|
|
1581
|
+
}
|
|
1582
|
+
// ── F4: re-speak SUMMARY of last response ──────────
|
|
1583
|
+
if (name === 'f4') {
|
|
1584
|
+
const g = globalThis;
|
|
1585
|
+
const text = g.__voiceLastFullResponse;
|
|
1586
|
+
if (!text) {
|
|
1587
|
+
console.log(chalk.dim(' [F4] nothing to summarize.'));
|
|
1588
|
+
return;
|
|
1589
|
+
}
|
|
1590
|
+
if (!tts.apiKey)
|
|
1591
|
+
return;
|
|
1592
|
+
const summary = summarize(text, a.longResponseThreshold);
|
|
1593
|
+
(async () => {
|
|
1594
|
+
const ctl = new AbortController();
|
|
1595
|
+
globalThis.__voicePlaybackCtl = ctl;
|
|
1596
|
+
await speak(summary, config, { voiceId: tts.assistantVoiceId, signal: ctl.signal });
|
|
1597
|
+
})();
|
|
1598
|
+
return;
|
|
1599
|
+
}
|
|
1600
|
+
});
|
|
1601
|
+
}
|
|
1602
|
+
catch {
|
|
1603
|
+
// No keypress support — accessibility users can still use /dictate.
|
|
1604
|
+
}
|
|
1605
|
+
// Session-start anchor — used by the [Nm Ns] tag prepended to every prompt
|
|
1606
|
+
// so the user can see at a glance how long the REPL has been open. Combined
|
|
1607
|
+
// with the per-chain timer printed after each model response (see runQuery),
|
|
1608
|
+
// gives both "how long am I here" and "how long was that last response."
|
|
1609
|
+
const sessionStartMs = new Date(session.createdAt).getTime();
|
|
1158
1610
|
// Main REPL loop
|
|
1159
1611
|
while (true) {
|
|
1160
1612
|
let input;
|
|
1161
1613
|
try {
|
|
1614
|
+
const sessionTag = theme.dim(`[${formatDuration(Date.now() - sessionStartMs)}] `);
|
|
1162
1615
|
const modeTag = mode.current !== 'dev' ? theme.dim(`[${mode.current}] `) : '';
|
|
1163
|
-
input = await rl.question(modeTag + theme.prompt(`${sym.prompt} `));
|
|
1616
|
+
input = await rl.question(sessionTag + modeTag + theme.prompt(`${sym.prompt} `));
|
|
1164
1617
|
}
|
|
1165
1618
|
catch {
|
|
1166
1619
|
break;
|
|
@@ -1205,7 +1658,22 @@ async function main() {
|
|
|
1205
1658
|
}
|
|
1206
1659
|
// Some commands inject a prompt into the conversation (e.g. /commit, /review, /tdd)
|
|
1207
1660
|
if (result.injectPrompt) {
|
|
1208
|
-
|
|
1661
|
+
// Special-case the /dictate flow: synthesize the prompt from the mic
|
|
1662
|
+
// before pushing it as a user message. We use the sentinel
|
|
1663
|
+
// "__DICTATE__<seconds>" so the slash handler stays purely sync.
|
|
1664
|
+
if (result.injectPrompt.startsWith('__DICTATE__')) {
|
|
1665
|
+
const maxSec = parseInt(result.injectPrompt.slice('__DICTATE__'.length), 10) || 30;
|
|
1666
|
+
const transcript = await dictateOnce(config, maxSec);
|
|
1667
|
+
if (!transcript) {
|
|
1668
|
+
console.log(chalk.dim(' [dictate] no transcript captured.'));
|
|
1669
|
+
continue;
|
|
1670
|
+
}
|
|
1671
|
+
console.log(theme.dim(' [dictate] ') + chalk.white(transcript));
|
|
1672
|
+
messages.push({ role: 'user', content: transcript });
|
|
1673
|
+
}
|
|
1674
|
+
else {
|
|
1675
|
+
messages.push({ role: 'user', content: result.injectPrompt });
|
|
1676
|
+
}
|
|
1209
1677
|
await runQuery({ config, messages, cwd: process.cwd(), rl, sessionId: session.id, mode: mode.current });
|
|
1210
1678
|
await autoSave(session, messages);
|
|
1211
1679
|
continue;
|