compact-agent 1.25.0 → 1.26.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -25,7 +25,7 @@ First run prompts you for a provider, key, model, and permission mode. After tha
25
25
  - Parallel agent swarm: `/swarm <agent,agent,...> <task>` fans out N specialized ECC agents against the same prompt and prints attributed results.
26
26
  - Bundled [everything-claude-code](https://github.com/Crownelius/everything-claude-code): 228 skills, 60 agents, 75 workflow commands, 19 language rule bundles. Auto-installed on first launch; refresh with `/ecc-install`.
27
27
  - 9 modes (`/mode <name>`): `dev`, `review`, `tdd`, `research`, `plan`, `debug`, `architect`, `hermes`, `design`. Each rewrites the system-prompt addendum.
28
- - Optional voice: Whisper dictation (push-to-talk `F5`) and ElevenLabs TTS readout. Screen-reader mode for blind / low-vision users. All off by default — opt in with `/voice on`.
28
+ - Optional voice + accessibility: Whisper dictation, ElevenLabs TTS readout, and a 19-binding F-row hotkey scheme designed for blind / low-vision users (see [Accessibility](#accessibility)). Off by default — opt in with `/voice on`.
29
29
  - Zero telemetry. The only outbound traffic is to your chosen LLM provider when you send a turn.
30
30
 
31
31
  ---
@@ -160,6 +160,27 @@ No analytics SDKs, no crash reporting, no auto-update beacon. `rm -rf ~/.compact
160
160
 
161
161
  ---
162
162
 
163
+ ## Accessibility
164
+
165
+ Built with blind / low-vision users in mind. `/accessibility screenReader on` strips ANSI colors and replaces Unicode glyphs with words so NVDA, JAWS, Narrator, Orca, and VoiceOver can read output cleanly. A 19-binding F-row hotkey scheme covers everything you'd otherwise have to scroll for:
166
+
167
+ | Key | Function |
168
+ | :--- | :--- |
169
+ | F1–F4 | Status: what's happening · where am I · re-read full last response · re-read summary |
170
+ | F5–F10 | Dictation + TTS playback: F5 push-to-talk · F6 pause · F7 replay · F8 skip · F9/F10 speed ± |
171
+ | **F11 / F12** | Read current input buffer · read your previous submitted turn |
172
+ | **Shift+F1–F4** | Queued input · key-pool health · last tool-call · toggle screen-reader |
173
+ | **Shift+F5 / F6** | Soft-cancel current turn · panic-stop TTS (5s suppression window) |
174
+ | **Shift+F12** | Read the hotkey list aloud (discoverability without sighted help) |
175
+
176
+ Every binding prints to stdout first, then layers TTS on top only if an ElevenLabs key is configured — so users running compact-agent alongside their OS-level screen reader get the announcements without paying for TTS.
177
+
178
+ Key choice rationale: bare F-keys and Shift+F-keys are the only space that's both screen-reader-safe (no Insert / CapsLock / Ctrl+Option modifier collisions with NVDA, JAWS, Narrator, Orca, or VoiceOver) and terminal-safe (no `readline` collisions). F11 and F12 specifically are browser-reserved keys that terminals don't grab.
179
+
180
+ Voice setup: `/voice config` saves API keys, `/voice on` enables, `/voice test` confirms playback. ffmpeg is required for dictation but optional for TTS-only setups. Speed, voice IDs, and code-skipping behavior are all in `/voice` sub-commands.
181
+
182
+ ---
183
+
163
184
  ## From source
164
185
 
165
186
  ```bash
package/dist/index.js CHANGED
@@ -107,6 +107,37 @@ async function setupWizard(rl) {
107
107
  const modelInput = await rl.question(chalk.yellow(` Model [${provider.defaultModel}]: `));
108
108
  if (modelInput.trim())
109
109
  model = modelInput.trim();
110
+ // Warn on known-flaky experimental models. These are free / preview
111
+ // models on OpenRouter (and similar gateways) that frequently return
112
+ // empty responses, the literal string "ERROR", or hang past 30s. The
113
+ // auto-fallback in runQuery tries to recover but on a free-tier key
114
+ // the fallback may also be unreachable — so a user picking one of
115
+ // these by name ends up staring at the live-queue box wondering
116
+ // what's broken. Flag it now while we can still talk them out of it.
117
+ //
118
+ // Heuristic-only — we don't block. The matched substrings cover
119
+ // the cases that have shown up in user reports without producing false
119
+ // positives on legitimate model names that happen to share a prefix.
121
+ const flakyPatterns = [
122
+ 'owl-alpha', // perpetual-experimental, returns "ERROR"
123
+ 'horizon-alpha',
124
+ 'horizon-beta',
125
+ 'optimus-alpha',
126
+ 'quasar-alpha',
127
+ ];
128
+ const lowerModel = model.toLowerCase();
129
+ if (flakyPatterns.some((p) => lowerModel.includes(p))) {
130
+ console.log('');
131
+ console.log(chalk.yellow(` ⚠ "${model}" is an experimental / free model that's been reported to return`));
132
+ console.log(chalk.yellow(` empty or "ERROR" responses. Compact Agent's auto-fallback will try to`));
133
+ console.log(chalk.yellow(` recover, but on a free-tier API key the fallback may not be reachable.`));
134
+ console.log(chalk.dim(' More reliable free options on OpenRouter:'));
135
+ console.log(chalk.dim(' meta-llama/llama-3.3-70b-instruct:free'));
136
+ console.log(chalk.dim(' google/gemini-2.0-flash-exp:free'));
137
+ console.log(chalk.dim(' deepseek/deepseek-chat:free'));
138
+ console.log(chalk.dim(' You can change this any time with /model <id> in the REPL.'));
139
+ console.log('');
140
+ }
110
141
  console.log(chalk.white('\n Permission modes:'));
111
142
  console.log(chalk.dim(' 1. ask — prompt before writes/commands (safest)'));
112
143
  console.log(chalk.dim(' 2. auto — auto-approve reads, ask for destructive'));
@@ -319,6 +350,8 @@ export function handleSlashCommand(input, config, messages, session, mode) {
319
350
  console.log(d(' ') + c('/accessibility') + d(' — toggle screen-reader mode, audio cues, destructive-confirm'));
320
351
  console.log(d(' Status hotkeys: F1 what now · F2 where am I · F3 read full · F4 read summary'));
321
352
  console.log(d(' Playback hotkeys: F5 dictate · F6 pause · F7 replay · F8 skip · F9 speed+ · F10 speed–'));
353
+ console.log(d(' Read hotkeys: F11 input buffer · F12 your last turn'));
354
+ console.log(d(' Shift+Fn: Shift+F1 queued · F2 key pool · F3 last tool · F4 toggle SR · F5 cancel · F6 panic · F12 hotkey list'));
322
355
  console.log(h('\n ── Stitch (Google AI UI/UX design) ──'));
323
356
  console.log(d(' Use ') + c('/mode design') + d(' or ') + c('/design <task>') + d(' for UI work — the agent uses Stitch automatically.'));
324
357
  console.log(d(' ') + c('/stitch') + d(' — show config status'));
@@ -2128,36 +2161,218 @@ async function main() {
2128
2161
  const { describeStatus, describeLocation } = await import('./status.js');
2129
2162
  readlineCb.emitKeypressEvents(stdin);
2130
2163
  // Set of keys we intercept. Anything not in this set falls through to
2131
- // readline so normal typing isn't affected. All bare F-keys; no
2132
- // modifiers needed, no screen-reader conflicts.
2164
+ // readline so normal typing isn't affected. All bare or shifted F-keys —
2165
+ // no Insert/CapsLock/Ctrl-Option modifiers, so we never collide with
2166
+ // NVDA, JAWS, Narrator, Orca, or VoiceOver. F11 + F12 are also browser-
2167
+ // reserved keys (fullscreen / devtools) and therefore reliably free in
2168
+ // every terminal that isn't masquerading as a browser.
2133
2169
  const INTERCEPT = new Set([
2134
- 'f1', 'f2', 'f3', 'f4', // status announcements
2135
- 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', // dictation + playback
2170
+ 'f1', 'f2', 'f3', 'f4', // status announcements (bare)
2171
+ 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', // dictation + playback (bare)
2172
+ 'f11', 'f12', // Tier 1: input + last turn (bare)
2173
+ // Shifted F-keys carry the Tier-2 and Tier-3 a11y functions. Each
2174
+ // is checked alongside key.shift below, so a bare F1 still routes
2175
+ // to "status" while Shift+F1 routes to "queued input."
2136
2176
  ]);
2137
2177
  // Define the hotkey listener as a NAMED, TAGGED function so
2138
2178
  // suppressInputDuringStream() in query.ts can isolate it among stdin's
2139
2179
  // 'keypress' listeners. During streaming we detach readline's own
2140
2180
  // keypress listener (to prevent echo + line-buffer pollution) while
2141
- // keeping this one attached so F1–F10 keep working mid-response.
2181
+ // keeping this one attached so F1–F12 keep working mid-response.
2142
2182
  const hotkeyListener = function hotkeyListener(_str, key) {
2143
2183
  if (!key)
2144
2184
  return;
2145
2185
  const name = String(key.name || '').toLowerCase();
2146
2186
  if (!INTERCEPT.has(name))
2147
2187
  return;
2188
+ const shift = !!key.shift;
2148
2189
  const a = getAccessibilityConfig(config);
2149
2190
  const tts = getTtsConfig(config);
2150
- // F1–F4 are STATUS hotkeys. They always work, even when voice is off
2151
- // and even when there's no TTS key they print the status line to
2152
- // stdout regardless. TTS is only added on top when a key is present.
2153
- // The whole point of these keys is "tell me what's happening", which
2154
- // is just as useful via text + screen reader as via voice.
2155
- const isStatusKey = name === 'f1' || name === 'f2' || name === 'f3' || name === 'f4';
2156
- // F5–F10 are DICTATION/PLAYBACK hotkeys they only make sense when
2157
- // voice features are enabled. Bail early to avoid spurious ffmpeg
2158
- // spawns and "TTS not configured" log lines.
2191
+ // Helper: print to stdout (always picked up by the OS screen reader)
2192
+ // and optionally layer TTS on top if a key is configured. Used by
2193
+ // every "announce something" branch in the new tier of bindings.
2194
+ const announce = (label, text) => {
2195
+ console.log(chalk.dim(` [${label}] `) + text);
2196
+ if (tts.apiKey) {
2197
+ speak(text, config, { voiceId: tts.assistantVoiceId }).catch(() => { });
2198
+ }
2199
+ };
2200
+ // STATUS hotkeys always work, even when voice is off and even when
2201
+ // there's no TTS key — they print to stdout so an OS-level screen
2202
+ // reader still has something to announce. TTS layers on top when a
2203
+ // key is present. Applies to:
2204
+ // - F1–F4 : original status / location / replay set
2205
+ // - F11/F12 : input buffer / last user turn (Tier 1)
2206
+ // - Shift+* : every shifted F-key is information or control,
2207
+ // never voice-only
2208
+ const isStatusKey = name === 'f1' || name === 'f2' || name === 'f3' || name === 'f4' ||
2209
+ name === 'f11' || name === 'f12' || shift;
2210
+ // F5–F10 (bare) are DICTATION/PLAYBACK hotkeys — they only make
2211
+ // sense when voice features are enabled. Bail early to avoid
2212
+ // spurious ffmpeg spawns and "TTS not configured" log lines.
2159
2213
  if (!isStatusKey && !isVoiceEnabled(config))
2160
2214
  return;
2215
+ // ──────────────────────────────────────────────────────────────
2216
+ // Tier 2 + 3: shifted F-keys.
2217
+ //
2218
+ // Dispatched BEFORE the bare F-key branches because Shift+F5
2219
+ // shares its `name` ('f5') with the bare F5 dictation toggle —
2220
+ // we want the shifted variant to win without each bare branch
2221
+ // having to add a `!shift` guard.
2222
+ //
2223
+ // Shift+F1 queued input ("3 messages queued: …")
2224
+ // Shift+F2 key-pool health ("3 keys healthy, 1 cooling")
2225
+ // Shift+F3 last tool-call ("bash: ok, 'ls -la' → …")
2226
+ // Shift+F4 toggle screen-reader (persists to config.json)
2227
+ // Shift+F5 soft-cancel turn (graceful abort, partial kept)
2228
+ // Shift+F6 panic-stop TTS (silences for 5s, drops queue)
2229
+ // Shift+F12 read hotkey list (discoverability)
2230
+ //
2231
+ // Unbound shifted F-keys are no-ops: the final `return` in this block
2231
+ // keeps them from falling through to the bare-F-key branches below.
2233
+ // ──────────────────────────────────────────────────────────────
2234
+ if (shift) {
2235
+ // ── Shift+F1: queued input ─────────────────────────
2236
+ if (name === 'f1') {
2237
+ const g = globalThis;
2238
+ const q = (g.__crowcoderQueuedInput || '').trim();
2239
+ announce('Shift+F1', q
2240
+ ? `Queued during last chain: ${q.slice(0, 200)}`
2241
+ : 'Nothing queued.');
2242
+ return;
2243
+ }
2244
+ // ── Shift+F2: key-pool health ──────────────────────
2245
+ if (name === 'f2') {
2246
+ const ks = keyPoolStatus();
2247
+ if (ks.length === 0) {
2248
+ announce('Shift+F2', 'Key pool: 1 key (no pool configured). Use /keys add to add more.');
2249
+ return;
2250
+ }
2251
+ const healthy = ks.filter((s) => s.healthy).length;
2252
+ const cooling = ks.length - healthy;
2253
+ const cooldownNotes = ks
2254
+ .filter((s) => !s.healthy && s.coolDownRemainingSec)
2255
+ .map((s) => `${s.tail} cooling ${s.coolDownRemainingSec}s`)
2256
+ .join(', ');
2257
+ const text = cooling > 0
2258
+ ? `Key pool: ${healthy} healthy, ${cooling} cooling. ${cooldownNotes}.`
2259
+ : `Key pool: ${healthy} healthy, all keys ready.`;
2260
+ announce('Shift+F2', text);
2261
+ return;
2262
+ }
2263
+ // ── Shift+F3: last tool call ───────────────────────
2264
+ if (name === 'f3') {
2265
+ const g = globalThis;
2266
+ const tc = g.__lastToolCall;
2267
+ if (!tc) {
2268
+ announce('Shift+F3', 'No tool calls yet this session.');
2269
+ return;
2270
+ }
2271
+ const status = tc.isError ? 'error' : 'ok';
2272
+ // Output preview kept short for TTS; full output is already on
2273
+ // stdout from the original tool-call print.
2274
+ announce('Shift+F3', `Last tool: ${tc.name}, ${status}. ${tc.argsPreview}${tc.outputPreview ? ' → ' + tc.outputPreview.slice(0, 100) : ''}`);
2275
+ return;
2276
+ }
2277
+ // ── Shift+F4: toggle screen-reader mode ────────────
2278
+ if (name === 'f4') {
2279
+ config.voice = config.voice || {};
2280
+ config.voice.accessibility = config.voice.accessibility || {};
2281
+ const cur = config.voice.accessibility.screenReader === true;
2282
+ config.voice.accessibility.screenReader = !cur;
2283
+ saveConfig(config);
2284
+ const text = !cur
2285
+ ? 'Screen-reader mode ON. ANSI colors stripped. Restart recommended for full effect.'
2286
+ : 'Screen-reader mode OFF. Colors restored on next prompt.';
2287
+ announce('Shift+F4', text);
2288
+ return;
2289
+ }
2290
+ // ── Shift+F5: soft-cancel current turn ─────────────
2291
+ if (name === 'f5') {
2292
+ const g = globalThis;
2293
+ if (g.__turnAbortCtl && !g.__turnAbortCtl.signal.aborted) {
2294
+ try {
2295
+ g.__turnAbortCtl.abort();
2296
+ }
2297
+ catch { /* noop */ }
2298
+ announce('Shift+F5', 'Turn cancelled. Partial response kept.');
2299
+ }
2300
+ else {
2301
+ announce('Shift+F5', 'No turn in progress.');
2302
+ }
2303
+ return;
2304
+ }
2305
+ // ── Shift+F6: panic-stop TTS ───────────────────────
2306
+ if (name === 'f6') {
2307
+ // Abort the current playback (same as F6/F8) AND open a 5-second
2308
+ // suppression window so incidental utterances (error
2309
+ // announcements, mode switches, audio cues fired by other code
2310
+ // paths) can't immediately fill the silence.
2311
+ const g = globalThis;
2312
+ if (g.__voicePlaybackCtl && !g.__voicePlaybackCtl.signal.aborted) {
2313
+ try {
2314
+ g.__voicePlaybackCtl.abort();
2315
+ }
2316
+ catch { /* noop */ }
2317
+ }
2318
+ g.__voiceSuppressUntilMs = Date.now() + 5000;
2319
+ // Print only — don't speak this acknowledgement (would defeat
2320
+ // the purpose of "shut up now").
2321
+ console.log(chalk.dim(' [Shift+F6] TTS panic-stop — silenced for 5s.'));
2322
+ return;
2323
+ }
2324
+ // ── Shift+F12: read hotkey list ────────────────────
2325
+ if (name === 'f12') {
2326
+ const lines = [
2327
+ 'Hotkey reference.',
2328
+ 'F1 status. F2 location. F3 read full last response. F4 read summary.',
2329
+ 'F5 dictate. F6 pause. F7 replay. F8 skip. F9 speed up. F10 slow down.',
2330
+ 'F11 read input buffer. F12 read your previous turn.',
2331
+ 'Shift+F1 queued input. Shift+F2 key pool. Shift+F3 last tool. Shift+F4 toggle screen-reader.',
2332
+ 'Shift+F5 soft-cancel turn. Shift+F6 panic-stop TTS. Shift+F12 this list.',
2333
+ ];
2334
+ for (const ln of lines)
2335
+ console.log(chalk.dim(' [Shift+F12] ') + ln);
2336
+ if (tts.apiKey) {
2337
+ // Speak as one continuous string so the chunker can pace it.
2338
+ speak(lines.join(' '), config, { voiceId: tts.assistantVoiceId }).catch(() => { });
2339
+ }
2340
+ return;
2341
+ }
2342
+ // Any other shifted F-key: no-op (don't fall through to bare).
2343
+ return;
2344
+ }
2345
+ // ── F11: read current input buffer (Tier 1, bare) ──
2346
+ if (name === 'f11') {
2347
+ // rl.line is readline's internal "what the user has typed so far
2348
+ // on the current prompt." Empty string when the prompt is fresh
2349
+ // or the buffer was just submitted.
2350
+ const buf = rl.line ?? '';
2351
+ announce('F11', buf
2352
+ ? `Input buffer: ${buf}`
2353
+ : 'Input buffer is empty.');
2354
+ return;
2355
+ }
2356
+ // ── F12: read previous submitted user turn (Tier 1) ──
2357
+ if (name === 'f12') {
2358
+ // Walk messages newest-first looking for the most-recent user
2359
+ // message. `messages` is the live REPL conversation array; the
2360
+ // last user entry is the prompt the model just answered (or is
2361
+ // answering). Skips system-injected "auto-resume" markers and
2362
+ // tool-result envelopes (those have role 'tool', not 'user').
2363
+ let last = null;
2364
+ for (let i = messages.length - 1; i >= 0; i--) {
2365
+ const m = messages[i];
2366
+ if (m.role === 'user' && typeof m.content === 'string' && m.content.trim()) {
2367
+ last = m.content;
2368
+ break;
2369
+ }
2370
+ }
2371
+ announce('F12', last
2372
+ ? `Your last message: ${last.slice(0, 400)}`
2373
+ : 'No prior user message this session.');
2374
+ return;
2375
+ }
2161
2376
  // ── F5: push-to-talk dictation toggle ──────────────
2162
2377
  if (name === 'f5') {
2163
2378
  if (dictateActive) {