discoclaw 1.3.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. package/.env.example +4 -6
  2. package/.env.example.full +13 -32
  3. package/README.md +1 -1
  4. package/dist/cli/dashboard.test.js +0 -4
  5. package/dist/cli/init-wizard.js +4 -8
  6. package/dist/cli/init-wizard.test.js +4 -10
  7. package/dist/config.js +2 -42
  8. package/dist/config.test.js +8 -72
  9. package/dist/dashboard/server.js +1 -5
  10. package/dist/dashboard/server.test.js +3 -6
  11. package/dist/discord/actions.js +112 -6
  12. package/dist/discord/actions.test.js +117 -1
  13. package/dist/discord/help-command.js +1 -1
  14. package/dist/discord/message-coordinator.js +3 -8
  15. package/dist/discord/models-command.js +1 -1
  16. package/dist/discord/reaction-handler.js +2 -2
  17. package/dist/discord/reaction-handler.test.js +55 -0
  18. package/dist/discord/verify-push.js +31 -36
  19. package/dist/discord/verify-push.test.js +34 -6
  20. package/dist/discord/voice-command.js +1 -31
  21. package/dist/discord/voice-command.test.js +21 -259
  22. package/dist/discord/voice-status-command.js +3 -22
  23. package/dist/discord/voice-status-command.test.js +16 -124
  24. package/dist/discord-followup.test.js +133 -0
  25. package/dist/health/config-doctor.js +5 -27
  26. package/dist/health/config-doctor.test.js +1 -4
  27. package/dist/index.js +1 -28
  28. package/dist/runtime-overrides.js +2 -3
  29. package/dist/runtime-overrides.test.js +27 -193
  30. package/dist/tasks/store.js +10 -6
  31. package/dist/tasks/store.test.js +44 -0
  32. package/dist/tasks/task-action-executor.test.js +162 -50
  33. package/dist/tasks/task-action-mutations.js +22 -2
  34. package/dist/tasks/task-action-read-ops.js +7 -1
  35. package/dist/tasks/task-action-runner-types.js +19 -1
  36. package/dist/voice/audio-pipeline.js +145 -298
  37. package/docs/configuration.md +4 -9
  38. package/docs/official-docs.md +6 -9
  39. package/docs/runtime-switching.md +1 -1
  40. package/package.json +1 -1
  41. package/dist/voice/audio-pipeline.test.js +0 -1100
  42. package/dist/voice/stt-deepgram.js +0 -154
  43. package/dist/voice/stt-deepgram.test.js +0 -275
  44. package/dist/voice/stt-factory.js +0 -42
  45. package/dist/voice/stt-factory.test.js +0 -45
  46. package/dist/voice/stt-openai.js +0 -156
  47. package/dist/voice/stt-openai.test.js +0 -281
  48. package/dist/voice/tts-cartesia.js +0 -169
  49. package/dist/voice/tts-cartesia.test.js +0 -228
  50. package/dist/voice/tts-deepgram.js +0 -84
  51. package/dist/voice/tts-deepgram.test.js +0 -220
  52. package/dist/voice/tts-factory.js +0 -52
  53. package/dist/voice/tts-factory.test.js +0 -53
  54. package/dist/voice/tts-openai.js +0 -70
  55. package/dist/voice/tts-openai.test.js +0 -138
  56. package/dist/voice/types.test.js +0 -90
package/.env.example CHANGED
@@ -90,7 +90,7 @@ DISCORD_GUILD_ID=
90
90
  # - PRIMARY_RUNTIME sets the default adapter the instance boots with.
91
91
  # - `!models set chat <runtime>` can live-switch the main runtime in memory, but chat runtime swaps do not persist.
92
92
  # - Persistent model-role defaults live in data/models.json.
93
- # - Persistent runtime-only overlays live in data/runtime-overrides.json (`voiceRuntime`, `fastRuntime`, `ttsVoice`).
93
+ # - Persistent runtime-only overlays live in data/runtime-overrides.json (`voiceRuntime`, `fastRuntime`).
94
94
  # - `!models reset` writes startup-default model strings back into models.json and clears fast/voice runtime overlays;
95
95
  # it does not rewrite PRIMARY_RUNTIME in .env.
96
96
  # Supported runtime-path notes:
@@ -191,11 +191,9 @@ DISCORD_GUILD_ID=
191
191
  # Voice — configure via `pnpm setup` or `discoclaw init`
192
192
  # ----------------------------------------------------------
193
193
  # Run `pnpm setup` or `discoclaw init` to enable voice interactively,
194
- # or set these vars manually to enable voice chat (STT/TTS via Deepgram).
194
+ # or set these vars manually to enable Gemini Live voice chat.
195
195
  #DISCOCLAW_VOICE_ENABLED=0
196
- # Voice pipeline provider: pipeline (default, Deepgram STT/TTS) or gemini-live
197
- # (Gemini Live WebSocket — requires GEMINI_API_KEY).
198
- #DISCOCLAW_VOICE_PIPELINE_PROVIDER=pipeline
196
+ # Gemini Live requires GEMINI_API_KEY.
199
197
  # Gemini Live session rotation threshold (ms). The provider proactively reconnects
200
198
  # before Gemini's ~15 min session limit to minimize audio gap. Default: 780000 (13 min).
201
199
  #DISCOCLAW_GEMINI_SESSION_ROTATION_MS=780000
@@ -206,7 +204,7 @@ DISCORD_GUILD_ID=
206
204
  # (the bot creates a "voice-log" text channel and stores its ID in system-scaffold.json).
207
205
  # Only set this to override the auto-discovered channel.
208
206
  #DISCOCLAW_VOICE_LOG_CHANNEL=
209
- #DEEPGRAM_API_KEY=
207
+ #GEMINI_API_KEY=
210
208
  # Optional voice-only Anthropic runtime for voice responses.
211
209
  # `claude-api` is not a valid PRIMARY_RUNTIME and does not persist in models.json.
212
210
  # Use `!models set voice claude-api` to persist the voice runtime path in runtime-overrides.json.
package/.env.example.full CHANGED
@@ -703,32 +703,22 @@ DISCOCLAW_DISCORD_ACTIONS_IMAGEGEN=0
703
703
  # IMAGEGEN_GEMINI_API_KEY is set.
704
704
 
705
705
  # ----------------------------------------------------------
706
- # Voice (STT/TTS) join voice channels, listen and respond
706
+ # Voice — Gemini Live voice chat
707
707
  # ----------------------------------------------------------
708
- # Master switch — enables voice channel interaction (default: off).
709
- # When enabled, the bot can join Discord voice channels, transcribe speech via STT,
710
- # and respond with synthesized speech via TTS.
708
+ # Master switch — enables Discord voice interaction (default: off).
709
+ # Gemini Live handles speech recognition, reasoning, and speech synthesis in one session.
711
710
  #DISCOCLAW_VOICE_ENABLED=0
712
711
  # Enable voice Discord action category (voiceJoin, voiceLeave, voiceStatus, voiceMute, voiceDeafen).
713
712
  # Requires DISCOCLAW_VOICE_ENABLED=1 to take effect (default: off).
714
713
  #DISCOCLAW_DISCORD_ACTIONS_VOICE=0
715
714
  # Auto-join voice channels when a non-bot user joins, and auto-leave when the last
716
- # non-bot user leaves. Starts/tears down the audio pipeline (STT receiver) automatically.
717
- # Requires DISCOCLAW_VOICE_ENABLED=1 (default: off).
715
+ # non-bot user leaves. Requires DISCOCLAW_VOICE_ENABLED=1 (default: off).
718
716
  #DISCOCLAW_VOICE_AUTO_JOIN=0
719
- # Speech-to-text provider: deepgram (Deepgram Nova-3 API) or whisper (whisper.cpp local).
720
- # deepgram requires DEEPGRAM_API_KEY; whisper runs locally with no API key.
721
- #DISCOCLAW_STT_PROVIDER=deepgram
722
- # Text-to-speech provider: cartesia | deepgram | kokoro | openai.
723
- # cartesia requires CARTESIA_API_KEY; deepgram reuses DEEPGRAM_API_KEY;
724
- # openai requires OPENAI_API_KEY; kokoro runs locally with no API key.
725
- #DISCOCLAW_TTS_PROVIDER=cartesia
726
- # Voice audio channel name or ID — the channel the bot joins for voice interaction.
727
- # Used as the prompt context source (root policy, PA files, channel context, durable memory).
717
+ # Voice text channel name or ID used for prompt context and voice-triggered actions.
728
718
  # The old env var DISCOCLAW_VOICE_TRANSCRIPT_CHANNEL is still accepted as a fallback.
729
719
  # Leave unset to skip voice channel context in prompts.
730
720
  #DISCOCLAW_VOICE_HOME_CHANNEL= # e.g. "voice" if using the default scaffold
731
- # Text channel name or ID for posting voice transcripts (user STT and bot TTS responses).
721
+ # Text channel name or ID for posting voice transcripts.
732
722
  # Optional — auto-discovered via bootstrap (the bot creates "voice-log" and stores its ID
733
723
  # in system-scaffold.json). Only set this to override the auto-discovered channel.
734
724
  # Leave unset to disable transcript mirroring.
@@ -741,20 +731,11 @@ DISCOCLAW_DISCORD_ACTIONS_IMAGEGEN=0
741
731
  # Custom system prompt prepended to voice AI invocations. Max 4000 chars.
742
732
  # Use this to set a conversational tone, brevity instructions, or persona for voice responses.
743
733
  #DISCOCLAW_VOICE_SYSTEM_PROMPT=
744
- # Anthropic API key for direct Messages API access (bypasses Claude CLI cold-start).
745
- # When set and voice is enabled, voice invocations use the Anthropic REST adapter
746
- # instead of the CLI subprocess, eliminating ~2-5s cold-start latency per response.
734
+ # Gemini Live session rotation threshold (ms). The provider proactively reconnects
735
+ # before Gemini's ~15 min session limit to minimize audio gap. Default: 780000 (13 min).
736
+ #DISCOCLAW_GEMINI_SESSION_ROTATION_MS=780000
737
+ # Google Gemini API key. Required when voice is enabled.
738
+ #GEMINI_API_KEY=
739
+ # Anthropic API key for the optional direct Messages API voice runtime.
740
+ # When set, `!models set voice claude-api` can bypass CLI cold-start for voice responses.
747
741
  #ANTHROPIC_API_KEY=
748
- # API key for Deepgram Nova-3 STT. Required when DISCOCLAW_STT_PROVIDER=deepgram.
749
- #DEEPGRAM_API_KEY=
750
- # Deepgram STT model for voice transcription (default: nova-3-conversationalai).
751
- # See https://developers.deepgram.com/docs/models-languages-overview for available models.
752
- #DEEPGRAM_STT_MODEL=nova-3-conversationalai
753
- # Deepgram TTS voice for speech synthesis (default: aura-2-asteria-en).
754
- # See https://developers.deepgram.com/docs/tts-models for available voices.
755
- #DEEPGRAM_TTS_VOICE=aura-2-asteria-en
756
- # Deepgram TTS playback speed (range: 0.5–1.5, default: 1.3).
757
- # Values below 1.0 slow down speech; values above 1.0 speed it up.
758
- #DEEPGRAM_TTS_SPEED=1.3
759
- # API key for Cartesia Sonic-3 TTS. Required when DISCOCLAW_TTS_PROVIDER=cartesia.
760
- #CARTESIA_API_KEY=
package/README.md CHANGED
@@ -39,7 +39,7 @@ No gateways, no proxies, no web UI. Discord *is* the interface.
39
39
 
40
40
  ## Voice — the bot talks back
41
41
 
42
- Real-time voice with STT (Deepgram), TTS (Cartesia), barge-in, and transcript mirroring. Off by default. [Setup guide →](docs/voice.md)
42
+ Real-time voice with Gemini Live, barge-in, tool calls, and transcript mirroring. Off by default. [Setup guide →](docs/voice.md)
43
43
 
44
44
  ## Self-management
45
45
 
@@ -591,7 +591,6 @@ describe('runDashboard', () => {
591
591
  runtimeOverrides: {
592
592
  fastRuntime: 'openrouter',
593
593
  voiceRuntime: 'anthropic',
594
- ttsVoice: 'alloy',
595
594
  },
596
595
  envDefaults: {
597
596
  ...makeDoctorContext().envDefaults,
@@ -628,7 +627,6 @@ describe('runDashboard', () => {
628
627
  expect(saveModelConfigMock).toHaveBeenCalledWith('/repo/data/models.json', {});
629
628
  expect(saveOverridesMock).toHaveBeenCalledWith('/repo/data/runtime-overrides.json', {
630
629
  voiceRuntime: 'anthropic',
631
- ttsVoice: 'alloy',
632
630
  });
633
631
  expect(frames.some((frame) => frame.includes('Reset fast to default: capable. Cleared fastRuntime override. Changes take effect on next service restart.'))).toBe(true);
634
632
  });
@@ -638,7 +636,6 @@ describe('runDashboard', () => {
638
636
  runtimeOverrides: {
639
637
  fastRuntime: 'openrouter',
640
638
  voiceRuntime: 'anthropic',
641
- ttsVoice: 'alloy',
642
639
  },
643
640
  envDefaults: {
644
641
  ...makeDoctorContext().envDefaults,
@@ -675,7 +672,6 @@ describe('runDashboard', () => {
675
672
  expect(saveModelConfigMock).toHaveBeenCalledWith('/repo/data/models.json', {});
676
673
  expect(saveOverridesMock).toHaveBeenCalledWith('/repo/data/runtime-overrides.json', {
677
674
  fastRuntime: 'openrouter',
678
- ttsVoice: 'alloy',
679
675
  });
680
676
  expect(frames.some((frame) => frame.includes('Reset voice to default: capable. Cleared voiceRuntime override. Changes take effect on next service restart.'))).toBe(true);
681
677
  });
@@ -88,10 +88,8 @@ export function buildEnvContent(vals, now = new Date()) {
88
88
  // Voice
89
89
  const voiceKeys = [
90
90
  'DISCOCLAW_VOICE_ENABLED',
91
- 'DEEPGRAM_API_KEY',
91
+ 'GEMINI_API_KEY',
92
92
  'DISCOCLAW_DISCORD_ACTIONS_VOICE',
93
- 'DISCOCLAW_STT_PROVIDER',
94
- 'DISCOCLAW_TTS_PROVIDER',
95
93
  ];
96
94
  const hasVoice = voiceKeys.some((k) => vals[k]);
97
95
  if (hasVoice) {
@@ -347,14 +345,12 @@ export async function runInitWizard() {
347
345
  }
348
346
  values.DISCOCLAW_DISCORD_ACTIONS = '1';
349
347
  // ── Voice setup ───────────────────────────────────────────────────────────
350
- const enableVoice = await ask('\nEnable voice chat? (requires a Deepgram API key — you can skip this and enable later) [y/N] ');
348
+ const enableVoice = await ask('\nEnable voice chat? (requires a Gemini API key — you can skip this and enable later) [y/N] ');
351
349
  if (enableVoice.toLowerCase() === 'y') {
352
- const deepgramKey = await askValidated('Deepgram API key: ', (val) => (val ? null : 'Deepgram API key is required'));
350
+ const geminiKey = await askValidated('Gemini API key: ', (val) => (val ? null : 'Gemini API key is required'));
353
351
  values.DISCOCLAW_VOICE_ENABLED = '1';
354
- values.DEEPGRAM_API_KEY = deepgramKey;
352
+ values.GEMINI_API_KEY = geminiKey;
355
353
  values.DISCOCLAW_DISCORD_ACTIONS_VOICE = '1';
356
- values.DISCOCLAW_STT_PROVIDER = 'deepgram';
357
- values.DISCOCLAW_TTS_PROVIDER = 'deepgram';
358
354
  }
359
355
  // ── Write .env ────────────────────────────────────────────────────────────
360
356
  const envContent = buildEnvContent(values);
@@ -155,16 +155,12 @@ describe('init wizard helpers', () => {
155
155
  DISCORD_TOKEN: 'a.b.c',
156
156
  DISCORD_ALLOW_USER_IDS: '1000000000000000001',
157
157
  DISCOCLAW_VOICE_ENABLED: '1',
158
- DEEPGRAM_API_KEY: 'dg-key',
158
+ GEMINI_API_KEY: 'gm-key',
159
159
  DISCOCLAW_DISCORD_ACTIONS_VOICE: '1',
160
- DISCOCLAW_STT_PROVIDER: 'deepgram',
161
- DISCOCLAW_TTS_PROVIDER: 'deepgram',
162
160
  }, new Date('2026-02-26T00:00:00.000Z'));
163
161
  expect(content).toContain('# VOICE');
164
162
  expect(content).toContain('DISCOCLAW_VOICE_ENABLED=1');
165
- expect(content).toContain('DEEPGRAM_API_KEY=dg-key');
166
- expect(content).toContain('DISCOCLAW_STT_PROVIDER=deepgram');
167
- expect(content).toContain('DISCOCLAW_TTS_PROVIDER=deepgram');
163
+ expect(content).toContain('GEMINI_API_KEY=gm-key');
168
164
  });
169
165
  it('omits voice section when no voice vars are provided', () => {
170
166
  const content = buildEnvContent({
@@ -555,7 +551,7 @@ describe('runInitWizard', () => {
555
551
  '5000000000000000001', // DISCORD_GUILD_ID
556
552
  '', // provider selection -> default (Claude)
557
553
  'y', // enable voice -> yes
558
- 'dg-test-key', // Deepgram API key
554
+ 'gemini-test-key', // Gemini API key
559
555
  ];
560
556
  process.chdir(tmpDir);
561
557
  vi.mocked(createInterface).mockReturnValue(makeReadline(answers));
@@ -573,9 +569,7 @@ describe('runInitWizard', () => {
573
569
  const newEnv = fs.readFileSync(path.join(tmpDir, '.env'), 'utf8');
574
570
  expect(newEnv).toContain('# VOICE');
575
571
  expect(newEnv).toContain('DISCOCLAW_VOICE_ENABLED=1');
576
- expect(newEnv).toContain('DEEPGRAM_API_KEY=dg-test-key');
577
- expect(newEnv).toContain('DISCOCLAW_STT_PROVIDER=deepgram');
578
- expect(newEnv).toContain('DISCOCLAW_TTS_PROVIDER=deepgram');
572
+ expect(newEnv).toContain('GEMINI_API_KEY=gemini-test-key');
579
573
  expect(newEnv).toContain('DISCOCLAW_DISCORD_ACTIONS_VOICE=1');
580
574
  });
581
575
  });
package/dist/config.js CHANGED
@@ -510,9 +510,6 @@ export function parseConfig(env) {
510
510
  const anthropicApiKey = parseTrimmedString(env, 'ANTHROPIC_API_KEY');
511
511
  const voiceEnabled = parseBoolean(env, 'DISCOCLAW_VOICE_ENABLED', false);
512
512
  const voiceAutoJoin = parseBoolean(env, 'DISCOCLAW_VOICE_AUTO_JOIN', false);
513
- const voiceSttProvider = parseEnum(env, 'DISCOCLAW_STT_PROVIDER', ['deepgram', 'whisper', 'openai'], 'deepgram');
514
- const voiceTtsProvider = parseEnum(env, 'DISCOCLAW_TTS_PROVIDER', ['cartesia', 'deepgram', 'kokoro', 'openai'], 'cartesia');
515
- const voicePipelineProvider = parseEnum(env, 'DISCOCLAW_VOICE_PIPELINE_PROVIDER', ['pipeline', 'gemini-live'], 'pipeline');
516
513
  const geminiSessionRotationMs = parseNonNegativeInt(env, 'DISCOCLAW_GEMINI_SESSION_ROTATION_MS', 780_000);
517
514
  let voiceHomeChannel = parseTrimmedString(env, 'DISCOCLAW_VOICE_HOME_CHANNEL');
518
515
  if (!voiceHomeChannel) {
@@ -523,20 +520,6 @@ export function parseConfig(env) {
523
520
  }
524
521
  }
525
522
  const voiceLogChannel = parseTrimmedString(env, 'DISCOCLAW_VOICE_LOG_CHANNEL');
526
- const deepgramApiKey = parseTrimmedString(env, 'DEEPGRAM_API_KEY');
527
- const deepgramSttModel = parseTrimmedString(env, 'DEEPGRAM_STT_MODEL') ?? 'nova-3-general';
528
- const deepgramTtsVoice = parseTrimmedString(env, 'DEEPGRAM_TTS_VOICE') ?? 'aura-2-asteria-en';
529
- const deepgramTtsSpeed = (() => {
530
- const raw = parseTrimmedString(env, 'DEEPGRAM_TTS_SPEED');
531
- if (raw == null)
532
- return 1.3;
533
- const n = parseFloat(raw);
534
- if (!Number.isFinite(n) || n < 0.5 || n > 1.5) {
535
- throw new Error(`DEEPGRAM_TTS_SPEED must be a number between 0.5 and 1.5, got "${raw}"`);
536
- }
537
- return n;
538
- })();
539
- const cartesiaApiKey = parseTrimmedString(env, 'CARTESIA_API_KEY');
540
523
  const voiceModelRaw = parseTrimmedString(env, 'DISCOCLAW_VOICE_MODEL');
541
524
  const voiceSystemPrompt = (() => {
542
525
  const raw = parseTrimmedString(env, 'DISCOCLAW_VOICE_SYSTEM_PROMPT');
@@ -547,26 +530,11 @@ export function parseConfig(env) {
547
530
  }
548
531
  return raw;
549
532
  })();
550
- if (voiceEnabled && voiceSttProvider === 'deepgram' && !deepgramApiKey) {
551
- warnings.push('DISCOCLAW_VOICE_ENABLED=1 with STT provider "deepgram" but DEEPGRAM_API_KEY is not set; voice STT will fail at runtime.');
552
- }
553
- if (voiceEnabled && voiceSttProvider === 'openai' && !openaiApiKey) {
554
- warnings.push('DISCOCLAW_VOICE_ENABLED=1 with STT provider "openai" but OPENAI_API_KEY is not set; voice STT will fail at runtime.');
555
- }
556
- if (voiceEnabled && voiceTtsProvider === 'cartesia' && !cartesiaApiKey) {
557
- warnings.push('DISCOCLAW_VOICE_ENABLED=1 with TTS provider "cartesia" but CARTESIA_API_KEY is not set; voice TTS will fail at runtime.');
558
- }
559
- if (voiceEnabled && voiceTtsProvider === 'deepgram' && !deepgramApiKey) {
560
- warnings.push('DISCOCLAW_VOICE_ENABLED=1 with TTS provider "deepgram" but DEEPGRAM_API_KEY is not set; voice TTS will fail at runtime.');
561
- }
562
- if (voiceEnabled && voiceTtsProvider === 'openai' && !openaiApiKey) {
563
- warnings.push('DISCOCLAW_VOICE_ENABLED=1 with TTS provider "openai" but OPENAI_API_KEY is not set; voice TTS will fail at runtime.');
564
- }
565
533
  if (voiceEnabled && !voiceHomeChannel) {
566
534
  warnings.push('DISCOCLAW_VOICE_ENABLED=1 but DISCOCLAW_VOICE_HOME_CHANNEL is not set; voice actions will be disabled (no target channel for action execution).');
567
535
  }
568
- if (voiceEnabled && voicePipelineProvider === 'gemini-live' && !geminiApiKey) {
569
- warnings.push('DISCOCLAW_VOICE_PIPELINE_PROVIDER=gemini-live but GEMINI_API_KEY is not set; voice pipeline will fail at runtime.');
536
+ if (voiceEnabled && !geminiApiKey) {
537
+ warnings.push('DISCOCLAW_VOICE_ENABLED=1 but GEMINI_API_KEY is not set; Gemini Live voice will fail at runtime.');
570
538
  }
571
539
  const coldStorageEnabled = parseBoolean(env, 'DISCOCLAW_COLD_STORAGE_ENABLED', false);
572
540
  const coldStorageApiKey = parseTrimmedString(env, 'COLD_STORAGE_API_KEY') ?? openaiApiKey;
@@ -746,17 +714,9 @@ export function parseConfig(env) {
746
714
  voiceAutoJoin,
747
715
  voiceModel,
748
716
  voiceSystemPrompt,
749
- voiceSttProvider,
750
- voiceTtsProvider,
751
- voicePipelineProvider,
752
717
  geminiSessionRotationMs,
753
718
  voiceHomeChannel,
754
719
  voiceLogChannel,
755
- deepgramApiKey,
756
- deepgramSttModel,
757
- deepgramTtsVoice,
758
- deepgramTtsSpeed,
759
- cartesiaApiKey,
760
720
  forgeDrafterRuntime,
761
721
  forgeAuditorRuntime,
762
722
  openrouterApiKey,
@@ -1021,41 +1021,9 @@ describe('parseConfig', () => {
1021
1021
  expect(config.voiceEnabled).toBe(false);
1022
1022
  });
1023
1023
  it('parses DISCOCLAW_VOICE_ENABLED=1 as true', () => {
1024
- const { config } = parseConfig(env({ DISCOCLAW_VOICE_ENABLED: '1', DEEPGRAM_API_KEY: 'dg-key', CARTESIA_API_KEY: 'ca-key' }));
1024
+ const { config } = parseConfig(env({ DISCOCLAW_VOICE_ENABLED: '1', GEMINI_API_KEY: 'gm-key' }));
1025
1025
  expect(config.voiceEnabled).toBe(true);
1026
1026
  });
1027
- it('defaults voiceSttProvider to "deepgram"', () => {
1028
- const { config } = parseConfig(env());
1029
- expect(config.voiceSttProvider).toBe('deepgram');
1030
- });
1031
- it('parses DISCOCLAW_STT_PROVIDER=whisper', () => {
1032
- const { config } = parseConfig(env({ DISCOCLAW_STT_PROVIDER: 'whisper' }));
1033
- expect(config.voiceSttProvider).toBe('whisper');
1034
- });
1035
- it('parses STT provider case-insensitively', () => {
1036
- const { config } = parseConfig(env({ DISCOCLAW_STT_PROVIDER: 'Deepgram' }));
1037
- expect(config.voiceSttProvider).toBe('deepgram');
1038
- });
1039
- it('throws on invalid STT provider', () => {
1040
- expect(() => parseConfig(env({ DISCOCLAW_STT_PROVIDER: 'invalid' })))
1041
- .toThrow(/DISCOCLAW_STT_PROVIDER must be one of deepgram\|whisper/);
1042
- });
1043
- it('defaults voiceTtsProvider to "cartesia"', () => {
1044
- const { config } = parseConfig(env());
1045
- expect(config.voiceTtsProvider).toBe('cartesia');
1046
- });
1047
- it('parses DISCOCLAW_TTS_PROVIDER=kokoro', () => {
1048
- const { config } = parseConfig(env({ DISCOCLAW_TTS_PROVIDER: 'kokoro' }));
1049
- expect(config.voiceTtsProvider).toBe('kokoro');
1050
- });
1051
- it('parses TTS provider case-insensitively', () => {
1052
- const { config } = parseConfig(env({ DISCOCLAW_TTS_PROVIDER: 'Cartesia' }));
1053
- expect(config.voiceTtsProvider).toBe('cartesia');
1054
- });
1055
- it('throws on invalid TTS provider', () => {
1056
- expect(() => parseConfig(env({ DISCOCLAW_TTS_PROVIDER: 'elevenlabs' })))
1057
- .toThrow(/DISCOCLAW_TTS_PROVIDER must be one of cartesia\|deepgram\|kokoro\|openai/);
1058
- });
1059
1027
  it('parses DISCOCLAW_VOICE_HOME_CHANNEL when set', () => {
1060
1028
  const { config } = parseConfig(env({ DISCOCLAW_VOICE_HOME_CHANNEL: 'voice-log' }));
1061
1029
  expect(config.voiceHomeChannel).toBe('voice-log');
@@ -1085,33 +1053,13 @@ describe('parseConfig', () => {
1085
1053
  const { config } = parseConfig(env());
1086
1054
  expect(config.voiceLogChannel).toBeUndefined();
1087
1055
  });
1088
- it('parses DEEPGRAM_API_KEY when set', () => {
1089
- const { config } = parseConfig(env({ DEEPGRAM_API_KEY: 'dg-key' }));
1090
- expect(config.deepgramApiKey).toBe('dg-key');
1056
+ it('warns when voice enabled but GEMINI_API_KEY is missing', () => {
1057
+ const { warnings } = parseConfig(env({ DISCOCLAW_VOICE_ENABLED: '1' }));
1058
+ expect(warnings.some((w) => w.includes('GEMINI_API_KEY'))).toBe(true);
1091
1059
  });
1092
- it('returns undefined for deepgramApiKey when unset', () => {
1093
- const { config } = parseConfig(env());
1094
- expect(config.deepgramApiKey).toBeUndefined();
1095
- });
1096
- it('parses CARTESIA_API_KEY when set', () => {
1097
- const { config } = parseConfig(env({ CARTESIA_API_KEY: 'ca-key' }));
1098
- expect(config.cartesiaApiKey).toBe('ca-key');
1099
- });
1100
- it('returns undefined for cartesiaApiKey when unset', () => {
1101
- const { config } = parseConfig(env());
1102
- expect(config.cartesiaApiKey).toBeUndefined();
1103
- });
1104
- it('warns when voice enabled with deepgram STT but DEEPGRAM_API_KEY missing', () => {
1105
- const { warnings } = parseConfig(env({ DISCOCLAW_VOICE_ENABLED: '1', CARTESIA_API_KEY: 'ca-key' }));
1106
- expect(warnings.some((w) => w.includes('DEEPGRAM_API_KEY'))).toBe(true);
1107
- });
1108
- it('does not warn about DEEPGRAM_API_KEY when voice disabled', () => {
1060
+ it('does not warn about GEMINI_API_KEY when voice disabled', () => {
1109
1061
  const { warnings } = parseConfig(env({ DISCOCLAW_VOICE_ENABLED: '0' }));
1110
- expect(warnings.some((w) => w.includes('DEEPGRAM_API_KEY'))).toBe(false);
1111
- });
1112
- it('does not warn about DEEPGRAM_API_KEY when STT provider is whisper', () => {
1113
- const { warnings } = parseConfig(env({ DISCOCLAW_VOICE_ENABLED: '1', DISCOCLAW_STT_PROVIDER: 'whisper', CARTESIA_API_KEY: 'ca-key' }));
1114
- expect(warnings.some((w) => w.includes('DEEPGRAM_API_KEY'))).toBe(false);
1062
+ expect(warnings.some((w) => w.includes('GEMINI_API_KEY'))).toBe(false);
1115
1063
  });
1116
1064
  // --- voiceAutoJoin ---
1117
1065
  it('defaults voiceAutoJoin to false', () => {
@@ -1130,24 +1078,12 @@ describe('parseConfig', () => {
1130
1078
  const { config } = parseConfig(env({ DISCOCLAW_VOICE_AUTO_JOIN: '0' }));
1131
1079
  expect(config.voiceAutoJoin).toBe(false);
1132
1080
  });
1133
- it('warns when voice enabled with cartesia TTS but CARTESIA_API_KEY missing', () => {
1134
- const { warnings } = parseConfig(env({ DISCOCLAW_VOICE_ENABLED: '1', DEEPGRAM_API_KEY: 'dg-key' }));
1135
- expect(warnings.some((w) => w.includes('CARTESIA_API_KEY'))).toBe(true);
1136
- });
1137
- it('does not warn about CARTESIA_API_KEY when voice disabled', () => {
1138
- const { warnings } = parseConfig(env({ DISCOCLAW_VOICE_ENABLED: '0' }));
1139
- expect(warnings.some((w) => w.includes('CARTESIA_API_KEY'))).toBe(false);
1140
- });
1141
- it('does not warn about CARTESIA_API_KEY when TTS provider is kokoro', () => {
1142
- const { warnings } = parseConfig(env({ DISCOCLAW_VOICE_ENABLED: '1', DISCOCLAW_TTS_PROVIDER: 'kokoro', DEEPGRAM_API_KEY: 'dg-key' }));
1143
- expect(warnings.some((w) => w.includes('CARTESIA_API_KEY'))).toBe(false);
1144
- });
1145
1081
  it('warns when voice enabled but DISCOCLAW_VOICE_HOME_CHANNEL is unset', () => {
1146
- const { warnings } = parseConfig(env({ DISCOCLAW_VOICE_ENABLED: '1', DEEPGRAM_API_KEY: 'dg-key', CARTESIA_API_KEY: 'ca-key' }));
1082
+ const { warnings } = parseConfig(env({ DISCOCLAW_VOICE_ENABLED: '1', GEMINI_API_KEY: 'gm-key' }));
1147
1083
  expect(warnings.some((w) => w.includes('DISCOCLAW_VOICE_HOME_CHANNEL'))).toBe(true);
1148
1084
  });
1149
1085
  it('does not warn about DISCOCLAW_VOICE_HOME_CHANNEL when both voice and home channel are set', () => {
1150
- const { warnings } = parseConfig(env({ DISCOCLAW_VOICE_ENABLED: '1', DISCOCLAW_VOICE_HOME_CHANNEL: '1000000000000000003', DEEPGRAM_API_KEY: 'dg-key', CARTESIA_API_KEY: 'ca-key' }));
1086
+ const { warnings } = parseConfig(env({ DISCOCLAW_VOICE_ENABLED: '1', DISCOCLAW_VOICE_HOME_CHANNEL: '1000000000000000003', GEMINI_API_KEY: 'gm-key' }));
1151
1087
  expect(warnings.some((w) => w.includes('DISCOCLAW_VOICE_HOME_CHANNEL'))).toBe(false);
1152
1088
  });
1153
1089
  // --- cold storage ---
@@ -361,11 +361,7 @@ async function applyPreset(preset, inspectOpts, deps) {
361
361
  const ctx = await deps.loadDoctorContext(inspectOpts);
362
362
  const primaryRuntime = presetToPrimaryRuntime(preset);
363
363
  await deps.updateEnvKey(ctx.configPaths.env, 'PRIMARY_RUNTIME', primaryRuntime);
364
- const preservedOverrides = {};
365
- if (ctx.runtimeOverrides.ttsVoice) {
366
- preservedOverrides.ttsVoice = ctx.runtimeOverrides.ttsVoice;
367
- }
368
- await deps.saveOverrides(ctx.configPaths.runtimeOverrides, preservedOverrides);
364
+ await deps.saveOverrides(ctx.configPaths.runtimeOverrides, {});
369
365
  await deps.saveModelConfig(ctx.configPaths.models, { ...MODEL_DEFAULTS });
370
366
  return {
371
367
  message: `Preset switched to ${preset}. Models reset to tier defaults. Restart the service to apply.`,
@@ -762,7 +762,6 @@ describe('startDashboardServer', () => {
762
762
  const ctx = makeDoctorContext({
763
763
  runtimeOverrides: {
764
764
  fastRuntime: 'openrouter',
765
- ttsVoice: 'alloy',
766
765
  },
767
766
  runtimeOverridesFile: {
768
767
  exists: true,
@@ -770,7 +769,6 @@ describe('startDashboardServer', () => {
770
769
  raw: {},
771
770
  values: {
772
771
  fastRuntime: 'openrouter',
773
- ttsVoice: 'alloy',
774
772
  },
775
773
  },
776
774
  });
@@ -802,7 +800,7 @@ describe('startDashboardServer', () => {
802
800
  expect(body.message).toContain('codex');
803
801
  expect(body.message).toContain('tier defaults');
804
802
  expect(updateEnvKeyMock).toHaveBeenCalledWith('/repo/.env', 'PRIMARY_RUNTIME', 'codex-cli');
805
- expect(saveOverridesMock).toHaveBeenCalledWith('/repo/data/runtime-overrides.json', { ttsVoice: 'alloy' });
803
+ expect(saveOverridesMock).toHaveBeenCalledWith('/repo/data/runtime-overrides.json', {});
806
804
  expect(saveModelConfigMock).toHaveBeenCalledWith('/repo/data/models.json', expect.objectContaining({}));
807
805
  expect(body.snapshot).toBeDefined();
808
806
  expect(body.snapshot.primaryRuntime).toBe('codex');
@@ -829,12 +827,11 @@ describe('startDashboardServer', () => {
829
827
  expect(body.message).toContain('claude');
830
828
  expect(updateEnvKeyMock).toHaveBeenCalledWith('/repo/.env', 'PRIMARY_RUNTIME', 'claude-cli');
831
829
  });
832
- it('preserves ttsVoice when clearing overrides via /api/preset', async () => {
830
+ it('clears legacy voice overrides when applying /api/preset', async () => {
833
831
  const ctx = makeDoctorContext({
834
832
  runtimeOverrides: {
835
833
  fastRuntime: 'openrouter',
836
834
  voiceRuntime: 'anthropic',
837
- ttsVoice: 'shimmer',
838
835
  },
839
836
  });
840
837
  const saveOverridesMock = vi.fn(async () => undefined);
@@ -848,7 +845,7 @@ describe('startDashboardServer', () => {
848
845
  method: 'POST',
849
846
  body: JSON.stringify({ preset: 'codex' }),
850
847
  });
851
- expect(saveOverridesMock).toHaveBeenCalledWith('/repo/data/runtime-overrides.json', { ttsVoice: 'shimmer' });
848
+ expect(saveOverridesMock).toHaveBeenCalledWith('/repo/data/runtime-overrides.json', {});
852
849
  });
853
850
  it('rejects GET requests on /api/model', async () => {
854
851
  const { port } = await startServer();
@@ -534,21 +534,127 @@ export function buildAllResultLines(results) {
534
534
  return results.map((r) => r.ok ? `Done: ${r.summary}` : `Failed: ${r.error}`);
535
535
  }
536
536
  /**
537
- * Cap a single result line to approximately `maxChars` characters.
537
+ * Cap a single result line to `maxChars` characters.
538
538
  * If truncated, appends a visible `...[truncated]` suffix.
539
539
  */
540
540
  export function capResultLine(line, maxChars = 1500) {
541
541
  if (line.length <= maxChars)
542
542
  return line;
543
- return `${line.slice(0, maxChars)}...[truncated]`;
543
+ const suffix = '...[truncated]';
544
+ if (maxChars <= suffix.length)
545
+ return suffix.slice(0, maxChars);
546
+ return `${line.slice(0, maxChars - suffix.length)}${suffix}`;
547
+ }
548
// --- Patterns used to classify the lines of a result body -----------------

// Leading `Done:` / `Failed:` label plus any whitespace that follows it.
const RESULT_LINE_PREFIX_RE = /^(Done|Failed):\s*/;
// Lines that start with one of a fixed set of field labels (case-insensitive).
const RESULT_LINE_IMPORTANT_FIELD_RE = /^(Status|Thread|Model|Next run|Last error|State):\s/i;
// Lines that start with a short capitalized `Label: ` of any name.
const RESULT_LINE_GENERIC_FIELD_RE = /^[A-Z][A-Za-z0-9 /_-]{1,24}:\s/;
// Lines that consist solely of a bold markdown header such as `**Name:**`.
const RESULT_LINE_SECTION_HEADER_RE = /^\*\*[^*\n]+:\*\*$/;
// Lines containing an error-related word or phrase (case-insensitive).
const RESULT_LINE_ERROR_RE = /\b(error|failed|failure|missing|invalid|denied|not found|cannot|unable|exception|timeout|timed out)\b/i;
// Lines containing something shaped like an absolute, relative, or drive-letter path.
const RESULT_LINE_PATH_RE = /(?:^|[\s(])(?:\/[^\s)`]+|\.{1,2}\/[^\s)`]+|[A-Za-z]:\\\S+)/;
// Lines containing `id:`/`id=` values, `word-123` style tokens, or runs of 8+ digits.
const RESULT_LINE_ID_RE = /\b(?:id[:=][^\s,)]+|[a-z]+-\d+\b|\d{8,})/i;
// Lines containing one of a fixed set of follow-up verbs (case-insensitive).
const RESULT_LINE_NEXT_ACTION_RE = /\b(?:retry|rerun|re-run|resume|check|open|use)\b/i;

// --- Numeric thresholds for result-line compaction ------------------------

// Line-count and character-count thresholds compared against the parsed body.
const RESULT_LINE_MICROCOMPACT_TRIGGER_LINES = 6;
const RESULT_LINE_MICROCOMPACT_TRIGGER_CHARS = 500;
// Upper bound on how many body lines are retained by the selection step.
const RESULT_LINE_MAX_RETAINED_LINES = 8;
// Sample sizes passed to the representative-index helper.
const RESULT_LINE_ID_REPRESENTATIVE_COUNT = 4;
const RESULT_LINE_REMAINDER_REPRESENTATIVE_COUNT = 2;
561
/**
 * Separate a result line into its leading `Done:`/`Failed:` label and the
 * remaining body text.
 *
 * @param {string} line - A single result line.
 * @returns {{ prefix: string, body: string }} `prefix` is the matched label
 *   (including trailing whitespace) or `''` when the line has no such label;
 *   `body` is everything after the prefix.
 */
function splitResultLine(line) {
    const prefix = RESULT_LINE_PREFIX_RE.exec(line)?.[0] ?? '';
    return { prefix, body: line.slice(prefix.length) };
}
567
/**
 * Break a result body into its non-blank lines and tag each one with the
 * classification flags used when choosing which lines to keep.
 *
 * @param {string} body - Result text with any `Done:`/`Failed:` label removed.
 * @returns {Array<object>} One record per non-blank line, in order. `index`
 *   is the position among the non-blank lines, `text` is the line with
 *   trailing whitespace removed, and the boolean flags report which of the
 *   `RESULT_LINE_*_RE` patterns matched.
 */
function parseResultLineBody(body) {
    const records = [];
    for (const rawLine of body.split(/\r?\n/)) {
        const text = rawLine.trimEnd();
        // Whitespace-only lines carry no information and are not numbered.
        if (text.trim().length === 0) {
            continue;
        }
        records.push({
            index: records.length,
            text,
            isImportantField: RESULT_LINE_IMPORTANT_FIELD_RE.test(text),
            isGenericField: RESULT_LINE_GENERIC_FIELD_RE.test(text),
            isSectionHeader: RESULT_LINE_SECTION_HEADER_RE.test(text),
            hasErrorText: RESULT_LINE_ERROR_RE.test(text),
            hasPath: RESULT_LINE_PATH_RE.test(text),
            hasId: RESULT_LINE_ID_RE.test(text),
            hasNextAction: RESULT_LINE_NEXT_ACTION_RE.test(text),
        });
    }
    return records;
}
584
/**
 * Pick at most `count` entries from `indexes`, taken from both ends of the
 * list: the first `ceil(count / 2)` entries followed by the last
 * `floor(count / 2)` entries.
 *
 * @param {number[]} indexes - Candidate indexes, in order.
 * @param {number} count - Maximum number of entries to return.
 * @returns {number[]} `indexes` itself when it already fits within `count`,
 *   otherwise a new array with the head and tail samples.
 */
function takeRepresentativeIndexes(indexes, count) {
    if (indexes.length <= count) {
        return indexes;
    }
    if (count <= 0) {
        return [];
    }
    const headCount = Math.ceil(count / 2);
    const tailCount = Math.floor(count / 2);
    // Compute the tail start explicitly: `slice(-tailCount)` with a tail of 0
    // becomes `slice(-0)`, which is `slice(0)` and would return every entry.
    const tailStart = indexes.length - tailCount;
    return [...indexes.slice(0, headCount), ...indexes.slice(tailStart)];
}
591
/**
 * Append entries from `indexes` to `target` in order, skipping values that
 * `target` already contains, and never growing `target` beyond `maxCount`.
 * Mutates `target` in place.
 *
 * @param {number[]} target - Accumulator that receives new indexes.
 * @param {number[]} indexes - Candidate indexes, in priority order.
 * @param {number} maxCount - Maximum allowed length of `target`.
 * @returns {void}
 */
function appendUniqueIndexes(target, indexes, maxCount) {
    for (const index of indexes) {
        // Check capacity before pushing so a target that is already full on
        // entry is left untouched instead of gaining one extra element.
        if (target.length >= maxCount) {
            return;
        }
        if (target.includes(index)) {
            continue;
        }
        target.push(index);
    }
}
600
/**
 * For every line flagged as a section header, other than the final line,
 * collect the `index` of the line that immediately follows it.
 *
 * @param {Array<{ index: number, isSectionHeader: boolean }>} lines - Parsed
 *   body lines.
 * @returns {number[]} Indexes of the lines directly after each header.
 */
function collectSectionValueIndexes(lines) {
    // The last line has no successor, so it is excluded from the scan.
    return lines
        .slice(0, -1)
        .flatMap((line, position) => (line?.isSectionHeader ? [lines[position + 1].index] : []));
}
609
/**
 * Format the marker that reports how many body lines were dropped.
 *
 * @param {number} omittedCount - Number of omitted lines.
 * @returns {string} Marker text such as `...[omitted 3 lines]`; the noun is
 *   singular only when the count is exactly 1.
 */
function buildResultLineOmissionMarker(omittedCount) {
    const noun = omittedCount === 1 ? 'line' : 'lines';
    return `...[omitted ${omittedCount} ${noun}]`;
}
612
/**
 * Choose which parsed body lines to keep when a result body has more lines
 * than `RESULT_LINE_MAX_RETAINED_LINES`.
 *
 * Candidates are considered in priority order: the first line, important
 * fields, lines with error text / paths / next-action verbs, section headers,
 * the line after each section header, a sample of lines containing ids, the
 * last line, and finally a sample of the remaining plain lines.
 *
 * @param {Array<object>} lines - Records produced by `parseResultLineBody`.
 * @returns {number[]} Selected line indexes in ascending order; every index
 *   when the body already fits, otherwise at most
 *   `RESULT_LINE_MAX_RETAINED_LINES` entries.
 */
function selectInformativeResultBodyIndexes(lines) {
    const maxCount = RESULT_LINE_MAX_RETAINED_LINES;
    if (lines.length <= maxCount) {
        return lines.map((line) => line.index);
    }
    const indexesWhere = (predicate) => lines.filter(predicate).map((line) => line.index);
    // Ordered from most to least important; earlier groups claim slots first.
    const candidateGroups = [
        [0],
        indexesWhere((line) => line.isImportantField),
        indexesWhere((line) => line.hasErrorText || line.hasPath || line.hasNextAction),
        indexesWhere((line) => line.isSectionHeader),
        collectSectionValueIndexes(lines),
        takeRepresentativeIndexes(indexesWhere((line) => line.hasId), RESULT_LINE_ID_REPRESENTATIVE_COUNT),
        [lines.length - 1],
        takeRepresentativeIndexes(indexesWhere((line) => !line.isGenericField && !line.isSectionHeader && !line.hasId), RESULT_LINE_REMAINDER_REPRESENTATIVE_COUNT),
    ];
    const selected = [];
    for (const group of candidateGroups) {
        appendUniqueIndexes(selected, group, maxCount);
    }
    // `selected` is still in priority order here, so trimming before the sort
    // drops the least important extras and guarantees the retained-line cap
    // even if a helper appended past `maxCount`.
    return selected.slice(0, maxCount).sort((a, b) => a - b);
}
631
/**
 * Shrink a multi-line result by keeping only its most informative body lines
 * and appending an omission marker, then apply the hard character cap.
 * Falls back to plain capping when the body has at most one non-blank line,
 * when neither size trigger is exceeded, or when the selection would keep
 * every line anyway.
 *
 * @param {string} line - A single result line (may contain embedded newlines).
 * @param {number} maxChars - Hard cap passed through to `capResultLine`.
 * @returns {string} The compacted and capped result text.
 */
function microcompactResultLine(line, maxChars) {
    const { prefix, body } = splitResultLine(line);
    const bodyLines = parseResultLineBody(body);
    const lineTotal = bodyLines.length;
    if (lineTotal <= 1) {
        return capResultLine(line, maxChars);
    }
    const overLineTrigger = lineTotal > RESULT_LINE_MICROCOMPACT_TRIGGER_LINES;
    const overCharTrigger = body.length > Math.min(maxChars, RESULT_LINE_MICROCOMPACT_TRIGGER_CHARS);
    if (!overLineTrigger && !overCharTrigger) {
        return capResultLine(line, maxChars);
    }
    const keptIndexes = selectInformativeResultBodyIndexes(bodyLines);
    if (keptIndexes.length >= lineTotal) {
        // Nothing would be dropped, so compaction has no effect.
        return capResultLine(line, maxChars);
    }
    const keptIndexSet = new Set(keptIndexes);
    const keptText = [];
    for (const info of bodyLines) {
        if (keptIndexSet.has(info.index)) {
            keptText.push(info.text);
        }
    }
    const marker = buildResultLineOmissionMarker(lineTotal - keptText.length);
    const compactedBody = `${keptText.join('\n')}\n${marker}`;
    return capResultLine(`${prefix}${compactedBody}`, maxChars);
}
545
651
  /**
546
- * Build result lines for follow-up prompts with per-line length capping.
547
- * Each line is capped at `maxChars` characters to prevent oversized payloads
548
- * from crowding out reasoning and action blocks in follow-up prompts.
652
+ * Build result lines for follow-up prompts with microcompaction before
653
+ * the final hard cap so oversized payloads preserve continuation-critical
654
+ * details without crowding out reasoning and action blocks.
549
655
  */
550
656
  export function buildCappedResultLines(results, maxChars = 1500) {
551
- return buildAllResultLines(results).map((line) => capResultLine(line, maxChars));
657
+ return buildAllResultLines(results).map((line) => microcompactResultLine(line, maxChars));
552
658
  }
553
659
  /**
554
660
  * Append display result lines to body text, automatically closing any