npm - discoclaw - Versions diffs - 1.2.4 → 2.0.0 - Mend

discoclaw 1.2.4 → 2.0.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.

Files changed (87) hide show

package/.context/voice.md +30 -2
package/.env.example +7 -3
package/.env.example.full +13 -32
package/README.md +1 -1
package/dist/cli/dashboard.js +7 -1
package/dist/cli/dashboard.test.js +0 -4
package/dist/cli/init-wizard.js +4 -8
package/dist/cli/init-wizard.test.js +4 -10
package/dist/config.js +5 -38
package/dist/config.test.js +8 -72
package/dist/cron/executor.js +72 -1
package/dist/dashboard/api/metrics.js +7 -0
package/dist/dashboard/api/metrics.test.js +16 -0
package/dist/dashboard/api/traces.js +14 -0
package/dist/dashboard/api/traces.test.js +40 -0
package/dist/dashboard/page.js +187 -8
package/dist/dashboard/server.js +82 -19
package/dist/dashboard/server.test.js +123 -10
package/dist/discord/actions.js +112 -6
package/dist/discord/actions.test.js +117 -1
package/dist/discord/deferred-runner.js +306 -219
package/dist/discord/help-command.js +1 -1
package/dist/discord/message-coordinator.js +4 -36
package/dist/discord/models-command.js +1 -1
package/dist/discord/reaction-handler.js +83 -5
package/dist/discord/reaction-handler.test.js +55 -0
package/dist/discord/verify-push.js +31 -36
package/dist/discord/verify-push.test.js +34 -6
package/dist/discord/voice-command.js +1 -31
package/dist/discord/voice-command.test.js +21 -259
package/dist/discord/voice-status-command.js +3 -22
package/dist/discord/voice-status-command.test.js +16 -124
package/dist/discord-followup.test.js +133 -0
package/dist/health/config-doctor.js +5 -27
package/dist/health/config-doctor.test.js +1 -4
package/dist/index.js +15 -28
package/dist/observability/trace-store.js +56 -0
package/dist/observability/trace-utils.js +31 -0
package/dist/runtime/codex-cli.js +3 -2
package/dist/runtime/codex-cli.test.js +33 -0
package/dist/runtime/model-tiers.js +1 -1
package/dist/runtime/model-tiers.test.js +9 -0
package/dist/runtime/openai-tool-schemas.js +17 -0
package/dist/runtime-overrides.js +2 -3
package/dist/runtime-overrides.test.js +27 -193
package/dist/tasks/store.js +10 -6
package/dist/tasks/store.test.js +44 -0
package/dist/tasks/task-action-executor.test.js +162 -50
package/dist/tasks/task-action-mutations.js +22 -2
package/dist/tasks/task-action-read-ops.js +7 -1
package/dist/tasks/task-action-runner-types.js +19 -1
package/dist/voice/audio-pipeline.js +183 -96
package/dist/voice/audio-receiver.js +8 -0
package/dist/voice/audio-receiver.test.js +16 -0
package/dist/voice/conversation-buffer.js +16 -6
package/dist/voice/providers/gemini-live-provider.js +481 -0
package/dist/voice/providers/gemini-live-provider.test.js +834 -0
package/dist/voice/providers/gemini-live-responder.js +267 -0
package/dist/voice/providers/gemini-live-responder.test.js +615 -0
package/dist/voice/providers/gemini-live-token-estimator.js +100 -0
package/dist/voice/providers/gemini-live-token-estimator.test.js +160 -0
package/dist/voice/providers/gemini-live-types.js +32 -0
package/dist/voice/providers/gemini-tool-mapper.js +91 -0
package/dist/voice/providers/gemini-tool-mapper.test.js +253 -0
package/dist/voice/providers/index.js +3 -0
package/dist/voice/voice-prompt-builder.js +26 -17
package/dist/voice/voice-prompt-builder.test.js +16 -1
package/docs/configuration.md +4 -9
package/docs/official-docs.md +6 -9
package/docs/runtime-switching.md +1 -1
package/package.json +1 -1
package/dist/voice/audio-pipeline.test.js +0 -619
package/dist/voice/stt-deepgram.js +0 -154
package/dist/voice/stt-deepgram.test.js +0 -275
package/dist/voice/stt-factory.js +0 -42
package/dist/voice/stt-factory.test.js +0 -45
package/dist/voice/stt-openai.js +0 -156
package/dist/voice/stt-openai.test.js +0 -281
package/dist/voice/tts-cartesia.js +0 -169
package/dist/voice/tts-cartesia.test.js +0 -228
package/dist/voice/tts-deepgram.js +0 -84
package/dist/voice/tts-deepgram.test.js +0 -220
package/dist/voice/tts-factory.js +0 -52
package/dist/voice/tts-factory.test.js +0 -53
package/dist/voice/tts-openai.js +0 -70
package/dist/voice/tts-openai.test.js +0 -138
package/dist/voice/types.test.js +0 -84

package/dist/voice/voice-prompt-builder.js CHANGED Viewed

@@ -125,6 +125,21 @@ function estimateSection(chars) {
         included: safeChars > 0,
     };
 }
+function buildVoiceContextSections(parts) {
+    const sections = [];
+    sections.push(buildPromptPreamble(parts.identity, { skipTrackedTools: true }));
+    if (parts.actionsSection) {
+        sections.push(parts.actionsSection);
+    }
+    if (parts.voiceSystemPrompt) {
+        sections.push(parts.voiceSystemPrompt);
+    }
+    sections.push(VOICE_STYLE_INSTRUCTION);
+    if (parts.durableMemory) {
+        sections.push(`---\nDurable memory (user-specific notes):\n${parts.durableMemory}`);
+    }
+    return sections;
+}
 export function buildVoicePromptSectionEstimates(parts) {
     const charsBySection = {
         rootPolicy: ROOT_POLICY_CHARS,
@@ -162,28 +177,22 @@ export function buildVoicePromptSectionEstimates(parts) {
  * 8. User text
  */
 export function buildVoicePrompt(parts) {
-    const sections = [];
-    // 1. Root policy + identity.
-    sections.push(buildPromptPreamble(parts.identity, { skipTrackedTools: true }));
-    // 2. Actions section.
-    if (parts.actionsSection) {
-        sections.push(parts.actionsSection);
-    }
-    // 3. Voice system prompt (user-configurable).
-    if (parts.voiceSystemPrompt) {
-        sections.push(parts.voiceSystemPrompt);
-    }
-    // 4. Voice style instruction.
-    sections.push(VOICE_STYLE_INSTRUCTION);
-    // 5. Durable memory.
-    if (parts.durableMemory) {
-        sections.push(`---\nDurable memory (user-specific notes):\n${parts.durableMemory}`);
-    }
+    const sections = buildVoiceContextSections(parts);
     // 6. Separator + user text.
     sections.push(VOICE_INTERNAL_CONTEXT_SEPARATOR);
     sections.push(parts.userText);
     return sections.join('\n\n');
 }
+/**
+ * Build the static session instruction used by live voice providers.
+ *
+ * This contains the same persistent voice context as the classic per-turn
+ * prompt, excluding the current user utterance and the internal-context
+ * separator that only makes sense for single-shot prompt assembly.
+ */
+export function buildVoiceSystemInstruction(parts) {
+    return buildVoiceContextSections(parts).join('\n\n');
+}
 /**
  * Build a follow-up prompt for voice action result processing.
  *

package/dist/voice/voice-prompt-builder.test.js CHANGED Viewed

@@ -1,7 +1,7 @@
 import { describe, it, expect, beforeEach, afterEach } from 'vitest';
 import fs from 'node:fs/promises';
 import path from 'node:path';
-import { extractSections, extractSoulEssentials, extractUserEssentials, loadVoiceIdentity, buildVoicePrompt, buildVoiceFollowUpPrompt, buildVoicePromptSectionEstimates, VOICE_INTERNAL_CONTEXT_SEPARATOR, VOICE_IDENTITY_MAX_CHARS, } from './voice-prompt-builder.js';
+import { extractSections, extractSoulEssentials, extractUserEssentials, loadVoiceIdentity, buildVoicePrompt, buildVoiceSystemInstruction, buildVoiceFollowUpPrompt, buildVoicePromptSectionEstimates, VOICE_INTERNAL_CONTEXT_SEPARATOR, VOICE_IDENTITY_MAX_CHARS, } from './voice-prompt-builder.js';
 import { VOICE_STYLE_INSTRUCTION } from './voice-style-prompt.js';
 import { ROOT_POLICY, TRACKED_DEFAULTS_PREAMBLE, TRACKED_TOOLS_PREAMBLE, buildPromptPreamble, } from '../discord/prompt-common.js';
 // ---------------------------------------------------------------------------
@@ -180,6 +180,21 @@ describe('loadVoiceIdentity', () => {
         expect(identityIdx).toBeLessThan(userIdx);
     });
 });
+describe('buildVoiceSystemInstruction', () => {
+    it('builds the static voice context without the user-turn separator', () => {
+        const result = buildVoiceSystemInstruction({
+            identity: 'identity block',
+            durableMemory: '',
+            actionsSection: '',
+            voiceSystemPrompt: 'custom voice system prompt',
+        });
+        expect(result).toContain(buildPromptPreamble('identity block', { skipTrackedTools: true }));
+        expect(result).toContain('custom voice system prompt');
+        expect(result).toContain(VOICE_STYLE_INSTRUCTION);
+        expect(result).not.toContain(VOICE_INTERNAL_CONTEXT_SEPARATOR);
+        expect(result).not.toContain('Current user message:');
+    });
+});
 // ---------------------------------------------------------------------------
 // buildVoicePrompt
 // ---------------------------------------------------------------------------

package/docs/configuration.md CHANGED Viewed

@@ -37,7 +37,7 @@ For npm-managed daemon installs, readiness is currently constrained by service e
 Model/runtime state is intentionally split across three storage modes:
 - `models.json` stores persisted model strings per role (`chat`, `fast`, `plan-run`, `voice`, forge roles, cron roles, etc.).
-- `runtime-overrides.json` stores persisted runtime-only overlays such as `fastRuntime` and `voiceRuntime` (plus non-model keys such as `ttsVoice`).
+- `runtime-overrides.json` stores persisted runtime-only overlays such as `fastRuntime` and `voiceRuntime`.
 - Live chat runtime swaps stay in memory only. `!models set chat <runtime>` changes the active chat runtime immediately, but there is no persisted `chatRuntime` overlay.
 On first run, `models.json` is scaffolded from the instance startup defaults. After that:
@@ -322,24 +322,19 @@ The same forum-boundary rule applies to tasks: `DISCOCLAW_TASKS_FORUM` is the di
 ## Voice
-See [docs/voice.md](voice.md) for the full setup guide and provider details.
+See [docs/voice.md](voice.md) for the full Gemini Live setup guide.
 | Variable | Default | Description |
 |----------|---------|-------------|
 | `DISCOCLAW_VOICE_ENABLED` | `false` | Master switch for voice subsystem |
 | `DISCOCLAW_VOICE_AUTO_JOIN` | `false` | Auto-join voice channels when users enter |
 | `ANTHROPIC_API_KEY` | — | Anthropic API key (required for direct Messages API voice responses) |
+| `GEMINI_API_KEY` | — | Gemini API key required for Gemini Live voice |
 | `DISCOCLAW_VOICE_MODEL` | follows startup chat model | Model override for voice responses |
 | `DISCOCLAW_VOICE_SYSTEM_PROMPT` | — | System prompt override for voice (max 4000 chars) |
-| `DISCOCLAW_STT_PROVIDER` | `deepgram` | Speech-to-text provider: `deepgram`, `whisper`, `openai` |
-| `DISCOCLAW_TTS_PROVIDER` | `cartesia` | Text-to-speech provider: `cartesia`, `deepgram`, `kokoro`, `openai` |
+| `DISCOCLAW_GEMINI_SESSION_ROTATION_MS` | `780000` | Proactive Gemini Live session rotation interval in milliseconds |
 | `DISCOCLAW_VOICE_HOME_CHANNEL` | — | Voice channel name or ID for prompt context |
 | `DISCOCLAW_VOICE_LOG_CHANNEL` | `voice-log` | Text channel for transcript mirror |
-| `DEEPGRAM_API_KEY` | — | Deepgram API key (required for Deepgram STT/TTS) |
-| `DEEPGRAM_STT_MODEL` | `nova-3-general` | Deepgram STT model |
-| `DEEPGRAM_TTS_VOICE` | `aura-2-asteria-en` | Deepgram TTS voice |
-| `DEEPGRAM_TTS_SPEED` | `1.3` | Deepgram TTS playback speed multiplier (0.5–1.5) |
-| `CARTESIA_API_KEY` | — | Cartesia API key (required for Cartesia TTS) |
 ## Webhook

package/docs/official-docs.md CHANGED Viewed

@@ -6,8 +6,8 @@ Completeness pass for this index was cross-checked against:
 - `package.json`
 - `.context/runtime.md`
-- `src/voice/tts-factory.ts`
-- `src/voice/stt-factory.ts`
+- `src/voice/audio-pipeline.ts`
+- `src/voice/providers/gemini-live-provider.ts`
 - `src/cold-storage/embeddings.ts`
 - `src/cold-storage/openai-compat.ts`
 - `src/discord/actions-imagegen.ts`
@@ -24,8 +24,8 @@ Completeness pass for this index was cross-checked against:
 | Provider | What DiscoClaw uses | Official docs |
 |----------|----------------------|---------------|
 | Anthropic | Claude model families via `src/runtime/anthropic-rest.ts` and Claude Code CLI runtime | Models overview: <https://docs.anthropic.com/en/docs/about-claude/models/overview><br>Messages API: <https://platform.claude.com/docs/en/api/messages><br>Claude Code docs: <https://code.claude.com/docs/en/overview> |
-| OpenAI | OpenAI-compatible runtime, Codex runtime docs, OpenAI voice, embeddings, and image generation | Model IDs: <https://developers.openai.com/api/model-ids/><br>API reference overview: <https://platform.openai.com/docs/api-reference><br>Codex docs: <https://developers.openai.com/codex/><br>Codex app-server API: <https://developers.openai.com/codex/app-server> |
-| Google | Gemini API runtime and Gemini/Imagen image generation | Gemini models: <https://ai.google.dev/models/gemini><br>Gemini API docs: <https://ai.google.dev/gemini-api/docs> |
+| OpenAI | OpenAI-compatible runtime, Codex runtime docs, embeddings, and image generation | Model IDs: <https://developers.openai.com/api/model-ids/><br>API reference overview: <https://platform.openai.com/docs/api-reference><br>Codex docs: <https://developers.openai.com/codex/><br>Codex app-server API: <https://developers.openai.com/codex/app-server> |
+| Google | Gemini API runtime, Gemini Live voice, and Gemini/Imagen image generation | Gemini models: <https://ai.google.dev/models/gemini><br>Gemini API docs: <https://ai.google.dev/gemini-api/docs><br>Gemini Live API: <https://ai.google.dev/gemini-api/docs/live> |
 | OpenRouter | OpenRouter runtime through `src/runtime/openai-compat.ts` | Model list: <https://openrouter.ai/models><br>API docs: <https://openrouter.ai/docs/api/reference/overview> |
 ## Discord
@@ -53,11 +53,8 @@ Completeness pass for this index was cross-checked against:
 | Provider | Used in DiscoClaw | Official docs |
 |----------|-------------------|---------------|
-| Deepgram STT | `src/voice/stt-deepgram.ts` with Nova-3 streaming (`nova-3-general`) | STT API overview: <https://developers.deepgram.com/docs/speech-to-text><br>Streaming API: <https://developers.deepgram.com/reference/speech-to-text/listen-streaming><br>Nova-3 models: <https://developers.deepgram.com/docs/models-languages-overview> |
-| Deepgram TTS | `src/voice/tts-deepgram.ts` with Aura (`aura-2-asteria-en`) | TTS API overview: <https://developers.deepgram.com/docs/text-to-speech><br>Speak endpoint: <https://developers.deepgram.com/reference/text-to-speech/speak-streaming><br>Aura voices/models: <https://developers.deepgram.com/docs/tts-models> |
-| Cartesia TTS | `src/voice/tts-cartesia.ts` with Sonic-3 over WebSocket | API docs: <https://docs.cartesia.ai/api-reference><br>TTS WebSocket: <https://docs.cartesia.ai/api-reference/tts/websocket> |
-| OpenAI TTS | `src/voice/tts-openai.ts` (`/v1/audio/speech`, default `tts-1`) | Audio speech API reference: <https://platform.openai.com/docs/api-reference/audio/createSpeech> |
-| OpenAI STT | `src/voice/stt-openai.ts` (`/v1/audio/transcriptions`, `whisper-1`) | Audio transcription API reference: <https://platform.openai.com/docs/api-reference/audio/createTranscription> |
+| Gemini Live | `src/voice/audio-pipeline.ts` and the Gemini Live provider handle speech recognition, reasoning, and speech synthesis in one session | Live API overview: <https://ai.google.dev/gemini-api/docs/live><br>Realtime guide: <https://ai.google.dev/gemini-api/docs/live-guide> |
+| Anthropic Messages API (optional voice runtime) | `!models set voice claude-api` can switch voice response generation to direct Anthropic API calls while Discord audio transport stays on Gemini Live | API overview: <https://docs.anthropic.com/en/api/messages> |
 ## Image Generation

package/docs/runtime-switching.md CHANGED Viewed

@@ -135,7 +135,7 @@ Do not print the full `.env` into Discord, terminal transcripts, or audit logs u
 Keep the files separate:
 - `models.json` stores model strings per role.
-- `runtime-overrides.json` stores runtime-only overlays such as `voiceRuntime`, `fastRuntime`, and `ttsVoice`.
+- `runtime-overrides.json` stores runtime-only overlays such as `voiceRuntime` and `fastRuntime`.
 - There is no `chatRuntime` key because chat runtime swaps do not persist.
 On first run, `models.json` is scaffolded from the startup defaults that instance booted with.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "discoclaw",
-  "version": "1.2.4",
+  "version": "2.0.0",
   "description": "Personal AI orchestrator that turns Discord into a persistent workspace",
   "license": "MIT",
   "keywords": [