discoclaw 1.2.4 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.context/voice.md +30 -2
- package/.env.example +7 -3
- package/.env.example.full +13 -32
- package/README.md +1 -1
- package/dist/cli/dashboard.js +7 -1
- package/dist/cli/dashboard.test.js +0 -4
- package/dist/cli/init-wizard.js +4 -8
- package/dist/cli/init-wizard.test.js +4 -10
- package/dist/config.js +5 -38
- package/dist/config.test.js +8 -72
- package/dist/cron/executor.js +72 -1
- package/dist/dashboard/api/metrics.js +7 -0
- package/dist/dashboard/api/metrics.test.js +16 -0
- package/dist/dashboard/api/traces.js +14 -0
- package/dist/dashboard/api/traces.test.js +40 -0
- package/dist/dashboard/page.js +187 -8
- package/dist/dashboard/server.js +82 -19
- package/dist/dashboard/server.test.js +123 -10
- package/dist/discord/actions.js +112 -6
- package/dist/discord/actions.test.js +117 -1
- package/dist/discord/deferred-runner.js +306 -219
- package/dist/discord/help-command.js +1 -1
- package/dist/discord/message-coordinator.js +4 -36
- package/dist/discord/models-command.js +1 -1
- package/dist/discord/reaction-handler.js +83 -5
- package/dist/discord/reaction-handler.test.js +55 -0
- package/dist/discord/verify-push.js +31 -36
- package/dist/discord/verify-push.test.js +34 -6
- package/dist/discord/voice-command.js +1 -31
- package/dist/discord/voice-command.test.js +21 -259
- package/dist/discord/voice-status-command.js +3 -22
- package/dist/discord/voice-status-command.test.js +16 -124
- package/dist/discord-followup.test.js +133 -0
- package/dist/health/config-doctor.js +5 -27
- package/dist/health/config-doctor.test.js +1 -4
- package/dist/index.js +15 -28
- package/dist/observability/trace-store.js +56 -0
- package/dist/observability/trace-utils.js +31 -0
- package/dist/runtime/codex-cli.js +3 -2
- package/dist/runtime/codex-cli.test.js +33 -0
- package/dist/runtime/model-tiers.js +1 -1
- package/dist/runtime/model-tiers.test.js +9 -0
- package/dist/runtime/openai-tool-schemas.js +17 -0
- package/dist/runtime-overrides.js +2 -3
- package/dist/runtime-overrides.test.js +27 -193
- package/dist/tasks/store.js +10 -6
- package/dist/tasks/store.test.js +44 -0
- package/dist/tasks/task-action-executor.test.js +162 -50
- package/dist/tasks/task-action-mutations.js +22 -2
- package/dist/tasks/task-action-read-ops.js +7 -1
- package/dist/tasks/task-action-runner-types.js +19 -1
- package/dist/voice/audio-pipeline.js +183 -96
- package/dist/voice/audio-receiver.js +8 -0
- package/dist/voice/audio-receiver.test.js +16 -0
- package/dist/voice/conversation-buffer.js +16 -6
- package/dist/voice/providers/gemini-live-provider.js +481 -0
- package/dist/voice/providers/gemini-live-provider.test.js +834 -0
- package/dist/voice/providers/gemini-live-responder.js +267 -0
- package/dist/voice/providers/gemini-live-responder.test.js +615 -0
- package/dist/voice/providers/gemini-live-token-estimator.js +100 -0
- package/dist/voice/providers/gemini-live-token-estimator.test.js +160 -0
- package/dist/voice/providers/gemini-live-types.js +32 -0
- package/dist/voice/providers/gemini-tool-mapper.js +91 -0
- package/dist/voice/providers/gemini-tool-mapper.test.js +253 -0
- package/dist/voice/providers/index.js +3 -0
- package/dist/voice/voice-prompt-builder.js +26 -17
- package/dist/voice/voice-prompt-builder.test.js +16 -1
- package/docs/configuration.md +4 -9
- package/docs/official-docs.md +6 -9
- package/docs/runtime-switching.md +1 -1
- package/package.json +1 -1
- package/dist/voice/audio-pipeline.test.js +0 -619
- package/dist/voice/stt-deepgram.js +0 -154
- package/dist/voice/stt-deepgram.test.js +0 -275
- package/dist/voice/stt-factory.js +0 -42
- package/dist/voice/stt-factory.test.js +0 -45
- package/dist/voice/stt-openai.js +0 -156
- package/dist/voice/stt-openai.test.js +0 -281
- package/dist/voice/tts-cartesia.js +0 -169
- package/dist/voice/tts-cartesia.test.js +0 -228
- package/dist/voice/tts-deepgram.js +0 -84
- package/dist/voice/tts-deepgram.test.js +0 -220
- package/dist/voice/tts-factory.js +0 -52
- package/dist/voice/tts-factory.test.js +0 -53
- package/dist/voice/tts-openai.js +0 -70
- package/dist/voice/tts-openai.test.js +0 -138
- package/dist/voice/types.test.js +0 -84
|
@@ -125,6 +125,21 @@ function estimateSection(chars) {
|
|
|
125
125
|
included: safeChars > 0,
|
|
126
126
|
};
|
|
127
127
|
}
|
|
128
|
+
function buildVoiceContextSections(parts) {
|
|
129
|
+
const sections = [];
|
|
130
|
+
sections.push(buildPromptPreamble(parts.identity, { skipTrackedTools: true }));
|
|
131
|
+
if (parts.actionsSection) {
|
|
132
|
+
sections.push(parts.actionsSection);
|
|
133
|
+
}
|
|
134
|
+
if (parts.voiceSystemPrompt) {
|
|
135
|
+
sections.push(parts.voiceSystemPrompt);
|
|
136
|
+
}
|
|
137
|
+
sections.push(VOICE_STYLE_INSTRUCTION);
|
|
138
|
+
if (parts.durableMemory) {
|
|
139
|
+
sections.push(`---\nDurable memory (user-specific notes):\n${parts.durableMemory}`);
|
|
140
|
+
}
|
|
141
|
+
return sections;
|
|
142
|
+
}
|
|
128
143
|
export function buildVoicePromptSectionEstimates(parts) {
|
|
129
144
|
const charsBySection = {
|
|
130
145
|
rootPolicy: ROOT_POLICY_CHARS,
|
|
@@ -162,28 +177,22 @@ export function buildVoicePromptSectionEstimates(parts) {
|
|
|
162
177
|
* 8. User text
|
|
163
178
|
*/
|
|
164
179
|
export function buildVoicePrompt(parts) {
|
|
165
|
-
const sections = [];
|
|
166
|
-
// 1. Root policy + identity.
|
|
167
|
-
sections.push(buildPromptPreamble(parts.identity, { skipTrackedTools: true }));
|
|
168
|
-
// 2. Actions section.
|
|
169
|
-
if (parts.actionsSection) {
|
|
170
|
-
sections.push(parts.actionsSection);
|
|
171
|
-
}
|
|
172
|
-
// 3. Voice system prompt (user-configurable).
|
|
173
|
-
if (parts.voiceSystemPrompt) {
|
|
174
|
-
sections.push(parts.voiceSystemPrompt);
|
|
175
|
-
}
|
|
176
|
-
// 4. Voice style instruction.
|
|
177
|
-
sections.push(VOICE_STYLE_INSTRUCTION);
|
|
178
|
-
// 5. Durable memory.
|
|
179
|
-
if (parts.durableMemory) {
|
|
180
|
-
sections.push(`---\nDurable memory (user-specific notes):\n${parts.durableMemory}`);
|
|
181
|
-
}
|
|
180
|
+
const sections = buildVoiceContextSections(parts);
|
|
182
181
|
// 6. Separator + user text.
|
|
183
182
|
sections.push(VOICE_INTERNAL_CONTEXT_SEPARATOR);
|
|
184
183
|
sections.push(parts.userText);
|
|
185
184
|
return sections.join('\n\n');
|
|
186
185
|
}
|
|
186
|
+
/**
|
|
187
|
+
* Build the static session instruction used by live voice providers.
|
|
188
|
+
*
|
|
189
|
+
* This contains the same persistent voice context as the classic per-turn
|
|
190
|
+
* prompt, excluding the current user utterance and the internal-context
|
|
191
|
+
* separator that only makes sense for single-shot prompt assembly.
|
|
192
|
+
*/
|
|
193
|
+
export function buildVoiceSystemInstruction(parts) {
|
|
194
|
+
return buildVoiceContextSections(parts).join('\n\n');
|
|
195
|
+
}
|
|
187
196
|
/**
|
|
188
197
|
* Build a follow-up prompt for voice action result processing.
|
|
189
198
|
*
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
|
2
2
|
import fs from 'node:fs/promises';
|
|
3
3
|
import path from 'node:path';
|
|
4
|
-
import { extractSections, extractSoulEssentials, extractUserEssentials, loadVoiceIdentity, buildVoicePrompt, buildVoiceFollowUpPrompt, buildVoicePromptSectionEstimates, VOICE_INTERNAL_CONTEXT_SEPARATOR, VOICE_IDENTITY_MAX_CHARS, } from './voice-prompt-builder.js';
|
|
4
|
+
import { extractSections, extractSoulEssentials, extractUserEssentials, loadVoiceIdentity, buildVoicePrompt, buildVoiceSystemInstruction, buildVoiceFollowUpPrompt, buildVoicePromptSectionEstimates, VOICE_INTERNAL_CONTEXT_SEPARATOR, VOICE_IDENTITY_MAX_CHARS, } from './voice-prompt-builder.js';
|
|
5
5
|
import { VOICE_STYLE_INSTRUCTION } from './voice-style-prompt.js';
|
|
6
6
|
import { ROOT_POLICY, TRACKED_DEFAULTS_PREAMBLE, TRACKED_TOOLS_PREAMBLE, buildPromptPreamble, } from '../discord/prompt-common.js';
|
|
7
7
|
// ---------------------------------------------------------------------------
|
|
@@ -180,6 +180,21 @@ describe('loadVoiceIdentity', () => {
|
|
|
180
180
|
expect(identityIdx).toBeLessThan(userIdx);
|
|
181
181
|
});
|
|
182
182
|
});
|
|
183
|
+
describe('buildVoiceSystemInstruction', () => {
|
|
184
|
+
it('builds the static voice context without the user-turn separator', () => {
|
|
185
|
+
const result = buildVoiceSystemInstruction({
|
|
186
|
+
identity: 'identity block',
|
|
187
|
+
durableMemory: '',
|
|
188
|
+
actionsSection: '',
|
|
189
|
+
voiceSystemPrompt: 'custom voice system prompt',
|
|
190
|
+
});
|
|
191
|
+
expect(result).toContain(buildPromptPreamble('identity block', { skipTrackedTools: true }));
|
|
192
|
+
expect(result).toContain('custom voice system prompt');
|
|
193
|
+
expect(result).toContain(VOICE_STYLE_INSTRUCTION);
|
|
194
|
+
expect(result).not.toContain(VOICE_INTERNAL_CONTEXT_SEPARATOR);
|
|
195
|
+
expect(result).not.toContain('Current user message:');
|
|
196
|
+
});
|
|
197
|
+
});
|
|
183
198
|
// ---------------------------------------------------------------------------
|
|
184
199
|
// buildVoicePrompt
|
|
185
200
|
// ---------------------------------------------------------------------------
|
package/docs/configuration.md
CHANGED
|
@@ -37,7 +37,7 @@ For npm-managed daemon installs, readiness is currently constrained by service e
|
|
|
37
37
|
Model/runtime state is intentionally split across three storage modes:
|
|
38
38
|
|
|
39
39
|
- `models.json` stores persisted model strings per role (`chat`, `fast`, `plan-run`, `voice`, forge roles, cron roles, etc.).
|
|
40
|
-
- `runtime-overrides.json` stores persisted runtime-only overlays such as `fastRuntime` and `voiceRuntime
|
|
40
|
+
- `runtime-overrides.json` stores persisted runtime-only overlays such as `fastRuntime` and `voiceRuntime`.
|
|
41
41
|
- Live chat runtime swaps stay in memory only. `!models set chat <runtime>` changes the active chat runtime immediately, but there is no persisted `chatRuntime` overlay.
|
|
42
42
|
|
|
43
43
|
On first run, `models.json` is scaffolded from the instance startup defaults. After that:
|
|
@@ -322,24 +322,19 @@ The same forum-boundary rule applies to tasks: `DISCOCLAW_TASKS_FORUM` is the di
|
|
|
322
322
|
|
|
323
323
|
## Voice
|
|
324
324
|
|
|
325
|
-
See [docs/voice.md](voice.md) for the full setup guide
|
|
325
|
+
See [docs/voice.md](voice.md) for the full Gemini Live setup guide.
|
|
326
326
|
|
|
327
327
|
| Variable | Default | Description |
|
|
328
328
|
|----------|---------|-------------|
|
|
329
329
|
| `DISCOCLAW_VOICE_ENABLED` | `false` | Master switch for voice subsystem |
|
|
330
330
|
| `DISCOCLAW_VOICE_AUTO_JOIN` | `false` | Auto-join voice channels when users enter |
|
|
331
331
|
| `ANTHROPIC_API_KEY` | — | Anthropic API key (required for direct Messages API voice responses) |
|
|
332
|
+
| `GEMINI_API_KEY` | — | Gemini API key required for Gemini Live voice |
|
|
332
333
|
| `DISCOCLAW_VOICE_MODEL` | follows startup chat model | Model override for voice responses |
|
|
333
334
|
| `DISCOCLAW_VOICE_SYSTEM_PROMPT` | — | System prompt override for voice (max 4000 chars) |
|
|
334
|
-
| `
|
|
335
|
-
| `DISCOCLAW_TTS_PROVIDER` | `cartesia` | Text-to-speech provider: `cartesia`, `deepgram`, `kokoro`, `openai` |
|
|
335
|
+
| `DISCOCLAW_GEMINI_SESSION_ROTATION_MS` | `780000` | Proactive Gemini Live session rotation interval in milliseconds |
|
|
336
336
|
| `DISCOCLAW_VOICE_HOME_CHANNEL` | — | Voice channel name or ID for prompt context |
|
|
337
337
|
| `DISCOCLAW_VOICE_LOG_CHANNEL` | `voice-log` | Text channel for transcript mirror |
|
|
338
|
-
| `DEEPGRAM_API_KEY` | — | Deepgram API key (required for Deepgram STT/TTS) |
|
|
339
|
-
| `DEEPGRAM_STT_MODEL` | `nova-3-general` | Deepgram STT model |
|
|
340
|
-
| `DEEPGRAM_TTS_VOICE` | `aura-2-asteria-en` | Deepgram TTS voice |
|
|
341
|
-
| `DEEPGRAM_TTS_SPEED` | `1.3` | Deepgram TTS playback speed multiplier (0.5–1.5) |
|
|
342
|
-
| `CARTESIA_API_KEY` | — | Cartesia API key (required for Cartesia TTS) |
|
|
343
338
|
|
|
344
339
|
## Webhook
|
|
345
340
|
|
package/docs/official-docs.md
CHANGED
|
@@ -6,8 +6,8 @@ Completeness pass for this index was cross-checked against:
|
|
|
6
6
|
|
|
7
7
|
- `package.json`
|
|
8
8
|
- `.context/runtime.md`
|
|
9
|
-
- `src/voice/
|
|
10
|
-
- `src/voice/
|
|
9
|
+
- `src/voice/audio-pipeline.ts`
|
|
10
|
+
- `src/voice/providers/gemini-live-provider.ts`
|
|
11
11
|
- `src/cold-storage/embeddings.ts`
|
|
12
12
|
- `src/cold-storage/openai-compat.ts`
|
|
13
13
|
- `src/discord/actions-imagegen.ts`
|
|
@@ -24,8 +24,8 @@ Completeness pass for this index was cross-checked against:
|
|
|
24
24
|
| Provider | What DiscoClaw uses | Official docs |
|
|
25
25
|
|----------|----------------------|---------------|
|
|
26
26
|
| Anthropic | Claude model families via `src/runtime/anthropic-rest.ts` and Claude Code CLI runtime | Models overview: <https://docs.anthropic.com/en/docs/about-claude/models/overview><br>Messages API: <https://platform.claude.com/docs/en/api/messages><br>Claude Code docs: <https://code.claude.com/docs/en/overview> |
|
|
27
|
-
| OpenAI | OpenAI-compatible runtime, Codex runtime docs,
|
|
28
|
-
| Google | Gemini API runtime and Gemini/Imagen image generation | Gemini models: <https://ai.google.dev/models/gemini><br>Gemini API docs: <https://ai.google.dev/gemini-api/docs> |
|
|
27
|
+
| OpenAI | OpenAI-compatible runtime, Codex runtime docs, embeddings, and image generation | Model IDs: <https://developers.openai.com/api/model-ids/><br>API reference overview: <https://platform.openai.com/docs/api-reference><br>Codex docs: <https://developers.openai.com/codex/><br>Codex app-server API: <https://developers.openai.com/codex/app-server> |
|
|
28
|
+
| Google | Gemini API runtime, Gemini Live voice, and Gemini/Imagen image generation | Gemini models: <https://ai.google.dev/models/gemini><br>Gemini API docs: <https://ai.google.dev/gemini-api/docs><br>Gemini Live API: <https://ai.google.dev/gemini-api/docs/live> |
|
|
29
29
|
| OpenRouter | OpenRouter runtime through `src/runtime/openai-compat.ts` | Model list: <https://openrouter.ai/models><br>API docs: <https://openrouter.ai/docs/api/reference/overview> |
|
|
30
30
|
|
|
31
31
|
## Discord
|
|
@@ -53,11 +53,8 @@ Completeness pass for this index was cross-checked against:
|
|
|
53
53
|
|
|
54
54
|
| Provider | Used in DiscoClaw | Official docs |
|
|
55
55
|
|----------|-------------------|---------------|
|
|
56
|
-
|
|
|
57
|
-
|
|
|
58
|
-
| Cartesia TTS | `src/voice/tts-cartesia.ts` with Sonic-3 over WebSocket | API docs: <https://docs.cartesia.ai/api-reference><br>TTS WebSocket: <https://docs.cartesia.ai/api-reference/tts/websocket> |
|
|
59
|
-
| OpenAI TTS | `src/voice/tts-openai.ts` (`/v1/audio/speech`, default `tts-1`) | Audio speech API reference: <https://platform.openai.com/docs/api-reference/audio/createSpeech> |
|
|
60
|
-
| OpenAI STT | `src/voice/stt-openai.ts` (`/v1/audio/transcriptions`, `whisper-1`) | Audio transcription API reference: <https://platform.openai.com/docs/api-reference/audio/createTranscription> |
|
|
56
|
+
| Gemini Live | `src/voice/audio-pipeline.ts` and the Gemini Live provider handle speech recognition, reasoning, and speech synthesis in one session | Live API overview: <https://ai.google.dev/gemini-api/docs/live><br>Realtime guide: <https://ai.google.dev/gemini-api/docs/live-guide> |
|
|
57
|
+
| Anthropic Messages API (optional voice runtime) | `!models set voice claude-api` can switch voice response generation to direct Anthropic API calls while Discord audio transport stays on Gemini Live | API overview: <https://docs.anthropic.com/en/api/messages> |
|
|
61
58
|
|
|
62
59
|
## Image Generation
|
|
63
60
|
|
|
@@ -135,7 +135,7 @@ Do not print the full `.env` into Discord, terminal transcripts, or audit logs u
|
|
|
135
135
|
|
|
136
136
|
Keep the files separate:
|
|
137
137
|
- `models.json` stores model strings per role.
|
|
138
|
-
- `runtime-overrides.json` stores runtime-only overlays such as `voiceRuntime
|
|
138
|
+
- `runtime-overrides.json` stores runtime-only overlays such as `voiceRuntime` and `fastRuntime`.
|
|
139
139
|
- There is no `chatRuntime` key because chat runtime swaps do not persist.
|
|
140
140
|
|
|
141
141
|
On first run, `models.json` is scaffolded from the startup defaults that instance booted with.
|