npm - discoclaw - Versions diffs - 0.5.7 → 0.6.0 - Mend

discoclaw 0.5.7 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (181)

package/.context/dev.md +2 -2
package/.context/pa.md +20 -11
package/.context/runtime.md +36 -1
package/.context/voice.md +3 -0
package/.env.example +17 -1
package/.env.example.full +88 -9
package/README.md +36 -12
package/dist/cli/init-wizard.js +22 -0
package/dist/cli/init-wizard.test.js +47 -0
package/dist/cold-storage/chunker.js +140 -0
package/dist/cold-storage/chunker.test.js +141 -0
package/dist/cold-storage/embeddings.js +59 -0
package/dist/cold-storage/embeddings.test.js +172 -0
package/dist/cold-storage/index.js +59 -0
package/dist/cold-storage/index.test.js +131 -0
package/dist/cold-storage/openai-compat.js +62 -0
package/dist/cold-storage/openai-compat.test.js +129 -0
package/dist/cold-storage/prompt-section.js +64 -0
package/dist/cold-storage/prompt-section.test.js +107 -0
package/dist/cold-storage/store.js +246 -0
package/dist/cold-storage/store.test.js +376 -0
package/dist/cold-storage/types.js +8 -0
package/dist/config.js +96 -5
package/dist/config.test.js +161 -3
package/dist/cron/executor.js +8 -0
package/dist/cron/executor.test.js +59 -2
package/dist/discord/action-categories.js +2 -0
package/dist/discord/actions-config.js +125 -61
package/dist/discord/actions-config.test.js +36 -4
package/dist/discord/actions-defer.js +20 -2
package/dist/discord/actions-defer.test.js +270 -2
package/dist/discord/actions-forge.js +192 -8
package/dist/discord/actions-forge.test.js +91 -7
package/dist/discord/actions-memory.js +38 -4
package/dist/discord/actions-memory.test.js +88 -5
package/dist/discord/actions-messaging.js +71 -6
package/dist/discord/actions-messaging.test.js +280 -6
package/dist/discord/actions-plan.js +266 -105
package/dist/discord/actions-plan.test.js +59 -6
package/dist/discord/actions-spawn.js +117 -18
package/dist/discord/actions-spawn.test.js +609 -8
package/dist/discord/actions.js +252 -74
package/dist/discord/actions.test.js +85 -3
package/dist/discord/audit-handler.js +32 -2
package/dist/discord/audit-handler.test.js +71 -0
package/dist/discord/cold-storage-ingest.js +93 -0
package/dist/discord/cold-storage-ingest.test.js +220 -0
package/dist/discord/defer-scheduler.js +36 -6
package/dist/discord/deferred-runner.js +54 -10
package/dist/discord/deferred-runner.test.js +240 -2
package/dist/discord/durable-memory.js +117 -6
package/dist/discord/durable-memory.test.js +264 -1
package/dist/discord/forge-auto-implement.js +3 -0
package/dist/discord/forge-auto-implement.test.js +12 -0
package/dist/discord/forge-commands.js +446 -197
package/dist/discord/forge-commands.test.js +642 -50
package/dist/discord/forge-plan-registry.js +36 -7
package/dist/discord/forge-plan-registry.test.js +87 -12
package/dist/discord/health-command.js +7 -1
package/dist/discord/health-command.test.js +82 -0
package/dist/discord/image-download.js +14 -2
package/dist/discord/image-download.test.js +42 -0
package/dist/discord/long-run-watchdog.js +398 -0
package/dist/discord/long-run-watchdog.test.js +290 -0
package/dist/discord/memory-commands.js +48 -6
package/dist/discord/memory-commands.test.js +84 -1
package/dist/discord/memory-timing.integration.test.js +318 -5
package/dist/discord/message-coordinator.js +752 -132
package/dist/discord/message-coordinator.plan-run.test.js +241 -12
package/dist/discord/message-coordinator.reaction-action-ordering.test.js +72 -13
package/dist/discord/message-coordinator.reaction-cleanup.test.js +86 -3
package/dist/discord/models-command.js +5 -5
package/dist/discord/output-utils.js +129 -6
package/dist/discord/phase-status-heartbeat.js +248 -0
package/dist/discord/phase-status-heartbeat.test.js +126 -0
package/dist/discord/plan-commands.js +220 -22
package/dist/discord/plan-commands.test.js +383 -2
package/dist/discord/plan-manager.js +415 -29
package/dist/discord/plan-manager.test.js +464 -14
package/dist/discord/plan-parser.js +8 -1
package/dist/discord/plan-parser.test.js +25 -0
package/dist/discord/prompt-common.js +287 -26
package/dist/discord/prompt-common.test.js +616 -9
package/dist/discord/reaction-handler.js +227 -35
package/dist/discord/reaction-handler.test.js +439 -32
package/dist/discord/runtime-event-text-adapter.js +152 -0
package/dist/discord/runtime-event-text-adapter.test.js +241 -0
package/dist/discord/runtime-signal-budget.js +172 -0
package/dist/discord/runtime-signal-budget.test.js +68 -0
package/dist/discord/runtime-utils.js +25 -3
package/dist/discord/runtime-utils.test.js +59 -0
package/dist/discord/shutdown-context.js +48 -0
package/dist/discord/shutdown-context.test.js +283 -1
package/dist/discord/spawn-registry.js +49 -0
package/dist/discord/spawn-registry.test.js +90 -0
package/dist/discord/status-channel.js +32 -1
package/dist/discord/status-channel.test.js +98 -1
package/dist/discord/status-command.js +8 -0
package/dist/discord/status-command.test.js +18 -0
package/dist/discord/streaming-progress.js +79 -5
package/dist/discord/streaming-progress.test.js +379 -5
package/dist/discord/summarizer-recency.test.js +130 -0
package/dist/discord/summarizer.js +126 -6
package/dist/discord/summarizer.test.js +111 -1
package/dist/discord/tool-aware-queue.js +32 -6
package/dist/discord/tool-aware-queue.test.js +67 -0
package/dist/discord/update-command.js +1 -2
package/dist/discord-followup.test.js +276 -2
package/dist/discord.prompt-context.test.js +181 -12
package/dist/discord.render.test.js +107 -3
package/dist/image/resize.js +47 -0
package/dist/image/resize.test.js +145 -0
package/dist/index.js +545 -133
package/dist/index.post-connect.js +4 -0
package/dist/index.runtime.js +35 -3
package/dist/index.runtime.test.js +263 -0
package/dist/instructions/system-defaults.js +57 -0
package/dist/instructions/system-defaults.test.js +94 -0
package/dist/instructions/tracked-tools.js +59 -0
package/dist/instructions/tracked-tools.test.js +89 -0
package/dist/model-config.js +166 -0
package/dist/model-config.test.js +276 -0
package/dist/npm-managed.js +0 -1
package/dist/npm-managed.test.js +0 -1
package/dist/pipeline/engine.js +18 -1
package/dist/pipeline/engine.test.js +88 -2
package/dist/runtime/anthropic-rest.js +177 -0
package/dist/runtime/anthropic-rest.test.js +337 -0
package/dist/runtime/claude-code-cli.test.js +114 -0
package/dist/runtime/cli-adapter.js +530 -371
package/dist/runtime/cli-adapter.test.js +67 -0
package/dist/runtime/cli-shared.js +2 -1
package/dist/runtime/cli-shared.test.js +5 -0
package/dist/runtime/codex-cli.js +5 -1
package/dist/runtime/codex-cli.test.js +494 -4
package/dist/runtime/concurrency-limit.test.js +52 -0
package/dist/runtime/gemini-cli.test.js +21 -3
package/dist/runtime/global-supervisor.js +382 -0
package/dist/runtime/global-supervisor.test.js +301 -0
package/dist/runtime/long-running-process.js +156 -1
package/dist/runtime/long-running-process.test.js +74 -0
package/dist/runtime/model-smoke-helpers.js +2 -2
package/dist/runtime/model-tiers.js +25 -3
package/dist/runtime/model-tiers.test.js +49 -12
package/dist/runtime/openai-compat.js +16 -2
package/dist/runtime/openai-compat.test.js +114 -2
package/dist/runtime/openai-tool-exec.js +1207 -5
package/dist/runtime/openai-tool-exec.test.js +535 -1
package/dist/runtime/openai-tool-schemas.js +211 -14
package/dist/runtime/openai-tool-schemas.test.js +59 -4
package/dist/runtime/process-pool.js +29 -5
package/dist/runtime/process-pool.test.js +27 -0
package/dist/runtime/session-scanner.js +24 -2
package/dist/runtime/session-scanner.test.js +5 -1
package/dist/runtime/strategies/claude-strategy.js +108 -7
package/dist/runtime/strategies/codex-strategy.js +220 -13
package/dist/runtime/tools/fs-glob.js +92 -7
package/dist/runtime/tools/fs-glob.test.js +76 -1
package/dist/runtime/tools/path-security.js +7 -0
package/dist/runtime-overrides.js +2 -10
package/dist/runtime-overrides.test.js +15 -81
package/dist/tasks/task-action-mutations.js +12 -5
package/dist/tasks/task-action-thread-sync.js +4 -0
package/dist/voice/voice-prompt-builder.js +36 -3
package/dist/voice/voice-prompt-builder.test.js +60 -9
package/dist/voice/voice-responder.js +31 -7
package/dist/voice/voice-responder.test.js +10 -8
package/dist/voice/voice-sanitize.js +47 -0
package/dist/voice/voice-sanitize.test.js +30 -1
package/dist/webhook/server.js +4 -2
package/dist/webhook/server.test.js +91 -3
package/dist/workspace-bootstrap.js +100 -13
package/dist/workspace-bootstrap.test.js +283 -115
package/dist/workspace-permissions.js +6 -4
package/dist/workspace-permissions.test.js +9 -2
package/package.json +14 -4
package/templates/instructions/SYSTEM_DEFAULTS.md +102 -0
package/templates/instructions/TOOLS.md +143 -0
package/templates/workspace/AGENTS.md +14 -205
package/templates/workspace/DISCOCLAW.md +15 -0
package/templates/workspace/TOOLS.md +10 -496

package/.context/dev.md CHANGED Viewed

@@ -79,12 +79,12 @@ Two setup paths:
 | Variable | Default | Description |
 |----------|---------|-------------|
 | `PRIMARY_RUNTIME` | `claude` | Runtime engine (`claude`, `openai`, `openrouter`, `gemini`, `codex`) |
-| `RUNTIME_MODEL` | `capable` | Model tier (`fast`, `capable`) or concrete model name passed to the CLI |
+| `RUNTIME_MODEL` | `capable` | **Deprecated — use `models.json` instead.** Model tier (`fast`, `capable`, `deep`) or concrete model name passed to the CLI |
 | `RUNTIME_TOOLS` | `Bash,Read,Write,Edit,Glob,Grep,WebSearch,WebFetch` | Comma-separated tool list |
 | `RUNTIME_TIMEOUT_MS` | `1800000` | Per-invocation timeout in milliseconds |
 | `RUNTIME_FALLBACK_MODEL` | *(unset)* | Auto-fallback model when primary is overloaded (e.g. `sonnet`) |
 | `RUNTIME_MAX_BUDGET_USD` | *(unset)* | Max USD per CLI process; one-shot = per invocation, multi-turn = per session lifetime |
-| `DISCOCLAW_FAST_MODEL` | `fast` | Default "fast" model tier alias used for summarization, auto-tag, and cron parsing |
+| `DISCOCLAW_FAST_MODEL` | `fast` | **Deprecated — use `models.json` instead.** Default "fast" model tier alias used for summarization, auto-tag, and cron parsing |
 | `DISCOCLAW_RUNTIME_SESSIONS` | `1` | Persist Claude session IDs across messages |
 | `DISCOCLAW_SESSION_SCANNING` | `1` | Enable session ID scanning for resume detection |
 | `DISCOCLAW_ACTION_FOLLOWUP_DEPTH` | `3` | Max depth for chained action follow-ups |

package/.context/pa.md CHANGED Viewed

@@ -9,17 +9,20 @@ For architecture details, see `.context/architecture.md`.
 ## Workspace Files
-| File | Purpose | Loaded |
-|------|---------|--------|
-| `SOUL.md` | Core personality and values | Every prompt |
-| `IDENTITY.md` | Name and vibe | Every prompt |
-| `USER.md` | Who you're helping | Every prompt |
-| `AGENTS.md` | Your personal rules and conventions | Every prompt |
-| `TOOLS.md` | Available tools and integrations | Every prompt |
-| `MEMORY.md` | Curated long-term memory | DM prompts |
-| `BOOTSTRAP.md` | First-run onboarding (deleted after) | Once |
+| File | Purpose | Owner | Loaded |
+|------|---------|-------|--------|
+| `SOUL.md` | Core personality and values | User | Every prompt |
+| `IDENTITY.md` | Name and vibe | User | Every prompt |
+| `USER.md` | Who you're helping | User | Every prompt |
+| `templates/instructions/SYSTEM_DEFAULTS.md` | Tracked default instructions (runtime-injected) | Discoclaw repo (tracked) | Every prompt |
+| `AGENTS.md` | Personal rules and preferences | User (never overwritten) | Every prompt |
+| `TOOLS.md` | Available tools and integrations | Discoclaw | Every prompt |
+| `MEMORY.md` | Curated long-term memory | User | DM prompts |
+| `BOOTSTRAP.md` | First-run onboarding (deleted after) | User | Once |
 Templates live in `templates/workspace/` and are scaffolded on first run (copy-if-missing).
+Tracked defaults come from `templates/instructions/SYSTEM_DEFAULTS.md` and are injected at runtime.
+Legacy `workspace/DISCOCLAW.md` files are not authoritative.
 ## Operational Essentials
@@ -115,5 +118,11 @@ See `.context/memory.md` for full architecture, examples, and config reference.
 ## Customization
-These rules are generic defaults. Override or extend them in `workspace/AGENTS.md`,
-which is your personal space — not tracked by git, not overwritten on updates.
+Instruction precedence is deterministic:
+1. immutable security policy (`ROOT_POLICY`)
+2. tracked defaults (`templates/instructions/SYSTEM_DEFAULTS.md`)
+3. `workspace/AGENTS.md` overrides
+4. memory/context sections
+Customize behavior in `workspace/AGENTS.md` (user-owned, never overwritten).
+Do not rely on `workspace/DISCOCLAW.md`; defaults are sourced from the tracked template and injected at runtime.

package/.context/runtime.md CHANGED Viewed

@@ -45,7 +45,7 @@ The factory provides: subprocess tracking, process pool, stall detection, sessio
 | Strategy | File | Multi-turn | Notes |
 |----------|------|------------|-------|
 | Claude Code | `strategies/claude-strategy.ts` | process-pool | Default JSONL parsing, image support |
-| Codex CLI | `strategies/codex-strategy.ts` | session-resume | Custom JSONL (thread.started, item.completed), error sanitization; reasoning items surface in the Discord preview during streaming but are excluded from the final reply |
+| Codex CLI | `strategies/codex-strategy.ts` | session-resume | Custom JSONL (thread.started, item.completed), error sanitization; reasoning items surface in the Discord preview during streaming but are excluded from the final reply; image support via `--image` temp files |
 | Gemini CLI | `strategies/gemini-strategy.ts` | none (Phase 1) | Text-only output mode; no sessions; stdin fallback for large prompts |
 | Template | `strategies/template-strategy.ts` | — | Commented starting point for new models |
@@ -102,6 +102,39 @@ Shutdown: `killAllSubprocesses()` from `cli-adapter.ts` kills all tracked subpro
   - No tool execution, no fs tools
   - No image input/output support
+## Anthropic REST Runtime
+Direct HTTP adapter for the Anthropic Messages API — no CLI subprocess, no cold-start. Designed for latency-sensitive paths like voice where the ~2-4 s CLI bootstrap is unacceptable.
+- Adapter: `src/runtime/anthropic-rest.ts`
+- Factory: `createAnthropicRestRuntime(opts)`
+- Auth: `x-api-key` header (from `ANTHROPIC_API_KEY` env var)
+- Streaming: SSE (`stream: true`) — emits `text_delta`, `usage`, `text_final`, `done` engine events
+- Runtime ID: `claude_code` (same as CLI adapter so model tier resolution is compatible)
+- Default model: `claude-sonnet-4-6` (set at registration time in `src/index.ts`)
+- Capabilities: `streaming_text` only (no tools, no sessions)
+- Conditional registration: only registered as `'anthropic'` in the runtime registry when `ANTHROPIC_API_KEY` is set
+Env vars:
+| Var | Default | Purpose |
+|-----|---------|---------|
+| `ANTHROPIC_API_KEY` | *(required)* | API key; also gates adapter registration |
+Configurable via `AnthropicRestOpts`: `baseUrl` (default `https://api.anthropic.com`), `apiVersion` (default `2023-06-01`), `defaultMaxTokens` (default `1024`).
+### Voice auto-wiring
+When both `ANTHROPIC_API_KEY` and `DISCOCLAW_VOICE_ENABLED=1` are set, the startup path in `src/index.ts` auto-wires the Anthropic REST adapter as the voice runtime. `resolveVoiceRuntime()` checks `voiceModelRef.runtime` first, then falls back to the `'anthropic'` registry entry, then the primary CLI runtime. Model overrides are now configured in `models.json`; the voice runtime override is still in `runtime-overrides.json` (`voiceRuntime` key). The model can also be changed via the `!models` command.
+### Key files
+| File | Role |
+|------|------|
+| `src/runtime/anthropic-rest.ts` | Adapter: SSE streaming, abort/timeout, system prompt extraction |
+| `src/runtime/anthropic-rest.test.ts` | Unit tests (mocked fetch, SSE parsing, error handling, abort) |
+| `src/runtime/openai-compat.ts` | Provides `splitSystemPrompt()` used by the adapter |
 ## OpenRouter Adapter
 - Implementation: reuses `src/runtime/openai-compat.ts` with `id: 'openrouter'` — no separate adapter file needed.
@@ -279,6 +312,8 @@ When a Discord message or reaction target has image attachments (PNG, JPEG, WebP
 3. **Download** — `downloadAttachment()` fetches the image with a 10 s timeout, post-checks actual size, and returns base64.
 4. **Delivery** — The runtime adapter writes a `stream-json` stdin message containing `[{ type: 'text', text: prompt }, { type: 'image', source: { type: 'base64', ... } }, ...]`. When images are present, `--output-format` is forced to `stream-json` regardless of the configured format.
+**Codex delivery:** Codex CLI does not accept base64 image data via stdin — it requires file paths on disk via `--image <path>` flags. The Codex strategy writes each `ImageData` (base64) to a temp file before invocation and cleans up after. The full pipeline is: Discord attachment → base64 `ImageData` → temp file → `--image <path>` → cleanup.
 ### Security controls
 | Control | Detail |

package/.context/voice.md CHANGED Viewed

@@ -51,6 +51,8 @@ User speaks in Discord voice channel
 - **Allowlist gating** — `AudioReceiver` only subscribes to users in `DISCORD_ALLOW_USER_IDS`. Empty allowlist = ignore everyone (fail-closed).
 - **Dual-flag voice actions** — Voice action execution requires both `VOICE_ENABLED` and `DISCORD_ACTIONS_VOICE`. The `buildVoiceActionFlags()` function intersects a voice-specific allowlist (messaging, tasks, memory) with env config; all other action categories are hard-disabled.
+- **Queued invocations** — `VoiceResponder` queues new transcriptions when a pipeline is already in-flight instead of aborting the active AI call. Only the most recent pending text is kept (coalesced). On completion the responder drains the queue, processing the next pending transcription. This eliminates the death-spiral where CLI cold-start latency caused cascading cancellations. Barge-in still stops *playback* immediately but never cancels the running AI request.
+- **Fast invoke path** — When `ANTHROPIC_API_KEY` is set, voice auto-wires to the Anthropic REST adapter (`src/runtime/anthropic-rest.ts`) instead of the CLI subprocess path. Direct HTTP eliminates the ~2-4 s CLI cold-start, bringing first-token latency under 500 ms. The wiring happens at startup in `src/index.ts`; at invoke time `resolveVoiceRuntime()` picks the `'anthropic'` adapter from the registry. Model configuration is now in `models.json`; the voice runtime override is still in `runtime-overrides.json` (`voiceRuntime` key). The model can also be changed via the `!models` command.
 - **Generation-based cancellation** — `VoiceResponder` increments a generation counter on each new transcription. If a newer transcription arrives mid-pipeline, the older one is silently abandoned.
 - **Barge-in** — Gated on a non-empty STT transcription result, not the raw VAD `speaking.start` event. Echo from the bot's own TTS leaking through the user's mic produces empty transcriptions and is ignored. Only when `VoiceResponder.handleTranscription()` receives a non-empty transcript while the player is active does it stop playback and advance the generation counter. This eliminates false positives from echo without relying on a static grace-period timeout.
 - **Conversation ring buffer** — `ConversationBuffer` maintains a per-guild 10-turn ring buffer of user/model exchanges that gets injected into the voice prompt as formatted conversation history. Turns are appended live during a session. On voice join, the buffer backfills from recent voice-log channel messages so context carries across disconnects. The buffer is cleared when the bot leaves the voice channel.
@@ -87,4 +89,5 @@ When `voiceEnabled=true`, the post-connect block in `src/index.ts` initializes t
 | `DEEPGRAM_TTS_VOICE` | `aura-2-asteria-en` | Deepgram TTS voice name |
 | `DEEPGRAM_TTS_SPEED` | `1.3` | Deepgram TTS playback speed (range 0.5–1.5) |
 | `CARTESIA_API_KEY` | — | Required for cartesia TTS |
+| `ANTHROPIC_API_KEY` | — | Enables the Anthropic REST adapter; when set and voice is enabled, voice auto-wires to the direct Messages API path (zero CLI cold-start). See `runtime.md § Anthropic REST Runtime`. |
 | *(built-in)* | — | Telegraphic style instruction hardcoded into every voice AI invocation — front-loads the answer, strips preambles/markdown/filler, keeps responses short for TTS latency. Not an env var; not overridable by `DISCOCLAW_VOICE_SYSTEM_PROMPT`. |

package/.env.example CHANGED Viewed

@@ -59,9 +59,24 @@ DISCORD_GUILD_ID=
 #CODEX_DANGEROUSLY_BYPASS_APPROVALS_AND_SANDBOX=1
 # Optional: isolate Codex state/sessions from ~/.codex (helps avoid stale rollout DB issues):
 #CODEX_HOME=/absolute/path/to/.codex-home-discoclaw
+# Runtime launcher state hardening for CLI providers.
+# When enabled, launcher state/path errors (e.g. Codex rollout-path corruption) trigger
+# one automatic retry with CODEX_HOME set to a clean stable home.
+# Set to 0 to disable this behavior.
+#DISCOCLAW_CLI_LAUNCHER_STATE_HARDENING=1
+# Optional stable home override used by the hardening retry above.
+# Default: <discoclaw cwd>/.codex-home-discoclaw
+#DISCOCLAW_CODEX_STABLE_HOME=/absolute/path/to/.codex-home-discoclaw
 # Disable Codex session persistence/resume (workaround for session DB issues):
 #CODEX_DISABLE_SESSIONS=1
+# Emit Codex item lifecycle debug events in stream preview (item.started/item.completed + item.type):
+#DISCOCLAW_CODEX_ITEM_TYPE_DEBUG=1
+# Log each Discord preview line decision (allowed/suppressed + rendered line) to journald:
+#DISCOCLAW_DEBUG_STREAM_PREVIEW_LINES=1
+# [DEPRECATED] Model configuration has moved to models.json (managed via !models commands).
+# RUNTIME_MODEL is still read as a fallback when models.json is missing, but new deployments
+# should use `!models set chat <model>` instead. See docs for migration details.
 # Model tier: fast | capable | deep (provider-agnostic).
 # Concrete model names (e.g. opus, sonnet, gpt-4o) are still accepted as passthrough.
 #RUNTIME_MODEL=capable
@@ -117,6 +132,7 @@ DISCORD_GUILD_ID=
 # Only set this to override the auto-discovered channel.
 #DISCOCLAW_VOICE_LOG_CHANNEL=
 #DEEPGRAM_API_KEY=
+#ANTHROPIC_API_KEY=
 # ----------------------------------------------------------
 # Secret management via Discord DM

package/.env.example.full CHANGED Viewed

@@ -49,14 +49,26 @@ DISCORD_ALLOW_USER_IDS=
 #PRIMARY_RUNTIME=claude
 # --- Primary models ---
+# [DEPRECATED] Model configuration has moved to models.json (managed via !models commands).
+# RUNTIME_MODEL is still read as a fallback when models.json is missing, but new deployments
+# should use `!models set chat <model>` instead. See docs for migration details.
 # Model tier: fast | capable | deep (provider-agnostic).
 # Concrete model names (e.g. opus, sonnet, gpt-4o) are still accepted as passthrough.
 #RUNTIME_MODEL=capable
 # --- Fast-tier default ---
+# [DEPRECATED] Model configuration has moved to models.json (managed via !models commands).
+# DISCOCLAW_FAST_MODEL is still read as a fallback when models.json is missing, but new
+# deployments should use `!models set fast <model>` instead.
 # Sets the default model for all "small" tasks (summary, cron, cron auto-tag, task auto-tag).
 # Valid tiers: fast | capable | deep. Individual overrides (DISCOCLAW_SUMMARY_MODEL, etc.) still win when set.
 #DISCOCLAW_FAST_MODEL=fast
+# [DEPRECATED] Fast-tier runtime selection has moved to models.json (managed via !models commands).
+# Use `!models set fast <provider>/<model>` to route fast-tier through a different runtime —
+# the provider prefix auto-selects the runtime adapter. DISCOCLAW_FAST_RUNTIME is still read
+# as a fallback when models.json has no fast-tier entry, but new deployments should migrate.
+# Valid values: claude | gemini | codex | openai | openrouter
+#DISCOCLAW_FAST_RUNTIME=
 # --- Tier model overrides ---
 # Override the concrete model resolved for any runtime × tier combination.
@@ -177,6 +189,10 @@ DISCORD_GUILD_ID=
 # Per-category flags (only active when master switch is 1):
 #DISCOCLAW_DISCORD_ACTIONS_CHANNELS=1
 #DISCOCLAW_DISCORD_ACTIONS_MESSAGING=1
+# Comma-separated absolute directory paths allowed for the sendFile Discord action.
+# Defaults to /tmp when unset. DISCOCLAW_DATA_DIR and WORKSPACE_CWD are always
+# auto-included when configured. Symlinks are resolved via fs.realpath() before checking.
+#DISCOCLAW_SENDFILE_ALLOWED_DIRS=/tmp
 #DISCOCLAW_DISCORD_ACTIONS_GUILD=1
 # Intentionally off — moderation actions require explicit opt-in.
 #DISCOCLAW_DISCORD_ACTIONS_MODERATION=0
@@ -255,6 +271,10 @@ DISCOCLAW_DISCORD_ACTIONS_DEFER=1
 #DISCOCLAW_SUMMARY_MODEL=fast
 #DISCOCLAW_SUMMARY_MAX_CHARS=2000
 #DISCOCLAW_SUMMARY_EVERY_N_TURNS=5
+# Estimated token threshold for one-pass summary recompression.
+#DISCOCLAW_SUMMARY_MAX_TOKENS=1500
+# Compression target ratio used when recompression runs (target = max_tokens * ratio).
+#DISCOCLAW_SUMMARY_TARGET_RATIO=0.65
 # Override storage directory for rolling summaries.
 #DISCOCLAW_SUMMARY_DATA_DIR=
 # Durable per-user facts/preferences (manual via !memory commands).
@@ -287,6 +307,34 @@ DISCOCLAW_DISCORD_ACTIONS_DEFER=1
 # Character budget for recent conversation history in prompts (0 = disabled).
 #DISCOCLAW_MESSAGE_HISTORY_BUDGET=3000
+# ----------------------------------------------------------
+# Cold storage — vector-indexed conversation history (off by default)
+# ----------------------------------------------------------
+# Master switch — enables SQLite + sqlite-vec backed long-term memory.
+# When enabled, messages are chunked, embedded, and stored for semantic
+# retrieval. Matching context is injected into prompts automatically.
+#DISCOCLAW_COLD_STORAGE_ENABLED=0
+# API key for the embedding provider. Falls back to OPENAI_API_KEY when unset.
+# For the default "openai" provider, this is your OpenAI API key.
+#COLD_STORAGE_API_KEY=
+# Embedding provider: openai (default) or openai-compat (any OpenAI-compatible API).
+#COLD_STORAGE_PROVIDER=openai
+# Model name for embeddings (required for openai-compat; optional for openai).
+#COLD_STORAGE_MODEL=
+# Embedding dimensions (required for openai-compat; optional for openai).
+#COLD_STORAGE_DIMENSIONS=
+# Base URL for the embedding API (required for openai-compat; optional for openai).
+#COLD_STORAGE_BASE_URL=
+# Path to the SQLite database file. Default: <data-dir>/cold-storage.db
+#COLD_STORAGE_DB_PATH=
+# Max characters for the cold-storage prompt section injected into each prompt (default: 1500).
+#DISCOCLAW_COLD_STORAGE_INJECT_MAX_CHARS=1500
+# Max search results returned per query (default: 10).
+#DISCOCLAW_COLD_STORAGE_SEARCH_LIMIT=10
+# Comma-separated channel IDs to restrict cold-storage ingestion and retrieval.
+# When set, only messages from these channels are stored/searched. Empty = all channels.
+#COLD_STORAGE_CHANNEL_FILTER=
 # ----------------------------------------------------------
 # Bot identity
 # ----------------------------------------------------------
@@ -365,6 +413,21 @@ DISCOCLAW_DISCORD_ACTIONS_DEFER=1
 #DISCOCLAW_ACTION_FOLLOWUP_DEPTH=3
 # Timeout for runtime invocations (ms).
 #RUNTIME_TIMEOUT_MS=1800000
+# Global runtime supervisor wrapper (off by default; preserves legacy behavior).
+# When enabled, all runtime invocations run through plan -> execute -> evaluate -> decide.
+#DISCOCLAW_GLOBAL_SUPERVISOR_ENABLED=0
+# Emit supervisor cycle audit JSON on stdout or stderr.
+#DISCOCLAW_GLOBAL_SUPERVISOR_AUDIT_STREAM=stderr
+# Max supervisor cycles before forced bail (must be >= 1).
+#DISCOCLAW_GLOBAL_SUPERVISOR_MAX_CYCLES=3
+# Max retries allowed across cycles (must be >= 0).
+#DISCOCLAW_GLOBAL_SUPERVISOR_MAX_RETRIES=2
+# Max escalation prompt level applied across retries (must be >= 0).
+#DISCOCLAW_GLOBAL_SUPERVISOR_MAX_ESCALATION_LEVEL=2
+# Hard cap on total streamed events across all cycles (must be >= 1).
+#DISCOCLAW_GLOBAL_SUPERVISOR_MAX_TOTAL_EVENTS=5000
+# Wall-time cap for the full supervisor loop (ms). 0 disables wall-time cap.
+#DISCOCLAW_GLOBAL_SUPERVISOR_MAX_WALL_TIME_MS=0
 # Multi-turn mode: persistent subprocess per session, keeping session context across messages (default: 1).
 #DISCOCLAW_MULTI_TURN=1
 # Timeout (ms) before a multi-turn process is considered hung and restarted.
@@ -377,18 +440,26 @@ DISCOCLAW_DISCORD_ACTIONS_DEFER=1
 #DISCOCLAW_SESSION_SCANNING=1
 # Parse tool-use events during streaming for better progress reporting and stall suppression.
 #DISCOCLAW_TOOL_AWARE_STREAMING=1
+# Render a denser "Thinking..." streaming preview tail (more lines + wider logs + richer tool signals).
+# Helps previews update more visibly during long runs; action tags are still stripped from preview text.
+#DISCOCLAW_STREAM_PREVIEW_RAW=0
 # Stream stall detection: kill one-shot process if no stdout/stderr for this long (ms). 0 = disabled. (default: 600000)
 #DISCOCLAW_STREAM_STALL_TIMEOUT_MS=120000
 # Progress stall timeout: alert after this many ms with no progress event (ms). 0 = disabled.
 #DISCOCLAW_PROGRESS_STALL_TIMEOUT_MS=300000
 # Stream stall warning: show user-visible warning in Discord after this many ms of no events. 0 = disabled. (default: 300000)
 #DISCOCLAW_STREAM_STALL_WARNING_MS=60000
-# Post a brief "Done (Xm Ys)" completion notice as a new message after long-running runs finish.
-# Discord's unread indicator fires on new messages but not edits, so users who left the channel
-# while the bot was working will see an unread badge. Set to 0 to disable.
+# Enable long-run watchdog follow-up status updates.
+# Normal path: after the threshold delay, a deferred "Still running..." check-in is posted.
+# Recovery path: lifecycle state is persisted and swept on startup so interrupted long-running
+# runs still get a final status update after restart.
+# Persistence-first lifecycle: run completion is persisted before attempting the final Discord
+# post/edit, and finalPosted is set only after a successful post/edit. Crash boundaries may
+# duplicate follow-up/final updates, but should not omit them. Set to 0 to disable watchdog
+# follow-up posting entirely.
 #DISCOCLAW_COMPLETION_NOTIFY=1
-# Minimum elapsed time (ms) before a completion notice is sent. Runs shorter than this are
-# considered "fast" and don't need a notification. Default: 30000 (30 seconds).
+# Delay (ms) before the in-process "Still running..." follow-up timer fires. Fast runs shorter
+# than this are considered non-long-running and do not post watchdog follow-ups. Default: 30000.
 #DISCOCLAW_COMPLETION_NOTIFY_THRESHOLD_MS=30000
 # ----------------------------------------------------------
@@ -405,6 +476,10 @@ DISCOCLAW_DISCORD_ACTIONS_DEFER=1
 #PLAN_PHASE_TIMEOUT_MS=1800000
 # Max audit-fix attempts per phase before marking failed.
 #PLAN_PHASE_AUDIT_FIX_MAX=3
+# Default heartbeat interval (ms) for command-path phase progress updates
+# in `!plan run*` and `!forge`. Set to 0 to disable periodic heartbeats;
+# phase starts/transitions and the single terminal outcome still post.
+#PLAN_FORGE_HEARTBEAT_INTERVAL_MS=45000
 # Max draft-audit-revise loops before CAP_REACHED.
 #FORGE_MAX_AUDIT_ROUNDS=5
 # Model overrides for forge roles (fall back to RUNTIME_MODEL).
@@ -434,7 +509,7 @@ DISCOCLAW_DISCORD_ACTIONS_DEFER=1
 # Optional: isolate Codex state/sessions from ~/.codex (helps avoid stale rollout DB issues).
 #CODEX_HOME=/absolute/path/to/.codex-home-discoclaw
 # Default model for the Codex CLI adapter. Used when FORGE_AUDITOR_MODEL is not set.
-#CODEX_MODEL=gpt-5.3-codex
+#CODEX_MODEL=gpt-5.4
 # WARNING: disables Codex approval prompts and sandbox protections (full-access mode).
 # Equivalent to passing --dangerously-bypass-approvals-and-sandbox to codex exec.
 #CODEX_DANGEROUSLY_BYPASS_APPROVALS_AND_SANDBOX=0
@@ -540,13 +615,17 @@ DISCOCLAW_DISCORD_ACTIONS_IMAGEGEN=0
 # Leave unset to disable transcript mirroring.
 #DISCOCLAW_VOICE_LOG_CHANNEL=  # e.g. "voice-log" if using the default scaffold
 # Model for voice AI responses: tier (fast | capable | deep) or concrete name (sonnet, opus, haiku).
-# Independent of RUNTIME_MODEL — allows tuning voice latency vs quality separately from chat.
-# Switchable at runtime via `modelSet voice <model>`.
-# Default: follows DISCOCLAW_FAST_MODEL (override here for voice-specific tuning).
+# Independent of the chat model — allows tuning voice latency vs quality separately from chat.
+# Switchable at runtime via `!models set voice <model>`.
+# Default: follows the startup chat model unless overridden here.
 #DISCOCLAW_VOICE_MODEL=sonnet
 # Custom system prompt prepended to voice AI invocations. Max 4000 chars.
 # Use this to set a conversational tone, brevity instructions, or persona for voice responses.
 #DISCOCLAW_VOICE_SYSTEM_PROMPT=
+# Anthropic API key for direct Messages API access (bypasses Claude CLI cold-start).
+# When set and voice is enabled, voice invocations use the Anthropic REST adapter
+# instead of the CLI subprocess, eliminating ~2-5s cold-start latency per response.
+#ANTHROPIC_API_KEY=
 # API key for Deepgram Nova-3 STT. Required when DISCOCLAW_STT_PROVIDER=deepgram.
 #DEEPGRAM_API_KEY=
 # Deepgram STT model for voice transcription (default: nova-3-conversationalai).

package/README.md CHANGED Viewed

@@ -26,6 +26,7 @@ Your assistant carries context across every conversation, channel, and restart.
 - **Durable facts** — `!memory remember prefers dark mode` persists across sessions and channels
 - **Rolling summaries** — Compresses earlier conversation so context carries forward, even across restarts
+- **Cold storage** — Semantic search over past conversations using vector embeddings + keyword search. Relevant history is automatically retrieved and injected into the prompt (see [docs/memory.md](docs/memory.md))
 - **Per-channel context** — Each channel gets a markdown file shaping behavior (formal in #work, casual in #random)
 - **Customizable identity** — Personality, name, and values defined in workspace files (`SOUL.md`, `IDENTITY.md`, etc.)
 - **Group chat aware** — Knows when to speak up and when to stay quiet in shared channels
@@ -84,6 +85,24 @@ DiscoClaw orchestrates the flow between Discord and AI runtimes (Claude Code by
 4. Streams the response back, chunked to fit Discord's message limits
 5. Parses and executes any Discord actions the assistant emitted
+### Instruction precedence
+Prompt assembly has two layers, each with its own ordering contract.
+**Preamble precedence** — the front of every prompt, in strict priority order:
+1. **Immutable security policy** (hard-coded root rules)
+2. **Tracked defaults** (runtime-injected from `templates/instructions/SYSTEM_DEFAULTS.md`)
+3. **Tracked tools** (runtime-injected from `templates/instructions/TOOLS.md`)
+4. **User rules override** (`workspace/AGENTS.md`)
+5. **User tools override** (`workspace/TOOLS.md`, optional)
+6. **Memory/context layers** (workspace identity files, channel context, durable/rolling memory, etc.)
+**Post-preamble section ordering** — the sections between the preamble and the user message are arranged to exploit primacy bias (high-signal sections first) and recency bias (action schemas and constraints near the end, just before the user message). Low-signal data sections sit in the middle. See [`docs/prompt-ordering.md`](docs/prompt-ordering.md) for the canonical order and rationale.
+`workspace/DISCOCLAW.md` is no longer a managed or authoritative instruction source.
+If you still have a legacy copy, treat it as historical reference only.
 ### Message batching
 When multiple messages arrive while the bot is thinking (i.e., an AI invocation is already active for that session), they're automatically combined into a single prompt rather than queued individually. This means rapid follow-up messages are processed together, giving the bot full context in one shot. Commands (`!`-prefixed messages) bypass batching and are always processed individually.
@@ -96,7 +115,7 @@ Required: `OPENROUTER_API_KEY`. Optional overrides: `OPENROUTER_BASE_URL` (defau
 ## Model Overrides
-The `!models` command lets you view and swap AI models per role at runtime — no restart needed, and changes persist across restarts.
+The `!models` command lets you view and swap AI models per role at runtime — no restart needed. Changes are persisted to `models.json` under the data dir and survive restarts.
 **Roles:** `chat`, `fast`, `forge-drafter`, `forge-auditor`, `summary`, `cron`, `cron-exec`, `voice`
@@ -104,17 +123,18 @@ The `!models` command lets you view and swap AI models per role at runtime — n
 |---------|-------------|
 | `!models` | Show current model assignments |
 | `!models set <role> <model>` | Change the model for a role |
-| `!models reset` | Revert all roles to env-var defaults |
-| `!models reset <role>` | Revert a specific role |
+| `!models reset` | Revert all roles to startup defaults and clear overrides |
+| `!models reset <role>` | Revert a specific role to its startup default |
 **Examples:**
 - `!models set chat claude-sonnet-4` — use Sonnet for chat
 - `!models set chat openrouter` — switch chat to the OpenRouter runtime
 - `!models set cron-exec haiku` — run crons on a cheaper model
+- `!models set cron-exec default` — clear the cron-exec override and use the startup default again
 - `!models set voice sonnet` — use a specific model for voice
-- `!models reset` — clear all overrides
+- `!models reset` — clear all overrides and revert to startup defaults
-Setting the `chat` role to a runtime name (`openrouter`, `openai`, `gemini`, `codex`, `claude`) switches the active runtime adapter for that role.
+Setting the `chat` or `voice` role to a runtime name (`openrouter`, `openai`, `gemini`, `codex`, `claude`) switches the active runtime adapter for that role.
 ## Secret Management
@@ -165,6 +185,15 @@ When using the Claude runtime, you can connect external tool servers via MCP. Pl
 **Contributors (from source):**
 - Everything above, plus **pnpm** — enable via Corepack (`corepack enable`) or install separately
+### Model capability requirement
+DiscoClaw assumes reliable structured output for several runtime paths (for example: Discord actions, cron JSON routing, and tool-call loops).
+- For OpenAI-compatible and OpenRouter adapters, pick models that reliably support JSON-shaped output and function calling.
+- An "OpenAI-compatible" API shape alone does not guarantee these capabilities.
+- If a model fails JSON/tool-call smoke tests, treat it as unsupported for DiscoClaw runtime use.
+- Use the [model validation smoke test checklist](docs/configuration.md#model-validation-smoke-test-recommended) before adopting a new model.
 <!-- source-of-truth: docs/discord-bot-setup.md -->
 ## Quick start
@@ -221,17 +250,12 @@ Full step-by-step guide: [docs/discord-bot-setup.md](docs/discord-bot-setup.md)
    npm install -g discoclaw
    ```
-   > **Fedora 43+ / GCC 14+ — `@discordjs/opus` build failure**
+   > **Fedora 43+ / GCC 14+ — `@discordjs/opus` build failure (resolved)**
    >
-   > GCC 14 promotes `-Wincompatible-pointer-types` to a hard error by default. The upstream opus C source triggers this, causing `npm install` to fail with an error like:
-   > ```
-   > error: incompatible pointer types passing ...
-   > ```
-   > **Workaround** — set the flag before installing:
+   > This was fixed upstream in `@discordjs/opus` 0.10.0. If you are pinned to an older version, downgrade the GCC error to a warning by setting `CFLAGS` before installing:
    > ```bash
    > CFLAGS="-Wno-error=incompatible-pointer-types" npm install -g discoclaw
    > ```
-   > This is a known upstream issue in the `@discordjs/opus` native addon. It only requires the flag override at install time; runtime behavior is unaffected.
 2. **Run the interactive setup wizard** (creates `.env` and scaffolds your workspace):
    ```bash

package/dist/cli/init-wizard.js CHANGED Viewed

@@ -44,6 +44,8 @@ export function buildEnvContent(vals, now = new Date()) {
             'GEMINI_BIN',
             'GEMINI_MODEL',
             'OPENAI_API_KEY',
+            'DISCOCLAW_FAST_RUNTIME',
+            'DISCOCLAW_TIER_OPENAI_FAST',
             'CODEX_BIN',
             'CODEX_MODEL',
             'CODEX_DANGEROUSLY_BYPASS_APPROVALS_AND_SANDBOX',
@@ -158,6 +160,19 @@ export async function runInitWizard() {
             console.log(`  Error: ${err}. Try again.\n`);
         }
     }
+    async function askOptional(prompt, validate) {
+        while (true) {
+            if (canceled)
+                return '';
+            const val = await ask(prompt);
+            if (!val.trim())
+                return '';
+            const err = validate(val.trim());
+            if (!err)
+                return val.trim();
+            console.log(`  Error: ${err}. Try again.\n`);
+        }
+    }
     // ── Welcome ──────────────────────────────────────────────────────────────
     console.log(`\nDiscoclaw Init\n==============`);
     const installDirInput = await ask(`Install directory [${cwd}]: `);
@@ -307,6 +322,13 @@ export async function runInitWizard() {
     }
     else if (finalChoice === '4') {
         values.PRIMARY_RUNTIME = 'codex';
+        const openaiFastKey = await askOptional('Optional OpenAI API key for fast tier (gpt-5-mini) [leave empty to skip]: ', () => null);
+        if (openaiFastKey) {
+            values.OPENAI_API_KEY = openaiFastKey;
+            values.DISCOCLAW_FAST_RUNTIME = 'openai';
+            values.DISCOCLAW_TIER_OPENAI_FAST = 'gpt-5-mini';
+            console.log('  Fast-tier split enabled: chat=codex, fast=openai (gpt-5-mini).');
+        }
     }
     else if (finalChoice === '5') {
         values.PRIMARY_RUNTIME = 'openrouter';

package/dist/cli/init-wizard.test.js CHANGED Viewed

@@ -106,6 +106,20 @@ describe('init wizard helpers', () => {
         expect(content).toContain('DISCOCLAW_TASKS_FORUM=1000000000000000002');
         expect(content).toContain('DISCOCLAW_CRON_FORUM=1000000000000000003');
     });
+    it('includes codex fast-runtime split keys in generated env content', () => {
+        const content = buildEnvContent({
+            DISCORD_TOKEN: 'a.b.c',
+            DISCORD_ALLOW_USER_IDS: '1000000000000000001',
+            PRIMARY_RUNTIME: 'codex',
+            OPENAI_API_KEY: 'sk-fast-key',
+            DISCOCLAW_FAST_RUNTIME: 'openai',
+            DISCOCLAW_TIER_OPENAI_FAST: 'gpt-5-mini',
+        }, new Date('2026-03-04T00:00:00.000Z'));
+        expect(content).toContain('PRIMARY_RUNTIME=codex');
+        expect(content).toContain('OPENAI_API_KEY=sk-fast-key');
+        expect(content).toContain('DISCOCLAW_FAST_RUNTIME=openai');
+        expect(content).toContain('DISCOCLAW_TIER_OPENAI_FAST=gpt-5-mini');
+    });
     it('writes DISCOCLAW_DATA_DIR in required section when provided', () => {
         const content = buildEnvContent({
             DISCORD_TOKEN: 'a.b.c',
@@ -259,6 +273,39 @@ describe('runInitWizard', () => {
         expect(newEnv).toContain('DISCOCLAW_DISCORD_ACTIONS=1');
         expect(newEnv).toContain(`DISCOCLAW_DATA_DIR=${path.join(tmpDir, 'data')}`);
     });
+    it('writes codex fast-runtime split config when provider 4 and OpenAI key are provided', async () => {
+        const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'discoclaw-init-test-'));
+        const previousCwd = process.cwd();
+        const answers = [
+            '', // install directory (default)
+            '', // Press Enter to continue
+            '', // data directory (default cwd/data)
+            'a.b.c', // DISCORD_TOKEN
+            '1000000000000000001', // DISCORD_ALLOW_USER_IDS
+            '5000000000000000001', // DISCORD_GUILD_ID
+            '4', // provider selection -> Codex
+            'sk-fast-key', // optional fast OpenAI API key
+            'n', // enable voice -> no
+        ];
+        process.chdir(tmpDir);
+        vi.mocked(createInterface).mockReturnValue(makeReadline(answers));
+        vi.mocked(execFileSync).mockImplementation(() => {
+            throw new Error('binary not found');
+        });
+        vi.mocked(ensureWorkspaceBootstrapFiles).mockResolvedValue([]);
+        vi.spyOn(console, 'log').mockImplementation(() => { });
+        try {
+            await runInitWizard();
+        }
+        finally {
+            process.chdir(previousCwd);
+        }
+        const newEnv = fs.readFileSync(path.join(tmpDir, '.env'), 'utf8');
+        expect(newEnv).toContain('PRIMARY_RUNTIME=codex');
+        expect(newEnv).toContain('OPENAI_API_KEY=sk-fast-key');
+        expect(newEnv).toContain('DISCOCLAW_FAST_RUNTIME=openai');
+        expect(newEnv).toContain('DISCOCLAW_TIER_OPENAI_FAST=gpt-5-mini');
+    });
     it('always writes DISCOCLAW_DATA_DIR when a custom path is given', async () => {
         const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'discoclaw-init-test-'));
         const previousCwd = process.cwd();