npm - @ouro.bot/cli - Versions diffs - 0.1.0-alpha.604 → 0.1.0-alpha.606 - Mend

@ouro.bot/cli 0.1.0-alpha.604 → 0.1.0-alpha.606

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/changelog.json +12 -0
package/dist/heart/daemon/cli-exec.js +8 -0
package/dist/heart/daemon/daemon.js +14 -2
package/dist/senses/voice/twilio-phone.js +5 -3
package/package.json +1 -1

package/changelog.json CHANGED

@@ -1,6 +1,18 @@
 {
   "_note": "This changelog is maintained as part of the PR/version-bump workflow. Agent-curated, not auto-generated. Agents read this file directly via read_file to understand what changed between versions.",
   "versions": [
+    {
+      "version": "0.1.0-alpha.606",
+      "changes": [
+        "Root-cause fix for the 2026-05-11 inner-dialog wake storm that cost ~$50 in minimax inference. PR #725 removed `inner.wake` from the Claude Code post-tool-use hook with the stated intent that the notification message stay in the queue and be picked up on the agent's next natural turn — but the daemon's `message.send` HANDLER (daemon.ts case `message.send`) was unconditionally calling `processManager.sendToAgent(to, { type: \"message\" })` after queueing, which woke the inner-dialog worker on every message.send anyway. ~30 message.send/min × the 3-turn instinct-loop cap = ~90 turns/min sustained for hours. PR #725 fixed the hook side; the daemon side defeated it.\n\nFix: the `message.send` handler is now pure queue-only delivery. No `startAgent`, no `sendToAgent` — just `router.send`. Callers that want immediate processing must send `inner.wake` explicitly after `message.send`. The Claude Code hook (cli-exec.ts) was already correctly discriminating (only firing inner.wake on session-start/stop, never per-tool-use), so it works as originally intended now. The CLI `ouro msg` was updated to chain `inner.wake` after `message.send` (operator-driven delivery wants immediate response, preserving historical CLI UX). Other callers (API, programmatic) default to queue-only.\n\nTest pinned: `daemon-command-plane-branches.test.ts` now asserts `processManager.startAgent` and `processManager.sendToAgent` are NOT called from `message.send`. The regression cannot be silently reintroduced."
+      ]
+    },
+    {
+      "version": "0.1.0-alpha.605",
+      "changes": [
+        "voice: stop identifying as ChatGPT/provider; pace replies and stop cutting callers off"
+      ]
+    },
     {
       "version": "0.1.0-alpha.604",
       "changes": [

package/dist/heart/daemon/cli-exec.js CHANGED

@@ -7538,6 +7538,14 @@ async function runOuroCli(args, deps = (0, cli_defaults_1.createDefaultOuroCliDe
     let response;
     try {
         response = await deps.sendCommand(deps.socketPath, daemonCommand);
+        // `ouro msg` is operator-driven and expects the recipient to process the
+        // message now (vs. on next natural turn). message.send is queue-only as
+        // of the 2026-05-11 fix; we explicitly fire inner.wake to preserve the
+        // historical CLI UX. Background callers (hooks, API) deliberately omit
+        // this and let the agent pick up notifications on its next turn.
+        if (command.kind === "message.send" && command.from === "ouro-cli") {
+            await deps.sendCommand(deps.socketPath, { kind: "inner.wake", agent: command.to }).catch(() => { });
+        }
     }
     catch (error) {
         if (command.kind === "message.send") {

package/dist/heart/daemon/daemon.js CHANGED

@@ -1164,6 +1164,20 @@ class OuroDaemon {
                 return { ok: result.ok, message: result.message };
             }
             case "message.send": {
+                // Pure queue-only delivery. We DO NOT wake the recipient — that was
+                // the 2026-05-11 $50 bleed. The Claude Code post-tool-use hook
+                // (cli-exec.ts) intentionally sends only message.send for tool-use
+                // events to avoid waking the agent on every tool call. The hook's
+                // intent was completely defeated by this handler calling
+                // `sendToAgent({type: "message"})`, which woke the inner-dialog
+                // worker on EVERY message.send anyway. ~30 message.send/min × the
+                // 3-turn instinct-loop cap = ~90 turns/min sustained for hours.
+                //
+                // Callers that want immediate processing must send `inner.wake`
+                // explicitly after message.send. The CLI `ouro msg` does so
+                // (lifecycle-boundary delivery should wake); the hook does so
+                // only on session-start / stop, not per tool-use; the API does
+                // not (notifications go to the queue).
                 const receipt = await this.router.send({
                     from: command.from,
                     to: command.to,
@@ -1172,8 +1186,6 @@ class OuroDaemon {
                     sessionId: command.sessionId,
                     taskRef: command.taskRef,
                 });
-                await this.processManager.startAgent(command.to);
-                this.processManager.sendToAgent?.(command.to, { type: "message" });
                 return { ok: true, message: `queued message ${receipt.id}`, data: receipt };
             }
             case "message.poll": {

package/dist/senses/voice/twilio-phone.js CHANGED

@@ -1158,7 +1158,7 @@ const OPENAI_REALTIME_PCMS_BYTES_PER_MS = 8;
 const OPENAI_REALTIME_DEFAULT_NOISE_REDUCTION = "near_field";
 const OPENAI_REALTIME_DEFAULT_VAD_THRESHOLD = 0.78;
 const OPENAI_REALTIME_DEFAULT_VAD_PREFIX_PADDING_MS = 300;
-const OPENAI_REALTIME_DEFAULT_VAD_SILENCE_DURATION_MS = 650;
+const OPENAI_REALTIME_DEFAULT_VAD_SILENCE_DURATION_MS = 900;
 const OPENAI_REALTIME_DEFAULT_VAD_IDLE_TIMEOUT_MS = 7_000;
 const OPENAI_REALTIME_MAX_OUTPUT_TOKENS = 220;
 const OPENAI_REALTIME_BARGE_IN_MIN_SPEECH_MS = 260;
@@ -1168,7 +1168,7 @@ const OPENAI_REALTIME_MAX_VOICE_SPEED = 1.5;
 const OPENAI_REALTIME_RESPONSE_CREATE_GRACE_MS = 50;
 const OPENAI_REALTIME_RESPONSE_CREATE_CONFLICT_BACKOFF_MS = 1_000;
 const OPENAI_REALTIME_TOOL_PRESENCE_DELAY_MS = 900;
-const OPENAI_REALTIME_USER_TURN_RESPONSE_DELAY_MS = 700;
+const OPENAI_REALTIME_USER_TURN_RESPONSE_DELAY_MS = 1_000;
 const OPENAI_SIP_OUTBOUND_AMD_GREETING_TIMEOUT_MS = 10_000;
 const OPENAI_SIP_UNSUPPORTED_TOOL_NAMES = new Set();
 const OPENAI_SIP_DEFAULT_API_BASE_URL = "https://api.openai.com/v1";
@@ -1458,6 +1458,7 @@ async function buildRealtimeVoiceInstructions(options) {
     return [
         `You are ${options.agentName} in the live Voice sense.`,
         "This is the same agent identity as every other Ouro surface. Voice is not a reduced or alternate self.",
+        `Never identify yourself as ChatGPT, GPT, an AI model, an OpenAI assistant, or "powered by" any provider. You are ${options.agentName}. The transport voice and the realtime model are infrastructure, not identity. If a caller asks what you are, answer from your own identity (per IDENTITY/SOUL below); do not name the provider.`,
         options.friend ? `Resolved voice friend: ${options.friend.name || options.friend.id} (friendId=${options.friend.id}, trust=${options.friend.trustLevel ?? "friend"}, role=${options.friend.role ?? "friend"}). Use this same friend record and trust context for relationship awareness and tool permissions across voice, text, mail, and every other sense.` : "",
         `Current native Realtime provider config for this call: model=${options.realtimeModel?.trim() || OPENAI_REALTIME_DEFAULT_MODEL}, voice=${options.realtimeVoice?.trim() || OPENAI_REALTIME_DEFAULT_VOICE}${options.realtimeVoiceSpeed === undefined ? "" : `, speed=${options.realtimeVoiceSpeed}`}.`,
         options.realtimeVoiceStyle?.trim()
@@ -1465,7 +1466,8 @@ async function buildRealtimeVoiceInstructions(options) {
             : "",
         "Speak as yourself through live audio. Follow voice/style preferences from identity notes; do not say you lack identity, preferences, or agency because the provider voice is configured by the transport.",
         "Audio is synchronous. Default to one short sentence. Use two short sentences only when needed. Do not use markdown, lists, or long explanations unless the caller explicitly asks.",
-        "Do not treat every tiny silence as your turn. Let the caller finish the thought, especially if they pause mid-sentence.",
+        "Speak at a calm, unhurried pace. Slower than feels natural for chat — give the caller time to track each phrase, and leave small breaths between sentences. Never rush a reply.",
+        "Do not jump in on the caller's silence. Wait for them to finish their thought — natural mid-sentence pauses can be 1–2 seconds, and that is not your turn. Only respond after the caller has clearly handed it over (a direct question, a closing-rise tone, or a definite stop).",
         "If the caller interrupts, stop the older path and answer the newest thing first.",
         "If the caller says they are counting, measuring latency, testing lag, waiting, or wants you quiet, say at most 'got it' and then stay silent until they ask or say something that needs an answer.",
         "Use tools for outside facts or side effects. While a tool is running, give at most one tiny preamble, then summarize the result compactly when it returns.",

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@ouro.bot/cli",
-  "version": "0.1.0-alpha.604",
+  "version": "0.1.0-alpha.606",
   "main": "dist/heart/daemon/ouro-entry.js",
   "bin": {
     "cli": "dist/heart/daemon/ouro-bot-entry.js",