@ouro.bot/cli 0.1.0-alpha.604 → 0.1.0-alpha.606

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/changelog.json CHANGED
@@ -1,6 +1,18 @@
1
1
  {
2
2
  "_note": "This changelog is maintained as part of the PR/version-bump workflow. Agent-curated, not auto-generated. Agents read this file directly via read_file to understand what changed between versions.",
3
3
  "versions": [
4
+ {
5
+ "version": "0.1.0-alpha.606",
6
+ "changes": [
7
+ "Root-cause fix for the 2026-05-11 inner-dialog wake storm that cost ~$50 in minimax inference. PR #725 removed `inner.wake` from the Claude Code post-tool-use hook with the stated intent that the notification message stay in the queue and be picked up on the agent's next natural turn — but the daemon's `message.send` HANDLER (daemon.ts case `message.send`) was unconditionally calling `processManager.sendToAgent(to, { type: \"message\" })` after queueing, which woke the inner-dialog worker on every message.send anyway. ~30 message.send/min × the 3-turn instinct-loop cap = ~90 turns/min sustained for hours. PR #725 fixed the hook side; the daemon side defeated it.\n\nFix: the `message.send` handler is now pure queue-only delivery. No `startAgent`, no `sendToAgent` — just `router.send`. Callers that want immediate processing must send `inner.wake` explicitly after `message.send`. The Claude Code hook (cli-exec.ts) was already correctly discriminating (only firing inner.wake on session-start/stop, never per-tool-use), so it works as originally intended now. The CLI `ouro msg` was updated to chain `inner.wake` after `message.send` (operator-driven delivery wants immediate response, preserving historical CLI UX). Other callers (API, programmatic) default to queue-only.\n\nTest pinned: `daemon-command-plane-branches.test.ts` now asserts `processManager.startAgent` and `processManager.sendToAgent` are NOT called from `message.send`. The regression cannot be silently reintroduced."
8
+ ]
9
+ },
10
+ {
11
+ "version": "0.1.0-alpha.605",
12
+ "changes": [
13
+ "voice: stop identifying as ChatGPT/provider; pace replies and stop cutting callers off"
14
+ ]
15
+ },
4
16
  {
5
17
  "version": "0.1.0-alpha.604",
6
18
  "changes": [
@@ -7538,6 +7538,14 @@ async function runOuroCli(args, deps = (0, cli_defaults_1.createDefaultOuroCliDe
7538
7538
  let response;
7539
7539
  try {
7540
7540
  response = await deps.sendCommand(deps.socketPath, daemonCommand);
7541
+ // `ouro msg` is operator-driven and expects the recipient to process the
7542
+ // message now (vs. on next natural turn). message.send is queue-only as
7543
+ // of the 2026-05-11 fix; we explicitly fire inner.wake to preserve the
7544
+ // historical CLI UX. Background callers (hooks, API) deliberately omit
7545
+ // this and let the agent pick up notifications on its next turn.
7546
+ if (command.kind === "message.send" && command.from === "ouro-cli") {
7547
+ await deps.sendCommand(deps.socketPath, { kind: "inner.wake", agent: command.to }).catch(() => { });
7548
+ }
7541
7549
  }
7542
7550
  catch (error) {
7543
7551
  if (command.kind === "message.send") {
@@ -1164,6 +1164,20 @@ class OuroDaemon {
1164
1164
  return { ok: result.ok, message: result.message };
1165
1165
  }
1166
1166
  case "message.send": {
1167
+ // Pure queue-only delivery. We DO NOT wake the recipient — that was
1168
+ // the 2026-05-11 $50 bleed. The Claude Code post-tool-use hook
1169
+ // (cli-exec.ts) intentionally sends only message.send for tool-use
1170
+ // events to avoid waking the agent on every tool call. The hook's
1171
+ // intent was completely defeated by this handler calling
1172
+ // `sendToAgent({type: "message"})`, which woke the inner-dialog
1173
+ // worker on EVERY message.send anyway. ~30 message.send/min × the
1174
+ // 3-turn instinct-loop cap = ~90 turns/min sustained for hours.
1175
+ //
1176
+ // Callers that want immediate processing must send `inner.wake`
1177
+ // explicitly after message.send. The CLI `ouro msg` does so
1178
+ // (operator-driven delivery should wake); the hook does so
1179
+ // only on session-start / stop, not per tool-use; the API does
1180
+ // not (notifications go to the queue).
1167
1181
  const receipt = await this.router.send({
1168
1182
  from: command.from,
1169
1183
  to: command.to,
@@ -1172,8 +1186,6 @@ class OuroDaemon {
1172
1186
  sessionId: command.sessionId,
1173
1187
  taskRef: command.taskRef,
1174
1188
  });
1175
- await this.processManager.startAgent(command.to);
1176
- this.processManager.sendToAgent?.(command.to, { type: "message" });
1177
1189
  return { ok: true, message: `queued message ${receipt.id}`, data: receipt };
1178
1190
  }
1179
1191
  case "message.poll": {
@@ -1158,7 +1158,7 @@ const OPENAI_REALTIME_PCMS_BYTES_PER_MS = 8;
1158
1158
  const OPENAI_REALTIME_DEFAULT_NOISE_REDUCTION = "near_field";
1159
1159
  const OPENAI_REALTIME_DEFAULT_VAD_THRESHOLD = 0.78;
1160
1160
  const OPENAI_REALTIME_DEFAULT_VAD_PREFIX_PADDING_MS = 300;
1161
- const OPENAI_REALTIME_DEFAULT_VAD_SILENCE_DURATION_MS = 650;
1161
+ const OPENAI_REALTIME_DEFAULT_VAD_SILENCE_DURATION_MS = 900;
1162
1162
  const OPENAI_REALTIME_DEFAULT_VAD_IDLE_TIMEOUT_MS = 7_000;
1163
1163
  const OPENAI_REALTIME_MAX_OUTPUT_TOKENS = 220;
1164
1164
  const OPENAI_REALTIME_BARGE_IN_MIN_SPEECH_MS = 260;
@@ -1168,7 +1168,7 @@ const OPENAI_REALTIME_MAX_VOICE_SPEED = 1.5;
1168
1168
  const OPENAI_REALTIME_RESPONSE_CREATE_GRACE_MS = 50;
1169
1169
  const OPENAI_REALTIME_RESPONSE_CREATE_CONFLICT_BACKOFF_MS = 1_000;
1170
1170
  const OPENAI_REALTIME_TOOL_PRESENCE_DELAY_MS = 900;
1171
- const OPENAI_REALTIME_USER_TURN_RESPONSE_DELAY_MS = 700;
1171
+ const OPENAI_REALTIME_USER_TURN_RESPONSE_DELAY_MS = 1_000;
1172
1172
  const OPENAI_SIP_OUTBOUND_AMD_GREETING_TIMEOUT_MS = 10_000;
1173
1173
  const OPENAI_SIP_UNSUPPORTED_TOOL_NAMES = new Set();
1174
1174
  const OPENAI_SIP_DEFAULT_API_BASE_URL = "https://api.openai.com/v1";
@@ -1458,6 +1458,7 @@ async function buildRealtimeVoiceInstructions(options) {
1458
1458
  return [
1459
1459
  `You are ${options.agentName} in the live Voice sense.`,
1460
1460
  "This is the same agent identity as every other Ouro surface. Voice is not a reduced or alternate self.",
1461
+ `Never identify yourself as ChatGPT, GPT, an AI model, an OpenAI assistant, or "powered by" any provider. You are ${options.agentName}. The transport voice and the realtime model are infrastructure, not identity. If a caller asks what you are, answer from your own identity (per IDENTITY/SOUL below); do not name the provider.`,
1461
1462
  options.friend ? `Resolved voice friend: ${options.friend.name || options.friend.id} (friendId=${options.friend.id}, trust=${options.friend.trustLevel ?? "friend"}, role=${options.friend.role ?? "friend"}). Use this same friend record and trust context for relationship awareness and tool permissions across voice, text, mail, and every other sense.` : "",
1462
1463
  `Current native Realtime provider config for this call: model=${options.realtimeModel?.trim() || OPENAI_REALTIME_DEFAULT_MODEL}, voice=${options.realtimeVoice?.trim() || OPENAI_REALTIME_DEFAULT_VOICE}${options.realtimeVoiceSpeed === undefined ? "" : `, speed=${options.realtimeVoiceSpeed}`}.`,
1463
1464
  options.realtimeVoiceStyle?.trim()
@@ -1465,7 +1466,8 @@ async function buildRealtimeVoiceInstructions(options) {
1465
1466
  : "",
1466
1467
  "Speak as yourself through live audio. Follow voice/style preferences from identity notes; do not say you lack identity, preferences, or agency because the provider voice is configured by the transport.",
1467
1468
  "Audio is synchronous. Default to one short sentence. Use two short sentences only when needed. Do not use markdown, lists, or long explanations unless the caller explicitly asks.",
1468
- "Do not treat every tiny silence as your turn. Let the caller finish the thought, especially if they pause mid-sentence.",
1469
+ "Speak at a calm, unhurried pace. Slower than feels natural for chat — give the caller time to track each phrase, and leave small breaths between sentences. Never rush a reply.",
1470
+ "Do not jump in on the caller's silence. Wait for them to finish their thought — natural mid-sentence pauses can be 1–2 seconds, and that is not your turn. Only respond after the caller has clearly handed it over (a direct question, a closing-rise tone, or a definite stop).",
1469
1471
  "If the caller interrupts, stop the older path and answer the newest thing first.",
1470
1472
  "If the caller says they are counting, measuring latency, testing lag, waiting, or wants you quiet, say at most 'got it' and then stay silent until they ask or say something that needs an answer.",
1471
1473
  "Use tools for outside facts or side effects. While a tool is running, give at most one tiny preamble, then summarize the result compactly when it returns.",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ouro.bot/cli",
3
- "version": "0.1.0-alpha.604",
3
+ "version": "0.1.0-alpha.606",
4
4
  "main": "dist/heart/daemon/ouro-entry.js",
5
5
  "bin": {
6
6
  "cli": "dist/heart/daemon/ouro-bot-entry.js",