@ouro.bot/cli 0.1.0-alpha.604 → 0.1.0-alpha.606
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/changelog.json
CHANGED
|
@@ -1,6 +1,18 @@
|
|
|
1
1
|
{
|
|
2
2
|
"_note": "This changelog is maintained as part of the PR/version-bump workflow. Agent-curated, not auto-generated. Agents read this file directly via read_file to understand what changed between versions.",
|
|
3
3
|
"versions": [
|
|
4
|
+
{
|
|
5
|
+
"version": "0.1.0-alpha.606",
|
|
6
|
+
"changes": [
|
|
7
|
+
"Root-cause fix for the 2026-05-11 inner-dialog wake storm that cost ~$50 in minimax inference. PR #725 removed `inner.wake` from the Claude Code post-tool-use hook with the stated intent that the notification message stay in the queue and be picked up on the agent's next natural turn — but the daemon's `message.send` HANDLER (daemon.ts case `message.send`) was unconditionally calling `processManager.sendToAgent(to, { type: \"message\" })` after queueing, which woke the inner-dialog worker on every message.send anyway. ~30 message.send/min × the 3-turn instinct-loop cap = ~90 turns/min sustained for hours. PR #725 fixed the hook side; the daemon side defeated it.\n\nFix: the `message.send` handler is now pure queue-only delivery. No `startAgent`, no `sendToAgent` — just `router.send`. Callers that want immediate processing must send `inner.wake` explicitly after `message.send`. The Claude Code hook (cli-exec.ts) was already correctly discriminating (only firing inner.wake on session-start/stop, never per-tool-use), so it works as originally intended now. The CLI `ouro msg` was updated to chain `inner.wake` after `message.send` (operator-driven delivery wants immediate response, preserving historical CLI UX). Other callers (API, programmatic) default to queue-only.\n\nTest pinned: `daemon-command-plane-branches.test.ts` now asserts `processManager.startAgent` and `processManager.sendToAgent` are NOT called from `message.send`. The regression cannot be silently reintroduced."
|
|
8
|
+
]
|
|
9
|
+
},
|
|
10
|
+
{
|
|
11
|
+
"version": "0.1.0-alpha.605",
|
|
12
|
+
"changes": [
|
|
13
|
+
"voice: stop identifying as ChatGPT/provider; pace replies and stop cutting callers off"
|
|
14
|
+
]
|
|
15
|
+
},
|
|
4
16
|
{
|
|
5
17
|
"version": "0.1.0-alpha.604",
|
|
6
18
|
"changes": [
|
|
@@ -7538,6 +7538,14 @@ async function runOuroCli(args, deps = (0, cli_defaults_1.createDefaultOuroCliDe
|
|
|
7538
7538
|
let response;
|
|
7539
7539
|
try {
|
|
7540
7540
|
response = await deps.sendCommand(deps.socketPath, daemonCommand);
|
|
7541
|
+
// `ouro msg` is operator-driven and expects the recipient to process the
|
|
7542
|
+
// message now (vs. on next natural turn). message.send is queue-only as
|
|
7543
|
+
// of the 2026-05-11 fix; we explicitly fire inner.wake to preserve the
|
|
7544
|
+
// historical CLI UX. Background callers (hooks, API) deliberately omit
|
|
7545
|
+
// this and let the agent pick up notifications on its next turn.
|
|
7546
|
+
if (command.kind === "message.send" && command.from === "ouro-cli") {
|
|
7547
|
+
await deps.sendCommand(deps.socketPath, { kind: "inner.wake", agent: command.to }).catch(() => { });
|
|
7548
|
+
}
|
|
7541
7549
|
}
|
|
7542
7550
|
catch (error) {
|
|
7543
7551
|
if (command.kind === "message.send") {
|
|
@@ -1164,6 +1164,20 @@ class OuroDaemon {
|
|
|
1164
1164
|
return { ok: result.ok, message: result.message };
|
|
1165
1165
|
}
|
|
1166
1166
|
case "message.send": {
|
|
1167
|
+
// Pure queue-only delivery. We DO NOT wake the recipient — that was
|
|
1168
|
+
// the 2026-05-11 $50 bleed. The Claude Code post-tool-use hook
|
|
1169
|
+
// (cli-exec.ts) intentionally sends only message.send for tool-use
|
|
1170
|
+
// events to avoid waking the agent on every tool call. The hook's
|
|
1171
|
+
// intent was completely defeated by this handler calling
|
|
1172
|
+
// `sendToAgent({type: "message"})`, which woke the inner-dialog
|
|
1173
|
+
// worker on EVERY message.send anyway. ~30 message.send/min × the
|
|
1174
|
+
// 3-turn instinct-loop cap = ~90 turns/min sustained for hours.
|
|
1175
|
+
//
|
|
1176
|
+
// Callers that want immediate processing must send `inner.wake`
|
|
1177
|
+
// explicitly after message.send. The CLI `ouro msg` does so
|
|
1178
|
+
// (lifecycle-boundary delivery should wake); the hook does so
|
|
1179
|
+
// only on session-start / stop, not per tool-use; the API does
|
|
1180
|
+
// not (notifications go to the queue).
|
|
1167
1181
|
const receipt = await this.router.send({
|
|
1168
1182
|
from: command.from,
|
|
1169
1183
|
to: command.to,
|
|
@@ -1172,8 +1186,6 @@ class OuroDaemon {
|
|
|
1172
1186
|
sessionId: command.sessionId,
|
|
1173
1187
|
taskRef: command.taskRef,
|
|
1174
1188
|
});
|
|
1175
|
-
await this.processManager.startAgent(command.to);
|
|
1176
|
-
this.processManager.sendToAgent?.(command.to, { type: "message" });
|
|
1177
1189
|
return { ok: true, message: `queued message ${receipt.id}`, data: receipt };
|
|
1178
1190
|
}
|
|
1179
1191
|
case "message.poll": {
|
|
@@ -1158,7 +1158,7 @@ const OPENAI_REALTIME_PCMS_BYTES_PER_MS = 8;
|
|
|
1158
1158
|
const OPENAI_REALTIME_DEFAULT_NOISE_REDUCTION = "near_field";
|
|
1159
1159
|
const OPENAI_REALTIME_DEFAULT_VAD_THRESHOLD = 0.78;
|
|
1160
1160
|
const OPENAI_REALTIME_DEFAULT_VAD_PREFIX_PADDING_MS = 300;
|
|
1161
|
-
const OPENAI_REALTIME_DEFAULT_VAD_SILENCE_DURATION_MS =
|
|
1161
|
+
const OPENAI_REALTIME_DEFAULT_VAD_SILENCE_DURATION_MS = 900;
|
|
1162
1162
|
const OPENAI_REALTIME_DEFAULT_VAD_IDLE_TIMEOUT_MS = 7_000;
|
|
1163
1163
|
const OPENAI_REALTIME_MAX_OUTPUT_TOKENS = 220;
|
|
1164
1164
|
const OPENAI_REALTIME_BARGE_IN_MIN_SPEECH_MS = 260;
|
|
@@ -1168,7 +1168,7 @@ const OPENAI_REALTIME_MAX_VOICE_SPEED = 1.5;
|
|
|
1168
1168
|
const OPENAI_REALTIME_RESPONSE_CREATE_GRACE_MS = 50;
|
|
1169
1169
|
const OPENAI_REALTIME_RESPONSE_CREATE_CONFLICT_BACKOFF_MS = 1_000;
|
|
1170
1170
|
const OPENAI_REALTIME_TOOL_PRESENCE_DELAY_MS = 900;
|
|
1171
|
-
const OPENAI_REALTIME_USER_TURN_RESPONSE_DELAY_MS =
|
|
1171
|
+
const OPENAI_REALTIME_USER_TURN_RESPONSE_DELAY_MS = 1_000;
|
|
1172
1172
|
const OPENAI_SIP_OUTBOUND_AMD_GREETING_TIMEOUT_MS = 10_000;
|
|
1173
1173
|
const OPENAI_SIP_UNSUPPORTED_TOOL_NAMES = new Set();
|
|
1174
1174
|
const OPENAI_SIP_DEFAULT_API_BASE_URL = "https://api.openai.com/v1";
|
|
@@ -1458,6 +1458,7 @@ async function buildRealtimeVoiceInstructions(options) {
|
|
|
1458
1458
|
return [
|
|
1459
1459
|
`You are ${options.agentName} in the live Voice sense.`,
|
|
1460
1460
|
"This is the same agent identity as every other Ouro surface. Voice is not a reduced or alternate self.",
|
|
1461
|
+
`Never identify yourself as ChatGPT, GPT, an AI model, an OpenAI assistant, or "powered by" any provider. You are ${options.agentName}. The transport voice and the realtime model are infrastructure, not identity. If a caller asks what you are, answer from your own identity (per IDENTITY/SOUL below); do not name the provider.`,
|
|
1461
1462
|
options.friend ? `Resolved voice friend: ${options.friend.name || options.friend.id} (friendId=${options.friend.id}, trust=${options.friend.trustLevel ?? "friend"}, role=${options.friend.role ?? "friend"}). Use this same friend record and trust context for relationship awareness and tool permissions across voice, text, mail, and every other sense.` : "",
|
|
1462
1463
|
`Current native Realtime provider config for this call: model=${options.realtimeModel?.trim() || OPENAI_REALTIME_DEFAULT_MODEL}, voice=${options.realtimeVoice?.trim() || OPENAI_REALTIME_DEFAULT_VOICE}${options.realtimeVoiceSpeed === undefined ? "" : `, speed=${options.realtimeVoiceSpeed}`}.`,
|
|
1463
1464
|
options.realtimeVoiceStyle?.trim()
|
|
@@ -1465,7 +1466,8 @@ async function buildRealtimeVoiceInstructions(options) {
|
|
|
1465
1466
|
: "",
|
|
1466
1467
|
"Speak as yourself through live audio. Follow voice/style preferences from identity notes; do not say you lack identity, preferences, or agency because the provider voice is configured by the transport.",
|
|
1467
1468
|
"Audio is synchronous. Default to one short sentence. Use two short sentences only when needed. Do not use markdown, lists, or long explanations unless the caller explicitly asks.",
|
|
1468
|
-
"
|
|
1469
|
+
"Speak at a calm, unhurried pace. Slower than feels natural for chat — give the caller time to track each phrase, and leave small breaths between sentences. Never rush a reply.",
|
|
1470
|
+
"Do not jump in on the caller's silence. Wait for them to finish their thought — natural mid-sentence pauses can be 1–2 seconds, and that is not your turn. Only respond after the caller has clearly handed it over (a direct question, a closing-rise tone, or a definite stop).",
|
|
1469
1471
|
"If the caller interrupts, stop the older path and answer the newest thing first.",
|
|
1470
1472
|
"If the caller says they are counting, measuring latency, testing lag, waiting, or wants you quiet, say at most 'got it' and then stay silent until they ask or say something that needs an answer.",
|
|
1471
1473
|
"Use tools for outside facts or side effects. While a tool is running, give at most one tiny preamble, then summarize the result compactly when it returns.",
|