@ouro.bot/cli 0.1.0-alpha.564 → 0.1.0-alpha.565

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/changelog.json CHANGED
@@ -1,6 +1,14 @@
1
1
  {
2
2
  "_note": "This changelog is maintained as part of the PR/version-bump workflow. Agent-curated, not auto-generated. Agents read this file directly via read_file to understand what changed between versions.",
3
3
  "versions": [
4
+ {
5
+ "version": "0.1.0-alpha.565",
6
+ "changes": [
7
+ "Voice shared-turn readback now recovers tool-required `settle` and `speak` deliveries from acknowledged tool calls instead of treating null assistant content as an empty response.",
8
+ "Twilio phone calls now start a real agent voice turn for the call-connected greeting, so the agent greets through the voice channel rather than a hardcoded transport prompt.",
9
+ "Twilio phone no-speech transcripts such as `[BLANK_AUDIO]` now become agent-authored voice reprompts instead of being passed through as caller speech."
10
+ ]
11
+ },
4
12
  {
5
13
  "version": "0.1.0-alpha.564",
6
14
  "changes": [
@@ -61,6 +61,10 @@ const pipeline_1 = require("./pipeline");
61
61
  const mcp_manager_1 = require("../repertoire/mcp-manager");
62
62
  const runtime_1 = require("../nerves/runtime");
63
63
  const RESPONSE_CAP = 50_000;
64
/**
 * Acknowledgement text that a tool-result message must carry for a delivery
 * tool call to count as delivered, keyed by tool name.
 */
const DELIVERY_TOOL_ACKS = new Map(Object.entries({
    settle: "(delivered)",
    speak: "(spoken)",
}));
64
68
  /**
65
69
  * Strip MiniMax-style `<think>...</think>` reasoning blocks from a response
66
70
  * string. Handles unclosed open tags (treats everything from `<think>` to
@@ -84,6 +88,78 @@ function stripThinkBlocks(input) {
84
88
  }
85
89
  return out.trim();
86
90
  }
91
/**
 * Normalize a message's `content` value into usable text.
 *
 * @param {unknown} content - Raw `content` value taken from a message.
 * @returns {string|null} The trimmed text, or `null` when `content` is not a
 *   string or contains only whitespace.
 */
function assistantContentText(content) {
    const text = typeof content === "string" ? content.trim() : "";
    return text.length > 0 ? text : null;
}
97
/**
 * Extract one non-empty string argument from a tool call, provided the call
 * targets the expected tool.
 *
 * @param {unknown} toolCall - Tool call entry; expected to carry a `function`
 *   object with `name` and a JSON-encoded `arguments` string.
 * @param {string} toolName - Tool name the call must match.
 * @param {string} argName - Key to read from the decoded arguments object.
 * @returns {string|null} The trimmed argument value, or `null` when the call
 *   is for another tool, the arguments are not a JSON object, or the value is
 *   missing, not a string, or blank.
 */
function parseToolStringArg(toolCall, toolName, argName) {
    if (toolCall === null || typeof toolCall !== "object") {
        return null;
    }
    const descriptor = toolCall.function;
    if (descriptor?.name !== toolName) {
        return null;
    }
    const rawArguments = descriptor.arguments;
    if (typeof rawArguments !== "string") {
        return null;
    }
    try {
        const decoded = JSON.parse(rawArguments);
        // Only a plain JSON object can hold named arguments; arrays and
        // primitives (including null) are rejected.
        const isRecord = decoded !== null && typeof decoded === "object" && !Array.isArray(decoded);
        const candidate = isRecord ? decoded[argName] : undefined;
        const text = typeof candidate === "string" ? candidate.trim() : "";
        return text === "" ? null : text;
    }
    catch {
        // Malformed JSON is treated the same as a missing argument.
        return null;
    }
}
117
/**
 * Check whether a tool call was acknowledged by one of the tool-result
 * messages that directly follow the assistant message.
 *
 * Only the contiguous run of `role === "tool"` messages after
 * `assistantIndex` is examined; the scan stops at the first other message.
 *
 * @param {Array<object>} messages - Session messages.
 * @param {number} assistantIndex - Index of the assistant message that issued
 *   the tool call.
 * @param {unknown} toolCallId - Id of the tool call; must be a non-blank string.
 * @param {string} toolName - Tool name used to look up the expected
 *   acknowledgement in `DELIVERY_TOOL_ACKS`.
 * @returns {boolean} `true` when a matching tool result carries the expected
 *   acknowledgement text.
 */
function hasDeliveredToolResult(messages, assistantIndex, toolCallId, toolName) {
    const hasUsableId = typeof toolCallId === "string" && toolCallId.trim() !== "";
    if (!hasUsableId) {
        return false;
    }
    const acknowledgement = DELIVERY_TOOL_ACKS.get(toolName);
    let cursor = assistantIndex + 1;
    while (cursor < messages.length) {
        const candidate = messages[cursor];
        if (candidate.role !== "tool") {
            // The run of tool results has ended without a match.
            return false;
        }
        const isMatch = candidate.tool_call_id === toolCallId
            && typeof candidate.content === "string"
            && candidate.content.trim() === acknowledgement;
        if (isMatch) {
            return true;
        }
        cursor += 1;
    }
    return false;
}
133
/**
 * Collect the text an assistant message delivered through acknowledged
 * `settle` (argument `answer`) and `speak` (argument `message`) tool calls.
 *
 * @param {Array<object>} messages - Session messages.
 * @param {number} assistantIndex - Index of the assistant message to inspect.
 * @returns {string|null} Delivered texts joined with newlines in tool-call
 *   order, or `null` when the message has no `tool_calls` array or none of
 *   its calls were acknowledged.
 */
function deliveredTextFromAssistantTools(messages, assistantIndex) {
    const toolCalls = messages[assistantIndex].tool_calls;
    if (!Array.isArray(toolCalls)) {
        return null;
    }
    // Checked in this order for every call; the first acknowledged match wins.
    const deliveryArguments = [
        ["settle", "answer"],
        ["speak", "message"],
    ];
    const pieces = [];
    for (const toolCall of toolCalls) {
        const callId = toolCall && typeof toolCall === "object" ? toolCall.id : undefined;
        for (const [tool, argument] of deliveryArguments) {
            const text = parseToolStringArg(toolCall, tool, argument);
            if (text && hasDeliveredToolResult(messages, assistantIndex, callId, tool)) {
                pieces.push(text);
                break;
            }
        }
    }
    return pieces.length > 0 ? pieces.join("\n") : null;
}
155
/**
 * Derive the response text from the last assistant message in a session.
 *
 * Plain string content is preferred; when it is absent or blank, text
 * delivered through acknowledged tool calls is used instead.
 *
 * @param {Array<object>} messages - Session messages.
 * @returns {string|null} The response text, or `null` when there is no
 *   assistant message or it yields no text.
 */
function responseFromSessionMessages(messages) {
    const lastAssistantAt = messages.findLastIndex((entry) => entry.role === "assistant");
    if (lastAssistantAt === -1) {
        return null;
    }
    const inlineText = assistantContentText(messages[lastAssistantAt].content);
    if (inlineText !== null && inlineText !== undefined) {
        return inlineText;
    }
    return deliveredTextFromAssistantTools(messages, lastAssistantAt);
}
87
163
  /**
88
164
  * Run a single agent turn through the inbound pipeline.
89
165
  * Caller provides channel, session key, friend, and message;
@@ -203,13 +279,8 @@ async function runSenseTurn(options) {
203
279
  await persistPromise;
204
280
  const postTurnSession = (0, context_1.loadSession)(sessPath);
205
281
  if (postTurnSession?.messages) {
206
- const lastAssistant = [...postTurnSession.messages].reverse().find(m => m.role === "assistant");
207
- if (lastAssistant && typeof lastAssistant.content === "string" && lastAssistant.content.trim()) {
208
- finalResponse = lastAssistant.content;
209
- }
210
- else {
211
- finalResponse = "(agent responded but response was empty)";
212
- }
282
+ finalResponse = responseFromSessionMessages(postTurnSession.messages)
283
+ ?? "(agent responded but response was empty)";
213
284
  }
214
285
  else {
215
286
  finalResponse = "(agent responded but response was empty)";
@@ -48,6 +48,7 @@ const http = __importStar(require("http"));
48
48
  const path = __importStar(require("path"));
49
49
  const runtime_1 = require("../../nerves/runtime");
50
50
  const playback_1 = require("./playback");
51
+ const transcript_1 = require("./transcript");
51
52
  const turn_1 = require("./turn");
52
53
  exports.DEFAULT_TWILIO_PHONE_PORT = 18910;
53
54
  exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS = 2;
@@ -175,6 +176,34 @@ function friendIdFromCaller(from, callSid) {
175
176
  const phoneish = from.replace(/[^0-9A-Za-z]+/g, "");
176
177
  return phoneish ? `twilio-${phoneish}` : `twilio-${safeSegment(callSid)}`;
177
178
  }
179
/**
 * Choose the friend id for a voice turn.
 *
 * @param {object} options - Bridge options; `defaultFriendId` is optional.
 * @param {string} from - Caller value passed on to `friendIdFromCaller`.
 * @param {string} callSid - Call SID passed on to `friendIdFromCaller`.
 * @returns {string} The trimmed `options.defaultFriendId` when it is
 *   non-blank, otherwise the id derived by `friendIdFromCaller`.
 */
function voiceFriendId(options, from, callSid) {
    const configuredId = options.defaultFriendId?.trim();
    return configuredId ? configuredId : friendIdFromCaller(from, callSid);
}
182
/**
 * Build the prompt text for the first turn of a newly connected call.
 *
 * @param {object} params - Request parameters; `From` and `To` are optional
 *   strings and are trimmed before use.
 * @returns {string} Newline-joined prompt that reports the caller ID and
 *   dialed line (or their absence) and asks for a brief greeting.
 */
function callConnectedPrompt(params) {
    const callerId = params.From?.trim();
    const dialedLine = params.To?.trim();
    const callerSentence = callerId
        ? `Twilio caller ID: ${callerId}.`
        : "Twilio did not provide caller ID.";
    const dialedSentence = dialedLine
        ? `Dialed line: ${dialedLine}.`
        : "Twilio did not provide the dialed line.";
    const lines = [
        "A Twilio phone voice call just connected.",
        "This is the first audible turn in the call.",
        callerSentence,
        dialedSentence,
        "Respond through the voice channel as yourself. Greet the caller naturally and briefly, then invite them to speak.",
    ];
    return lines.join("\n");
}
193
/**
 * Build the prompt text used when a recording contained no intelligible
 * speech.
 *
 * @returns {string} Three newline-separated sentences: what happened, that
 *   the caller is still connected, and the instruction to reprompt briefly.
 */
function noSpeechPrompt() {
    const situation = "The last Twilio phone recording contained no intelligible speech.";
    const callState = "The caller is still on the line.";
    const instruction = "Respond through the voice channel as yourself. Briefly ask them to try again or check whether they are there.";
    return `${situation}\n${callState}\n${instruction}`;
}
200
/**
 * Report whether transcript text is a no-speech placeholder rather than
 * actual speech.
 *
 * The text is trimmed, stripped of trailing `.`, `!` and `?`, and compared
 * case-insensitively against the known markers.
 *
 * @param {string} text - Transcript text.
 * @returns {boolean} `true` for `[BLANK_AUDIO]`, `BLANK_AUDIO`, `[NO_SPEECH]`
 *   or `NO_SPEECH`.
 */
function isNoSpeechTranscript(text) {
    const noSpeechMarkers = new Set([
        "[BLANK_AUDIO]",
        "BLANK_AUDIO",
        "[NO_SPEECH]",
        "NO_SPEECH",
    ]);
    const withoutTrailingPunctuation = text.trim().replace(/[.!?]+$/g, "");
    return noSpeechMarkers.has(withoutTrailingPunctuation.toUpperCase());
}
178
207
  function parseRecordingParams(params) {
179
208
  const callSid = params.CallSid?.trim();
180
209
  const recordingSid = params.RecordingSid?.trim();
@@ -199,6 +228,42 @@ function recordAgainResponse(options, basePath, message) {
199
228
  function errorMessage(error) {
200
229
  return error instanceof Error ? error.message : String(error);
201
230
  }
231
/**
 * Build the TwiML fragment appended after playback to collect the caller's
 * next input.
 *
 * @param {object} options - Bridge options (`publicBaseUrl`, plus optional
 *   `recordTimeoutSeconds` and `recordMaxLengthSeconds`).
 * @param {string} basePath - Base route path of the bridge.
 * @param {string} mode - `"redirect"` yields the `redirectTwiml` output; any
 *   other value yields the `recordTwiml` output.
 * @returns {string} The value returned by `redirectTwiml` or `recordTwiml`.
 */
function nextInputTwiml(options, basePath, mode) {
    const { publicBaseUrl } = options;
    if (mode !== "redirect") {
        // Unset record limits fall back to the exported module defaults.
        const timeoutSeconds = options.recordTimeoutSeconds ?? exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS;
        const maxLengthSeconds = options.recordMaxLengthSeconds ?? exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS;
        return recordTwiml({ publicBaseUrl, basePath, timeoutSeconds, maxLengthSeconds });
    }
    return redirectTwiml(publicBaseUrl, basePath);
}
241
/**
 * Run one agent voice turn from a system-authored prompt and answer with
 * TwiML that plays the resulting audio.
 *
 * @param {object} options
 * @param {object} options.bridgeOptions - Bridge options (`agentName`, `tts`,
 *   `runSenseTurn`, `publicBaseUrl`, record settings).
 * @param {string} options.basePath - Base route path of the bridge.
 * @param {string} options.callDir - Directory that receives the playback
 *   artifact.
 * @param {string} options.safeCallSid - Call SID segment used in the audio URL.
 * @param {string} options.utteranceId - Id for the transcript and the
 *   playback artifact.
 * @param {string} options.friendId - Friend id for the turn.
 * @param {string} options.sessionKey - Session key for the turn.
 * @param {string} options.promptText - Prompt used as the transcript text.
 * @param {string} options.afterPlayback - Mode forwarded to `nextInputTwiml`.
 * @returns {Promise<*>} The `xmlResponse` result: play-then-continue TwiML,
 *   or a spoken failure notice when TTS was not delivered.
 */
async function runPhonePromptTurn(options) {
    const { bridgeOptions, basePath, utteranceId } = options;
    const transcript = (0, transcript_1.buildVoiceTranscript)({
        utteranceId,
        text: options.promptText,
        source: "loopback",
    });
    const { tts: delivery } = await (0, turn_1.runVoiceLoopbackTurn)({
        agentName: bridgeOptions.agentName,
        friendId: options.friendId,
        sessionKey: options.sessionKey,
        transcript,
        tts: bridgeOptions.tts,
        runSenseTurn: bridgeOptions.runSenseTurn,
    });
    const continuation = nextInputTwiml(bridgeOptions, basePath, options.afterPlayback);
    if (delivery.status !== "delivered") {
        // Without audio, fall back to a spoken notice and keep the call going.
        const notice = sayTwiml("voice output failed after the text response was captured.");
        return xmlResponse(`${notice}${continuation}`);
    }
    const playback = await (0, playback_1.writeVoicePlaybackArtifact)({
        utteranceId,
        delivery,
        outputDir: options.callDir,
    });
    const callSegment = encodeURIComponent(options.safeCallSid);
    const fileSegment = encodeURIComponent(path.basename(playback.audioPath));
    const audioUrl = routeUrl(bridgeOptions.publicBaseUrl, `${basePath}/audio/${callSegment}/${fileSegment}`);
    return xmlResponse(`${playTwiml(audioUrl)}${continuation}`);
}
202
267
  function computeTwilioSignature(input) {
203
268
  const payload = input.url + Object.keys(input.params)
204
269
  .sort()
@@ -244,20 +309,46 @@ function verifyRequest(options, request, params) {
244
309
  signature: headerValue(request.headers, "x-twilio-signature"),
245
310
  });
246
311
  }
247
- async function handleIncoming(options, basePath) {
248
- const greeting = options.greetingText ?? "Connected to Ouro voice. Speak after the prompt.";
312
+ async function handleIncoming(options, basePath, params) {
313
+ const callSid = params.CallSid?.trim() || "incoming";
314
+ const safeCallSid = safeSegment(callSid);
315
+ const callDir = path.join(options.outputDir, safeCallSid);
316
+ const utteranceId = `twilio-${safeCallSid}-connected`;
249
317
  (0, runtime_1.emitNervesEvent)({
250
318
  component: "senses",
251
319
  event: "senses.voice_twilio_incoming",
252
320
  message: "Twilio voice call connected",
253
- meta: { agentName: options.agentName },
321
+ meta: { agentName: options.agentName, callSid: safeCallSid },
254
322
  });
255
- return xmlResponse(`${sayTwiml(greeting)}${recordTwiml({
256
- publicBaseUrl: options.publicBaseUrl,
257
- basePath,
258
- timeoutSeconds: options.recordTimeoutSeconds ?? exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS,
259
- maxLengthSeconds: options.recordMaxLengthSeconds ?? exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS,
260
- })}`);
323
+ try {
324
+ await fs.mkdir(callDir, { recursive: true });
325
+ return await runPhonePromptTurn({
326
+ bridgeOptions: options,
327
+ basePath,
328
+ callDir,
329
+ safeCallSid,
330
+ utteranceId,
331
+ friendId: voiceFriendId(options, params.From?.trim() ?? "", callSid),
332
+ sessionKey: `twilio-${safeCallSid}`,
333
+ promptText: callConnectedPrompt(params),
334
+ afterPlayback: "record",
335
+ });
336
+ }
337
+ catch (error) {
338
+ (0, runtime_1.emitNervesEvent)({
339
+ level: "error",
340
+ component: "senses",
341
+ event: "senses.voice_twilio_incoming_error",
342
+ message: "Twilio incoming voice greeting turn failed",
343
+ meta: { agentName: options.agentName, callSid: safeCallSid, error: errorMessage(error) },
344
+ });
345
+ return xmlResponse(recordTwiml({
346
+ publicBaseUrl: options.publicBaseUrl,
347
+ basePath,
348
+ timeoutSeconds: options.recordTimeoutSeconds ?? exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS,
349
+ maxLengthSeconds: options.recordMaxLengthSeconds ?? exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS,
350
+ }));
351
+ }
261
352
  }
262
353
  async function handleListen(options, basePath) {
263
354
  return xmlResponse(recordTwiml({
@@ -304,9 +395,22 @@ async function handleRecording(options, basePath, params) {
304
395
  utteranceId,
305
396
  audioPath: inputPath,
306
397
  });
398
+ if (isNoSpeechTranscript(transcript.text)) {
399
+ return await runPhonePromptTurn({
400
+ bridgeOptions: options,
401
+ basePath,
402
+ callDir,
403
+ safeCallSid,
404
+ utteranceId: `${utteranceId}-nospeech`,
405
+ friendId: voiceFriendId(options, recording.from, recording.callSid),
406
+ sessionKey: `twilio-${safeCallSid}`,
407
+ promptText: noSpeechPrompt(),
408
+ afterPlayback: "redirect",
409
+ });
410
+ }
307
411
  const turn = await (0, turn_1.runVoiceLoopbackTurn)({
308
412
  agentName: options.agentName,
309
- friendId: options.defaultFriendId?.trim() || friendIdFromCaller(recording.from, recording.callSid),
413
+ friendId: voiceFriendId(options, recording.from, recording.callSid),
310
414
  sessionKey: `twilio-${safeCallSid}`,
311
415
  transcript,
312
416
  tts: options.tts,
@@ -401,7 +505,7 @@ function createTwilioPhoneBridge(options) {
401
505
  return textResponse(403, "invalid Twilio signature");
402
506
  }
403
507
  if (routePath === `${basePath}/incoming`)
404
- return handleIncoming(options, basePath);
508
+ return handleIncoming(options, basePath, params);
405
509
  if (routePath === `${basePath}/listen`)
406
510
  return handleListen(options, basePath);
407
511
  if (routePath === `${basePath}/recording`)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ouro.bot/cli",
3
- "version": "0.1.0-alpha.564",
3
+ "version": "0.1.0-alpha.565",
4
4
  "main": "dist/heart/daemon/ouro-entry.js",
5
5
  "bin": {
6
6
  "cli": "dist/heart/daemon/ouro-bot-entry.js",