@ouro.bot/cli 0.1.0-alpha.564 → 0.1.0-alpha.566
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/changelog.json +15 -0
- package/dist/senses/shared-turn.js +95 -7
- package/dist/senses/voice/twilio-phone.js +115 -11
- package/package.json +1 -1
package/changelog.json
CHANGED
|
@@ -1,6 +1,21 @@
|
|
|
1
1
|
{
|
|
2
2
|
"_note": "This changelog is maintained as part of the PR/version-bump workflow. Agent-curated, not auto-generated. Agents read this file directly via read_file to understand what changed between versions.",
|
|
3
3
|
"versions": [
|
|
4
|
+
{
|
|
5
|
+
"version": "0.1.0-alpha.566",
|
|
6
|
+
"changes": [
|
|
7
|
+
"Shared sense delivery recovery is now an exported contract so replaying transports such as Voice/Twilio recover tool-required `settle`/`speak` output only after outward delivery acknowledgements.",
|
|
8
|
+
"Sense development docs now spell out the sense/transport boundary and the tool-required delivery rules for future Voice, meeting, Twilio, and other adapter work."
|
|
9
|
+
]
|
|
10
|
+
},
|
|
11
|
+
{
|
|
12
|
+
"version": "0.1.0-alpha.565",
|
|
13
|
+
"changes": [
|
|
14
|
+
"Voice shared-turn readback now recovers tool-required `settle` and `speak` deliveries from acknowledged tool calls instead of treating null assistant content as an empty response.",
|
|
15
|
+
"Twilio phone calls now start a real agent voice turn for the call-connected greeting, so the agent greets through the voice channel rather than a hardcoded transport prompt.",
|
|
16
|
+
"Twilio phone no-speech transcripts such as `[BLANK_AUDIO]` now become agent-authored voice reprompts instead of being passed through as caller speech."
|
|
17
|
+
]
|
|
18
|
+
},
|
|
4
19
|
{
|
|
5
20
|
"version": "0.1.0-alpha.564",
|
|
6
21
|
"changes": [
|
|
@@ -40,6 +40,7 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
40
40
|
})();
|
|
41
41
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
42
42
|
exports.stripThinkBlocks = stripThinkBlocks;
|
|
43
|
+
exports.extractOutwardSenseDeliveryText = extractOutwardSenseDeliveryText;
|
|
43
44
|
exports.runSenseTurn = runSenseTurn;
|
|
44
45
|
const os = __importStar(require("os"));
|
|
45
46
|
const path = __importStar(require("path"));
|
|
@@ -61,6 +62,10 @@ const pipeline_1 = require("./pipeline");
|
|
|
61
62
|
const mcp_manager_1 = require("../repertoire/mcp-manager");
|
|
62
63
|
const runtime_1 = require("../nerves/runtime");
|
|
63
64
|
const RESPONSE_CAP = 50_000;
|
|
65
|
+
const OUTWARD_DELIVERY_TOOL_ACKS = new Map([
|
|
66
|
+
["settle", "(delivered)"],
|
|
67
|
+
["speak", "(spoken)"],
|
|
68
|
+
]);
|
|
64
69
|
/**
|
|
65
70
|
* Strip MiniMax-style `<think>...</think>` reasoning blocks from a response
|
|
66
71
|
* string. Handles unclosed open tags (treats everything from `<think>` to
|
|
@@ -84,6 +89,94 @@ function stripThinkBlocks(input) {
|
|
|
84
89
|
}
|
|
85
90
|
return out.trim();
|
|
86
91
|
}
|
|
92
|
+
/**
 * Normalize an assistant message's `content` field to usable text.
 *
 * @param {unknown} content - Raw `content` value of an assistant message.
 * @returns {string|null} The whitespace-trimmed text, or `null` when the
 *   value is not a string or is blank once trimmed.
 */
function assistantContentText(content) {
    if (typeof content === "string") {
        const text = content.trim();
        if (text.length > 0) {
            return text;
        }
    }
    return null;
}
|
|
98
|
+
/**
 * Read one string argument out of a function-style tool call.
 *
 * @param {unknown} toolCall - Tool call entry; expected to carry a
 *   `function` object with `name` and JSON-encoded `arguments`.
 * @param {string} toolName - Tool name the call must match.
 * @param {string} argName - Key to read from the decoded arguments object.
 * @returns {string|null} The trimmed argument value, or `null` when the call
 *   is not an object, names a different tool, has non-string or unparsable
 *   arguments, decodes to something other than a plain object, or the
 *   argument is missing, non-string, or blank.
 */
function parseToolStringArg(toolCall, toolName, argName) {
    const isObjectLike = Boolean(toolCall) && typeof toolCall === "object";
    if (!isObjectLike) {
        return null;
    }
    const call = toolCall.function;
    const nameMatches = call?.name === toolName;
    if (!nameMatches || typeof call.arguments !== "string") {
        return null;
    }
    try {
        const args = JSON.parse(call.arguments);
        // JSON may decode to null, a primitive, or an array; only a plain
        // object can hold a named argument.
        const isPlainObject = Boolean(args) && typeof args === "object" && !Array.isArray(args);
        if (!isPlainObject) {
            return null;
        }
        const raw = args[argName];
        if (typeof raw === "string") {
            const text = raw.trim();
            return text ? text : null;
        }
        return null;
    }
    catch {
        // Malformed argument JSON is treated as "no usable argument".
        return null;
    }
}
|
|
118
|
+
/**
 * Check whether a tool call was acknowledged as delivered.
 *
 * Walks the run of `tool` messages that immediately follows the assistant
 * message and looks for one whose `tool_call_id` matches and whose trimmed
 * content equals the acknowledgement registered for `toolName` in
 * `OUTWARD_DELIVERY_TOOL_ACKS`.
 *
 * @param {Array<object>} messages - Session messages in order.
 * @param {number} assistantIndex - Index of the assistant message that issued the call.
 * @param {unknown} toolCallId - Id of the tool call; must be a non-blank string.
 * @param {string} toolName - Tool name used to look up the expected acknowledgement.
 * @returns {boolean} `true` only when a matching acknowledgement is found
 *   before the first non-`tool` message.
 */
function hasDeliveredToolResult(messages, assistantIndex, toolCallId, toolName) {
    const hasUsableId = typeof toolCallId === "string" && toolCallId.trim() !== "";
    if (!hasUsableId) {
        return false;
    }
    const ack = OUTWARD_DELIVERY_TOOL_ACKS.get(toolName);
    let cursor = assistantIndex + 1;
    while (cursor < messages.length) {
        const candidate = messages[cursor];
        // Tool results must directly follow the assistant message; any other
        // role ends the search.
        if (candidate.role !== "tool") {
            break;
        }
        const isForThisCall = candidate.tool_call_id === toolCallId;
        if (isForThisCall && typeof candidate.content === "string" && candidate.content.trim() === ack) {
            return true;
        }
        cursor += 1;
    }
    return false;
}
|
|
134
|
+
/**
 * Collect the text of acknowledged `settle`/`speak` tool calls made by one
 * assistant message.
 *
 * For each tool call, `settle.answer` is tried first and `speak.message`
 * second. A text is kept only when `hasDeliveredToolResult` confirms the
 * matching acknowledgement for that call id.
 *
 * @param {Array<object>} messages - Session messages in order.
 * @param {number} assistantIndex - Index of the assistant message to inspect.
 * @returns {string|null} Delivered texts joined with newlines in call order,
 *   or `null` when the message has no `tool_calls` array or nothing was
 *   acknowledged.
 */
function outwardDeliveryTextFromAssistantTools(messages, assistantIndex) {
    const { tool_calls: toolCalls } = messages[assistantIndex];
    if (!Array.isArray(toolCalls)) {
        return null;
    }
    // Checked in this order; the first acknowledged match wins for a call.
    const outwardArgs = [
        ["settle", "answer"],
        ["speak", "message"],
    ];
    const spokenParts = [];
    for (const toolCall of toolCalls) {
        const callId = toolCall && typeof toolCall === "object"
            ? toolCall.id
            : undefined;
        for (const [tool, arg] of outwardArgs) {
            const text = parseToolStringArg(toolCall, tool, arg);
            if (text && hasDeliveredToolResult(messages, assistantIndex, callId, tool)) {
                spokenParts.push(text);
                break;
            }
        }
    }
    if (spokenParts.length === 0) {
        return null;
    }
    return spokenParts.join("\n");
}
|
|
156
|
+
/**
 * Recover the text that reached the friend during an outward sense turn.
 *
 * Outward channels run in tool-required mode, so the visible reply can live
 * in a `settle({ answer })` or `speak({ message })` tool call while the
 * assistant message itself has `content: null`. Delivery is proven by the
 * tool acknowledgement that follows the call:
 *
 * - `(delivered)` acknowledges `settle.answer`
 * - `(spoken)` acknowledges `speak.message`
 *
 * Inner-dialog `(settled)` results, malformed tool arguments, rejected tools,
 * and interrupted tool-call sequences do not count as outward speech.
 * Transports that replay a turn later (Voice/Twilio TTS, future meeting
 * audio) should call this helper rather than reading `assistant.content`
 * themselves.
 *
 * Only the last assistant message is considered. Its non-blank string content
 * wins; otherwise the acknowledged tool-call text is used.
 *
 * @param {Array<object>} messages - Session messages in order.
 * @returns {string|null} The delivered text, or `null` when there is no
 *   assistant message or nothing was delivered.
 */
function extractOutwardSenseDeliveryText(messages) {
    for (let index = messages.length - 1; index >= 0; index -= 1) {
        const candidate = messages[index];
        if (candidate.role !== "assistant") {
            continue;
        }
        const inlineText = assistantContentText(candidate.content);
        return inlineText ?? outwardDeliveryTextFromAssistantTools(messages, index);
    }
    return null;
}
|
|
87
180
|
/**
|
|
88
181
|
* Run a single agent turn through the inbound pipeline.
|
|
89
182
|
* Caller provides channel, session key, friend, and message;
|
|
@@ -203,13 +296,8 @@ async function runSenseTurn(options) {
|
|
|
203
296
|
await persistPromise;
|
|
204
297
|
const postTurnSession = (0, context_1.loadSession)(sessPath);
|
|
205
298
|
if (postTurnSession?.messages) {
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
finalResponse = lastAssistant.content;
|
|
209
|
-
}
|
|
210
|
-
else {
|
|
211
|
-
finalResponse = "(agent responded but response was empty)";
|
|
212
|
-
}
|
|
299
|
+
finalResponse = extractOutwardSenseDeliveryText(postTurnSession.messages)
|
|
300
|
+
?? "(agent responded but response was empty)";
|
|
213
301
|
}
|
|
214
302
|
else {
|
|
215
303
|
finalResponse = "(agent responded but response was empty)";
|
|
@@ -48,6 +48,7 @@ const http = __importStar(require("http"));
|
|
|
48
48
|
const path = __importStar(require("path"));
|
|
49
49
|
const runtime_1 = require("../../nerves/runtime");
|
|
50
50
|
const playback_1 = require("./playback");
|
|
51
|
+
const transcript_1 = require("./transcript");
|
|
51
52
|
const turn_1 = require("./turn");
|
|
52
53
|
exports.DEFAULT_TWILIO_PHONE_PORT = 18910;
|
|
53
54
|
exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS = 2;
|
|
@@ -175,6 +176,34 @@ function friendIdFromCaller(from, callSid) {
|
|
|
175
176
|
const phoneish = from.replace(/[^0-9A-Za-z]+/g, "");
|
|
176
177
|
return phoneish ? `twilio-${phoneish}` : `twilio-${safeSegment(callSid)}`;
|
|
177
178
|
}
|
|
179
|
+
/**
 * Pick the friend id for a phone call.
 *
 * @param {object} options - Bridge options; `defaultFriendId` is optional.
 * @param {string} from - Caller id passed on to `friendIdFromCaller`.
 * @param {string} callSid - Call SID passed on to `friendIdFromCaller`.
 * @returns {string} The trimmed `options.defaultFriendId` when it is
 *   non-blank, otherwise the id derived by `friendIdFromCaller`.
 */
function voiceFriendId(options, from, callSid) {
    const configured = options.defaultFriendId?.trim();
    if (configured) {
        return configured;
    }
    return friendIdFromCaller(from, callSid);
}
|
|
182
|
+
/**
 * Build the prompt that asks the agent to greet a newly connected caller.
 *
 * @param {object} params - Twilio request parameters; `From` and `To` are
 *   optional strings and are trimmed before use.
 * @returns {string} Newline-joined prompt lines, including the caller id and
 *   dialed line when present or a note that Twilio did not provide them.
 */
function callConnectedPrompt(params) {
    const callerId = params.From?.trim();
    const dialedLine = params.To?.trim();
    const lines = [];
    lines.push("A Twilio phone voice call just connected.");
    lines.push("This is the first audible turn in the call.");
    if (callerId) {
        lines.push(`Twilio caller ID: ${callerId}.`);
    }
    else {
        lines.push("Twilio did not provide caller ID.");
    }
    if (dialedLine) {
        lines.push(`Dialed line: ${dialedLine}.`);
    }
    else {
        lines.push("Twilio did not provide the dialed line.");
    }
    lines.push("Respond through the voice channel as yourself. Greet the caller naturally and briefly, then invite them to speak.");
    return lines.join("\n");
}
|
|
193
|
+
/**
 * Build the prompt used when a phone recording held no intelligible speech.
 *
 * @returns {string} Three newline-joined lines telling the agent the caller
 *   is still on the line and asking it to reprompt through the voice channel.
 */
function noSpeechPrompt() {
    const situation = "The last Twilio phone recording contained no intelligible speech.";
    const callerState = "The caller is still on the line.";
    const instruction = "Respond through the voice channel as yourself. Briefly ask them to try again or check whether they are there.";
    return `${situation}\n${callerState}\n${instruction}`;
}
|
|
200
|
+
// Placeholder tokens a transcriber may emit instead of words when a recording
// holds no speech. Compared after upper-casing.
const NO_SPEECH_TRANSCRIPT_MARKERS = new Set([
    "[BLANK_AUDIO]",
    "BLANK_AUDIO",
    "[NO_SPEECH]",
    "NO_SPEECH",
]);
/**
 * Decide whether a transcript consists only of "no speech" placeholders.
 *
 * The transcript is split on whitespace. Each token has trailing `.`, `!`,
 * and `?` removed and is compared case-insensitively against the known
 * markers. Every token must be a marker, so a multi-segment transcript such
 * as `[BLANK_AUDIO] [BLANK_AUDIO]` is recognised while a transcript that
 * mixes real words with a marker is not.
 *
 * @param {unknown} text - Transcript text; non-string values are treated as
 *   "not a no-speech marker" instead of throwing.
 * @returns {boolean} `true` when the transcript contains at least one token
 *   and every token is a no-speech marker.
 */
function isNoSpeechTranscript(text) {
    if (typeof text !== "string") {
        return false;
    }
    const tokens = text.trim().split(/\s+/).filter(Boolean);
    // An empty transcript is not a marker; `every` alone would accept it.
    if (tokens.length === 0) {
        return false;
    }
    return tokens.every((token) => NO_SPEECH_TRANSCRIPT_MARKERS.has(token.replace(/[.!?]+$/, "").toUpperCase()));
}
|
|
178
207
|
function parseRecordingParams(params) {
|
|
179
208
|
const callSid = params.CallSid?.trim();
|
|
180
209
|
const recordingSid = params.RecordingSid?.trim();
|
|
@@ -199,6 +228,42 @@ function recordAgainResponse(options, basePath, message) {
|
|
|
199
228
|
/**
 * Turn any thrown value into a printable message.
 *
 * @param {unknown} error - The caught value.
 * @returns {string} `error.message` for `Error` instances, otherwise the
 *   value converted with `String()`.
 */
function errorMessage(error) {
    if (error instanceof Error) {
        return error.message;
    }
    return String(error);
}
|
|
231
|
+
/**
 * Build the TwiML fragment that follows playback.
 *
 * @param {object} options - Bridge options; reads `publicBaseUrl` and the
 *   optional `recordTimeoutSeconds` / `recordMaxLengthSeconds`.
 * @param {string} basePath - Route prefix of the bridge.
 * @param {string} mode - `"redirect"` yields the `redirectTwiml` fragment;
 *   any other value yields a `recordTwiml` fragment.
 * @returns {*} Whatever `redirectTwiml` or `recordTwiml` returns.
 */
function nextInputTwiml(options, basePath, mode) {
    const { publicBaseUrl } = options;
    if (mode !== "redirect") {
        // Fall back to the exported defaults when the bridge does not
        // override the recording limits.
        const timeoutSeconds = options.recordTimeoutSeconds ?? exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS;
        const maxLengthSeconds = options.recordMaxLengthSeconds ?? exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS;
        return recordTwiml({ publicBaseUrl, basePath, timeoutSeconds, maxLengthSeconds });
    }
    return redirectTwiml(publicBaseUrl, basePath);
}
|
|
241
|
+
/**
 * Run an agent voice turn from a transport-authored prompt and answer Twilio
 * with the resulting playback.
 *
 * The prompt is wrapped as a `"loopback"` transcript and run through
 * `runVoiceLoopbackTurn`. When TTS reports `"delivered"`, the audio is
 * written with `writeVoicePlaybackArtifact` and played from the bridge's
 * audio route. Otherwise a fixed failure sentence is spoken. Either way the
 * response continues with the fragment from `nextInputTwiml`.
 *
 * @param {object} options - Reads `bridgeOptions`, `basePath`, `callDir`,
 *   `safeCallSid`, `utteranceId`, `friendId`, `sessionKey`, `promptText`,
 *   and `afterPlayback`.
 * @returns {Promise<*>} The value produced by `xmlResponse`.
 */
async function runPhonePromptTurn(options) {
    const { bridgeOptions, basePath, utteranceId } = options;
    const transcript = (0, transcript_1.buildVoiceTranscript)({
        utteranceId,
        text: options.promptText,
        source: "loopback",
    });
    const turn = await (0, turn_1.runVoiceLoopbackTurn)({
        agentName: bridgeOptions.agentName,
        friendId: options.friendId,
        sessionKey: options.sessionKey,
        transcript,
        tts: bridgeOptions.tts,
        runSenseTurn: bridgeOptions.runSenseTurn,
    });
    const followUp = nextInputTwiml(bridgeOptions, basePath, options.afterPlayback);
    if (turn.tts.status === "delivered") {
        const playback = await (0, playback_1.writeVoicePlaybackArtifact)({
            utteranceId,
            delivery: turn.tts,
            outputDir: options.callDir,
        });
        // Only the file name is exposed; both path segments are URL-encoded.
        const callSegment = encodeURIComponent(options.safeCallSid);
        const fileSegment = encodeURIComponent(path.basename(playback.audioPath));
        const audioUrl = routeUrl(bridgeOptions.publicBaseUrl, `${basePath}/audio/${callSegment}/${fileSegment}`);
        return xmlResponse(`${playTwiml(audioUrl)}${followUp}`);
    }
    return xmlResponse(`${sayTwiml("voice output failed after the text response was captured.")}${followUp}`);
}
|
|
202
267
|
function computeTwilioSignature(input) {
|
|
203
268
|
const payload = input.url + Object.keys(input.params)
|
|
204
269
|
.sort()
|
|
@@ -244,20 +309,46 @@ function verifyRequest(options, request, params) {
|
|
|
244
309
|
signature: headerValue(request.headers, "x-twilio-signature"),
|
|
245
310
|
});
|
|
246
311
|
}
|
|
247
|
-
/**
 * Handle Twilio's "call connected" webhook by running a real agent greeting
 * turn.
 *
 * Emits `senses.voice_twilio_incoming`, creates the per-call output
 * directory, and delegates to `runPhonePromptTurn` with the call-connected
 * prompt, continuing with a recording afterwards. If anything in that path
 * throws, `senses.voice_twilio_incoming_error` is emitted and a plain record
 * response is returned.
 *
 * @param {object} options - Bridge options; reads `outputDir`, `agentName`,
 *   `publicBaseUrl`, and the optional recording limits.
 * @param {string} basePath - Route prefix of the bridge.
 * @param {object} params - Twilio request parameters; `CallSid` and `From`
 *   are optional.
 * @returns {Promise<*>} The value produced by `runPhonePromptTurn` or
 *   `xmlResponse`.
 */
async function handleIncoming(options, basePath, params) {
    // A missing or blank CallSid falls back to the literal "incoming".
    const callSid = params.CallSid?.trim() || "incoming";
    const safeCallSid = safeSegment(callSid);
    const callDir = path.join(options.outputDir, safeCallSid);
    const { agentName } = options;
    (0, runtime_1.emitNervesEvent)({
        component: "senses",
        event: "senses.voice_twilio_incoming",
        message: "Twilio voice call connected",
        meta: { agentName, callSid: safeCallSid },
    });
    try {
        await fs.mkdir(callDir, { recursive: true });
        const greetingTurn = {
            bridgeOptions: options,
            basePath,
            callDir,
            safeCallSid,
            utteranceId: `twilio-${safeCallSid}-connected`,
            friendId: voiceFriendId(options, params.From?.trim() ?? "", callSid),
            sessionKey: `twilio-${safeCallSid}`,
            promptText: callConnectedPrompt(params),
            afterPlayback: "record",
        };
        return await runPhonePromptTurn(greetingTurn);
    }
    catch (error) {
        (0, runtime_1.emitNervesEvent)({
            level: "error",
            component: "senses",
            event: "senses.voice_twilio_incoming_error",
            message: "Twilio incoming voice greeting turn failed",
            meta: { agentName, callSid: safeCallSid, error: errorMessage(error) },
        });
        // Respond with a record verb so the request still gets valid TwiML.
        const fallbackRecord = recordTwiml({
            publicBaseUrl: options.publicBaseUrl,
            basePath,
            timeoutSeconds: options.recordTimeoutSeconds ?? exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS,
            maxLengthSeconds: options.recordMaxLengthSeconds ?? exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS,
        });
        return xmlResponse(fallbackRecord);
    }
}
|
|
262
353
|
async function handleListen(options, basePath) {
|
|
263
354
|
return xmlResponse(recordTwiml({
|
|
@@ -304,9 +395,22 @@ async function handleRecording(options, basePath, params) {
|
|
|
304
395
|
utteranceId,
|
|
305
396
|
audioPath: inputPath,
|
|
306
397
|
});
|
|
398
|
+
if (isNoSpeechTranscript(transcript.text)) {
|
|
399
|
+
return await runPhonePromptTurn({
|
|
400
|
+
bridgeOptions: options,
|
|
401
|
+
basePath,
|
|
402
|
+
callDir,
|
|
403
|
+
safeCallSid,
|
|
404
|
+
utteranceId: `${utteranceId}-nospeech`,
|
|
405
|
+
friendId: voiceFriendId(options, recording.from, recording.callSid),
|
|
406
|
+
sessionKey: `twilio-${safeCallSid}`,
|
|
407
|
+
promptText: noSpeechPrompt(),
|
|
408
|
+
afterPlayback: "redirect",
|
|
409
|
+
});
|
|
410
|
+
}
|
|
307
411
|
const turn = await (0, turn_1.runVoiceLoopbackTurn)({
|
|
308
412
|
agentName: options.agentName,
|
|
309
|
-
friendId: options
|
|
413
|
+
friendId: voiceFriendId(options, recording.from, recording.callSid),
|
|
310
414
|
sessionKey: `twilio-${safeCallSid}`,
|
|
311
415
|
transcript,
|
|
312
416
|
tts: options.tts,
|
|
@@ -401,7 +505,7 @@ function createTwilioPhoneBridge(options) {
|
|
|
401
505
|
return textResponse(403, "invalid Twilio signature");
|
|
402
506
|
}
|
|
403
507
|
if (routePath === `${basePath}/incoming`)
|
|
404
|
-
return handleIncoming(options, basePath);
|
|
508
|
+
return handleIncoming(options, basePath, params);
|
|
405
509
|
if (routePath === `${basePath}/listen`)
|
|
406
510
|
return handleListen(options, basePath);
|
|
407
511
|
if (routePath === `${basePath}/recording`)
|