@ouro.bot/cli 0.1.0-alpha.559 → 0.1.0-alpha.560
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -2
- package/changelog.json +8 -0
- package/dist/heart/config-registry.js +18 -0
- package/dist/heart/core.js +1 -1
- package/dist/heart/daemon/cli-exec.js +59 -3
- package/dist/heart/daemon/cli-parse.js +5 -3
- package/dist/heart/daemon/runtime-logging.js +1 -1
- package/dist/heart/daemon/sense-manager.js +42 -6
- package/dist/heart/identity.js +4 -1
- package/dist/heart/sense-truth.js +2 -0
- package/dist/heart/turn-context.js +8 -0
- package/dist/mailbox-ui/assets/{index-Cm51CY9W.js → index-B-461hes.js} +1 -1
- package/dist/mailbox-ui/index.html +1 -1
- package/dist/mind/friends/channel.js +9 -0
- package/dist/mind/prompt.js +16 -0
- package/dist/senses/voice/elevenlabs.js +125 -0
- package/dist/senses/voice/index.js +22 -0
- package/dist/senses/voice/transcript.js +70 -0
- package/dist/senses/voice/turn.js +85 -0
- package/dist/senses/voice/types.js +2 -0
- package/dist/senses/voice/whisper.js +133 -0
- package/dist/senses/voice-entry.js +80 -0
- package/package.json +1 -1
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
<meta name="color-scheme" content="dark" />
|
|
7
7
|
<title>Ouro Mailbox</title>
|
|
8
8
|
<meta name="description" content="The daemon-hosted shared orientation surface for agents alive on this machine." />
|
|
9
|
-
<script type="module" crossorigin src="/assets/index-Cm51CY9W.js"></script>
|
|
9
|
+
<script type="module" crossorigin src="/assets/index-B-461hes.js"></script>
|
|
10
10
|
<link rel="stylesheet" crossorigin href="/assets/index-BPr5vNuM.css">
|
|
11
11
|
</head>
|
|
12
12
|
<body>
|
|
@@ -55,6 +55,15 @@ const CHANNEL_CAPABILITIES = {
|
|
|
55
55
|
supportsRichCards: false,
|
|
56
56
|
maxMessageLength: Infinity,
|
|
57
57
|
},
|
|
58
|
+
voice: {
|
|
59
|
+
channel: "voice",
|
|
60
|
+
senseType: "local",
|
|
61
|
+
availableIntegrations: [],
|
|
62
|
+
supportsMarkdown: false,
|
|
63
|
+
supportsStreaming: true,
|
|
64
|
+
supportsRichCards: false,
|
|
65
|
+
maxMessageLength: Infinity,
|
|
66
|
+
},
|
|
58
67
|
inner: {
|
|
59
68
|
channel: "inner",
|
|
60
69
|
senseType: "internal",
|
package/dist/mind/prompt.js
CHANGED
|
@@ -342,6 +342,7 @@ const PROCESS_TYPE_LABELS = {
|
|
|
342
342
|
teams: "teams handler",
|
|
343
343
|
bluebubbles: "bluebubbles handler",
|
|
344
344
|
mail: "mail handler",
|
|
345
|
+
voice: "voice handler",
|
|
345
346
|
mcp: "mcp bridge",
|
|
346
347
|
};
|
|
347
348
|
function processTypeLabel(channel) {
|
|
@@ -401,6 +402,12 @@ function runtimeInfoSection(channel, options) {
|
|
|
401
402
|
lines.push("i am responding in iMessage through BlueBubbles. i keep replies short and phone-native. i do not use markdown. i do not introduce myself on boot.");
|
|
402
403
|
lines.push("when a bluebubbles turn arrives from a thread, the harness tells me the current lane and any recent active thread ids. if widening back to top-level or routing into a different active thread is the better move, i use bluebubbles_set_reply_target before settle.");
|
|
403
404
|
}
|
|
405
|
+
else if (channel === "mail") {
|
|
406
|
+
lines.push("i am responding from an agent mail session. i keep the response clear, auditable, and grounded in visible mail facts.");
|
|
407
|
+
}
|
|
408
|
+
else if (channel === "voice") {
|
|
409
|
+
lines.push("i am responding in a voice session. i keep turns conversational, concise, and interrupt-friendly. the overview shows the text transcript as the durable record.");
|
|
410
|
+
}
|
|
404
411
|
else {
|
|
405
412
|
lines.push("i am responding in Microsoft Teams. i keep responses concise. i use markdown formatting. i do not introduce myself on boot.");
|
|
406
413
|
}
|
|
@@ -423,6 +430,7 @@ function localSenseStatusLines() {
|
|
|
423
430
|
teams: configuredSenses.teams ?? { enabled: false },
|
|
424
431
|
bluebubbles: configuredSenses.bluebubbles ?? { enabled: false },
|
|
425
432
|
mail: configuredSenses.mail ?? { enabled: false },
|
|
433
|
+
voice: configuredSenses.voice ?? { enabled: false },
|
|
426
434
|
};
|
|
427
435
|
const payload = (0, config_1.loadConfig)();
|
|
428
436
|
const runtimeConfig = (0, runtime_credentials_1.readRuntimeCredentialConfig)((0, identity_1.getAgentName)());
|
|
@@ -432,12 +440,15 @@ function localSenseStatusLines() {
|
|
|
432
440
|
const teams = recordOrUndefined(runtimePayload.teams) ?? recordOrUndefined(payload.teams);
|
|
433
441
|
const bluebubbles = recordOrUndefined(machinePayload.bluebubbles) ?? recordOrUndefined(payload.bluebubbles);
|
|
434
442
|
const mailroom = recordOrUndefined(runtimePayload.mailroom) ?? recordOrUndefined(payload.mailroom);
|
|
443
|
+
const voice = recordOrUndefined(machinePayload.voice) ?? recordOrUndefined(payload.voice);
|
|
444
|
+
const integrations = recordOrUndefined(runtimePayload.integrations) ?? recordOrUndefined(payload.integrations);
|
|
435
445
|
const privateKeys = mailroom?.privateKeys;
|
|
436
446
|
const configured = {
|
|
437
447
|
cli: true,
|
|
438
448
|
teams: hasTextField(teams, "clientId") && hasTextField(teams, "clientSecret") && hasTextField(teams, "tenantId"),
|
|
439
449
|
bluebubbles: hasTextField(bluebubbles, "serverUrl") && hasTextField(bluebubbles, "password"),
|
|
440
450
|
mail: hasTextField(mailroom, "mailboxAddress") && !!privateKeys && typeof privateKeys === "object" && !Array.isArray(privateKeys),
|
|
451
|
+
voice: hasTextField(integrations, "elevenLabsApiKey") && hasTextField(voice, "whisperCliPath") && hasTextField(voice, "whisperModelPath"),
|
|
441
452
|
};
|
|
442
453
|
const rows = [
|
|
443
454
|
{ label: "CLI", status: "interactive" },
|
|
@@ -453,6 +464,10 @@ function localSenseStatusLines() {
|
|
|
453
464
|
label: "Mail",
|
|
454
465
|
status: !senses.mail.enabled ? "disabled" : configured.mail ? "ready" : "needs_config",
|
|
455
466
|
},
|
|
467
|
+
{
|
|
468
|
+
label: "Voice",
|
|
469
|
+
status: !senses.voice.enabled ? "disabled" : configured.voice ? "ready" : "needs_config",
|
|
470
|
+
},
|
|
456
471
|
];
|
|
457
472
|
return rows.map((row) => `- ${row.label}: ${row.status}`);
|
|
458
473
|
}
|
|
@@ -487,6 +502,7 @@ function senseRuntimeGuidance(channel, preReadStatusLines) {
|
|
|
487
502
|
lines.push("mail validation diagnostics: health checks, bounded mail tools, access logs, and UI inspection can support validation, but they are evidence inside those paths, not additional paths. If asked to name golden paths, do not include diagnostic commands, tool names, or status checks in the answer.");
|
|
488
503
|
lines.push("mail diagnostic naming: `ouro doctor` is installation-wide; do not invent `ouro doctor --agent <agent>`.");
|
|
489
504
|
lines.push("mail setup boundaries: do not invent `ouro auth verify --provider mail`, HEY OAuth, HEY IMAP, `ouro mcp call mail ...`, policy flags, autonomous sending, destructive mail actions, or production MX/DNS/forwarding changes. HEY export, HEY forwarding, DNS, MX cutover, sending, and destructive actions require explicit human confirmation.");
|
|
505
|
+
lines.push("voice setup truth: voice sessions are transcript-first local sessions. ElevenLabs credentials belong in portable runtime/config at `integrations.elevenLabsApiKey`; Whisper.cpp CLI/model paths belong in the machine runtime item under `voice.whisperCliPath` and `voice.whisperModelPath`. Meeting-link joining and browser/system audio routing are a later milestone, not current setup truth.");
|
|
490
506
|
if (channel === "cli") {
|
|
491
507
|
lines.push("cli is interactive: it is available when the user opens it, not something `ouro up` daemonizes.");
|
|
492
508
|
}
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.DEFAULT_ELEVENLABS_MIME_TYPE = exports.DEFAULT_ELEVENLABS_OUTPUT_FORMAT = exports.DEFAULT_ELEVENLABS_MODEL_ID = void 0;
|
|
4
|
+
exports.createElevenLabsTtsClient = createElevenLabsTtsClient;
|
|
5
|
+
const runtime_1 = require("../../nerves/runtime");
|
|
6
|
+
exports.DEFAULT_ELEVENLABS_MODEL_ID = "eleven_flash_v2_5";
|
|
7
|
+
exports.DEFAULT_ELEVENLABS_OUTPUT_FORMAT = "pcm_16000";
|
|
8
|
+
exports.DEFAULT_ELEVENLABS_MIME_TYPE = "audio/pcm;rate=16000";
|
|
9
|
+
/**
 * Prepares text for speech synthesis: every run of whitespace (spaces, tabs,
 * newlines) is collapsed to one space and the ends are trimmed.
 */
function cleanTtsText(text) {
    const singleSpaced = text.replace(/\s+/g, " ");
    return singleSpaced.trim();
}
|
|
12
|
+
/**
 * Builds the `wss://` URL of the ElevenLabs `stream-input` text-to-speech
 * endpoint for one voice. The voice id is percent-encoded as a path segment;
 * the model id and output format travel as query parameters.
 */
function elevenLabsStreamUrl(voiceId, modelId, outputFormat) {
    const query = new URLSearchParams();
    query.append("model_id", modelId);
    query.append("output_format", outputFormat);
    const voiceSegment = encodeURIComponent(voiceId);
    return `wss://api.elevenlabs.io/v1/text-to-speech/${voiceSegment}/stream-input?${query.toString()}`;
}
|
|
16
|
+
/**
 * Converts a WebSocket message payload into UTF-8 text so it can be parsed as
 * JSON.
 *
 * Strings are returned unchanged. Binary payloads are decoded as UTF-8 in
 * every shape a socket implementation may deliver them: a single `Buffer`, an
 * `ArrayBuffer`, a typed array / `DataView`, or an array of `Buffer` fragments.
 * Previously the last three fell through to `String(...)`, which produced
 * text such as "[object ArrayBuffer]" instead of the message body.
 *
 * @param {*} payload - raw value passed to the socket's "message" listener.
 * @returns {string} decoded text; `null`/`undefined` become the empty string
 *   and any other value is converted with `String(...)`.
 */
function payloadText(payload) {
    if (typeof payload === "string")
        return payload;
    if (Buffer.isBuffer(payload))
        return payload.toString("utf8");
    if (payload instanceof ArrayBuffer)
        return Buffer.from(payload).toString("utf8");
    if (ArrayBuffer.isView(payload)) {
        // Respect the view's window: it may cover only part of its backing buffer.
        return Buffer.from(payload.buffer, payload.byteOffset, payload.byteLength).toString("utf8");
    }
    if (Array.isArray(payload) && payload.every((part) => Buffer.isBuffer(part))) {
        // Fragmented message: join the pieces before decoding so multi-byte
        // characters split across fragments survive.
        return Buffer.concat(payload).toString("utf8");
    }
    return String(payload ?? "");
}
|
|
23
|
+
/**
 * Creates a text-to-speech client that synthesizes one utterance per call over
 * a WebSocket obtained from `options.socketFactory`.
 *
 * Fixes relative to the previous implementation:
 *  - a throwing `socket.close()` can no longer leave the promise unsettled;
 *  - the socket is closed on failure as well as on success;
 *  - a throwing `socket.send()` in the "open" handler rejects the promise
 *    instead of escaping the event listener;
 *  - `pcm_<rate>` output formats other than the default are reported as
 *    `audio/pcm;rate=<rate>` rather than `audio/mpeg`.
 *
 * @param {object} options
 * @param {*} options.voiceId - voice identifier placed in the stream URL.
 * @param {*} options.apiKey - sent as `xi_api_key` in the first socket message.
 * @param {Function} options.socketFactory - called with the stream URL; must
 *   return an object exposing `on(event, listener)`, `send(text)` and `close()`.
 * @param {*} [options.modelId] - defaults to `DEFAULT_ELEVENLABS_MODEL_ID`.
 * @param {*} [options.outputFormat] - defaults to `DEFAULT_ELEVENLABS_OUTPUT_FORMAT`.
 * @returns {{ synthesize: Function }} client whose `synthesize({ utteranceId, text })`
 *   resolves with `{ utteranceId, audio, byteLength, chunkCount, modelId, voiceId, mimeType }`
 *   and rejects with an `Error` when the text is empty or the stream fails.
 */
function createElevenLabsTtsClient(options) {
    const modelId = options.modelId ?? exports.DEFAULT_ELEVENLABS_MODEL_ID;
    const outputFormat = options.outputFormat ?? exports.DEFAULT_ELEVENLABS_OUTPUT_FORMAT;
    // Raw PCM formats carry their sample rate in the name (e.g. "pcm_24000").
    const pcmFormat = /^pcm_(\d+)$/.exec(String(outputFormat));
    let mimeType = "audio/mpeg";
    if (outputFormat === exports.DEFAULT_ELEVENLABS_OUTPUT_FORMAT) {
        mimeType = exports.DEFAULT_ELEVENLABS_MIME_TYPE;
    }
    else if (pcmFormat) {
        mimeType = `audio/pcm;rate=${pcmFormat[1]}`;
    }
    return {
        async synthesize(request) {
            const text = cleanTtsText(request.text);
            if (!text) {
                (0, runtime_1.emitNervesEvent)({
                    level: "error",
                    component: "senses",
                    event: "senses.voice_tts_error",
                    message: "voice TTS text is empty",
                    meta: { utteranceId: request.utteranceId },
                });
                throw new Error("voice TTS text is empty");
            }
            const url = elevenLabsStreamUrl(options.voiceId, modelId, outputFormat);
            const socket = options.socketFactory(url);
            const chunks = [];
            (0, runtime_1.emitNervesEvent)({
                component: "senses",
                event: "senses.voice_tts_start",
                message: "starting ElevenLabs TTS",
                meta: { utteranceId: request.utteranceId, modelId, voiceId: options.voiceId },
            });
            return new Promise((resolve, reject) => {
                // Guards against settling twice: error, close and final-message
                // events can all arrive for the same utterance.
                let settled = false;
                // Closing is best-effort; a failure to close must never prevent
                // the promise from settling.
                const closeSocket = () => {
                    try {
                        socket.close();
                    }
                    catch {
                        // The outcome of the utterance is already decided.
                    }
                };
                const fail = (error) => {
                    if (settled)
                        return;
                    settled = true;
                    const message = error instanceof Error ? error.message : String(error);
                    (0, runtime_1.emitNervesEvent)({
                        level: "error",
                        component: "senses",
                        event: "senses.voice_tts_error",
                        message: "ElevenLabs TTS failed",
                        meta: { utteranceId: request.utteranceId, error: message },
                    });
                    // Release the connection; the "close" listener below is a
                    // no-op once `settled` is true.
                    closeSocket();
                    reject(new Error(`ElevenLabs TTS failed: ${message}`));
                };
                const finish = () => {
                    if (settled)
                        return;
                    settled = true;
                    const audio = Buffer.concat(chunks);
                    (0, runtime_1.emitNervesEvent)({
                        component: "senses",
                        event: "senses.voice_tts_end",
                        message: "finished ElevenLabs TTS",
                        meta: { utteranceId: request.utteranceId, chunkCount: chunks.length, byteLength: audio.byteLength },
                    });
                    closeSocket();
                    resolve({
                        utteranceId: request.utteranceId,
                        audio,
                        byteLength: audio.byteLength,
                        chunkCount: chunks.length,
                        modelId,
                        voiceId: options.voiceId,
                        mimeType,
                    });
                };
                socket.on("open", () => {
                    try {
                        // Three frames: an opening frame (single space) carrying the
                        // API key and voice settings, the utterance itself, then an
                        // empty-text frame, which presumably signals end of input —
                        // confirm against the ElevenLabs stream-input reference.
                        socket.send(JSON.stringify({
                            text: " ",
                            xi_api_key: options.apiKey,
                            voice_settings: {
                                stability: 0.5,
                                similarity_boost: 0.8,
                                use_speaker_boost: true,
                            },
                        }));
                        socket.send(JSON.stringify({ text, try_trigger_generation: true }));
                        socket.send(JSON.stringify({ text: "" }));
                    }
                    catch (error) {
                        fail(error);
                    }
                });
                socket.on("message", (payload) => {
                    if (settled)
                        return;
                    try {
                        const parsed = JSON.parse(payloadText(payload));
                        // Audio arrives base64-encoded; empty strings carry no data.
                        if (typeof parsed.audio === "string" && parsed.audio.length > 0) {
                            chunks.push(Buffer.from(parsed.audio, "base64"));
                        }
                        if (parsed.isFinal === true) {
                            finish();
                        }
                    }
                    catch (error) {
                        fail(error);
                    }
                });
                socket.on("error", fail);
                socket.on("close", () => {
                    if (!settled) {
                        fail(new Error("socket closed before final audio"));
                    }
                });
            });
        },
    };
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
14
|
+
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
15
|
+
};
|
|
16
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
+
/* v8 ignore file -- barrel exports only; executable voice helpers are covered directly. @preserve */
|
|
18
|
+
__exportStar(require("./types"), exports);
|
|
19
|
+
__exportStar(require("./transcript"), exports);
|
|
20
|
+
__exportStar(require("./whisper"), exports);
|
|
21
|
+
__exportStar(require("./elevenlabs"), exports);
|
|
22
|
+
__exportStar(require("./turn"), exports);
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.buildVoiceTranscript = buildVoiceTranscript;
|
|
4
|
+
exports.transcriptToPromptText = transcriptToPromptText;
|
|
5
|
+
exports.normalizeVoiceSessionKey = normalizeVoiceSessionKey;
|
|
6
|
+
const runtime_1 = require("../../nerves/runtime");
|
|
7
|
+
/**
 * Flattens transcribed speech onto one line: whitespace runs become a single
 * space and surrounding whitespace is removed.
 */
function compactSpeechText(text) {
    const flattened = text.replace(/\s+/g, " ");
    return flattened.trim();
}
|
|
10
|
+
/**
 * Builds a voice transcript record from raw transcription input.
 *
 * The text is compacted with `compactSpeechText`; when nothing remains, an
 * error event is emitted and an `Error` is thrown. Otherwise the optional
 * fields (`audioPath`, `language`, `startedAt`, `endedAt`) default to `null`,
 * a "built" event is emitted, and the record is returned.
 */
function buildVoiceTranscript(input) {
    const emit = (event) => (0, runtime_1.emitNervesEvent)(event);
    const spokenText = compactSpeechText(input.text);
    if (!spokenText) {
        emit({
            level: "error",
            component: "senses",
            event: "senses.voice_transcript_error",
            message: "voice transcript text is empty",
            meta: { utteranceId: input.utteranceId },
        });
        throw new Error("voice transcript text is empty");
    }
    const orNull = (value) => value ?? null;
    const transcript = {
        utteranceId: input.utteranceId,
        text: spokenText,
        source: input.source,
        audioPath: orNull(input.audioPath),
        language: orNull(input.language),
        startedAt: orNull(input.startedAt),
        endedAt: orNull(input.endedAt),
    };
    const { utteranceId, source } = transcript;
    emit({
        component: "senses",
        event: "senses.voice_transcript_built",
        message: "built voice transcript",
        meta: { utteranceId, source, length: spokenText.length },
    });
    return transcript;
}
|
|
39
|
+
/**
 * Returns the compacted text of a transcript for use as a prompt message.
 * When the compacted text is empty an error event is emitted and an `Error`
 * is thrown.
 */
function transcriptToPromptText(transcript) {
    const promptText = compactSpeechText(transcript.text);
    if (promptText) {
        return promptText;
    }
    const { utteranceId } = transcript;
    (0, runtime_1.emitNervesEvent)({
        level: "error",
        component: "senses",
        event: "senses.voice_transcript_error",
        message: "voice prompt text is empty",
        meta: { utteranceId },
    });
    throw new Error("voice transcript text is empty");
}
|
|
53
|
+
/**
 * Turns an arbitrary label into a session key made of lowercase ASCII letters
 * and digits joined by single hyphens, with no leading or trailing hyphen.
 * When nothing usable remains an error event is emitted and an `Error` is
 * thrown.
 */
function normalizeVoiceSessionKey(value) {
    // Splitting on runs of non-alphanumerics and dropping the empty edge
    // pieces is equivalent to replacing runs with "-" and trimming hyphens.
    const segments = value
        .trim()
        .toLowerCase()
        .split(/[^a-z0-9]+/)
        .filter((segment) => segment !== "");
    const normalized = segments.join("-");
    if (normalized) {
        return normalized;
    }
    (0, runtime_1.emitNervesEvent)({
        level: "error",
        component: "senses",
        event: "senses.voice_transcript_error",
        message: "voice session key is empty",
        meta: { inputLength: value.length },
    });
    throw new Error("voice session key is empty");
}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.runVoiceLoopbackTurn = runVoiceLoopbackTurn;
|
|
4
|
+
const runtime_1 = require("../../nerves/runtime");
|
|
5
|
+
const shared_turn_1 = require("../shared-turn");
|
|
6
|
+
const transcript_1 = require("./transcript");
|
|
7
|
+
/**
 * Runs one voice turn: converts the transcript to prompt text, runs a sense
 * turn on the "voice" channel, then asks `options.tts` to synthesize the
 * response.
 *
 * A transcript that cannot be converted is reported and the error rethrown.
 * A synthesis failure does not reject: the text response is still returned,
 * with `tts.status` set to "failed" and the error message attached.
 */
async function runVoiceLoopbackTurn(options) {
    const emit = (event) => (0, runtime_1.emitNervesEvent)(event);
    const describe = (failure) => (failure instanceof Error ? failure.message : String(failure));
    const runSenseTurn = options.runSenseTurn ?? shared_turn_1.runSenseTurn;
    let userMessage;
    try {
        userMessage = (0, transcript_1.transcriptToPromptText)(options.transcript);
    }
    catch (rejection) {
        emit({
            level: "error",
            component: "senses",
            event: "senses.voice_turn_error",
            message: "voice turn rejected transcript",
            meta: { utteranceId: options.transcript.utteranceId, error: describe(rejection) },
        });
        throw rejection;
    }
    emit({
        component: "senses",
        event: "senses.voice_turn_start",
        message: "starting voice loopback turn",
        meta: {
            agentName: options.agentName,
            friendId: options.friendId,
            sessionKey: options.sessionKey,
            utteranceId: options.transcript.utteranceId,
        },
    });
    const turn = await runSenseTurn({
        agentName: options.agentName,
        channel: "voice",
        friendId: options.friendId,
        sessionKey: options.sessionKey,
        userMessage,
    });
    try {
        const spoken = await options.tts.synthesize({
            utteranceId: options.transcript.utteranceId,
            text: turn.response,
        });
        const { audio, byteLength, chunkCount, mimeType, modelId, voiceId } = spoken;
        const delivered = {
            responseText: turn.response,
            ponderDeferred: turn.ponderDeferred,
            tts: { status: "delivered", audio, byteLength, chunkCount, mimeType, modelId, voiceId },
        };
        emit({
            component: "senses",
            event: "senses.voice_turn_end",
            message: "voice loopback turn delivered speech",
            meta: { utteranceId: options.transcript.utteranceId, responseLength: turn.response.length, byteLength },
        });
        return delivered;
    }
    catch (failure) {
        // The text response already exists, so a speech failure is reported
        // in the result instead of being thrown.
        const reason = describe(failure);
        emit({
            level: "error",
            component: "senses",
            event: "senses.voice_turn_tts_error",
            message: "voice loopback TTS failed after text response",
            meta: { utteranceId: options.transcript.utteranceId, error: reason, responseLength: turn.response.length },
        });
        return {
            responseText: turn.response,
            ponderDeferred: turn.ponderDeferred,
            tts: { status: "failed", error: reason },
        };
    }
}
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.parseWhisperCppTranscriptJson = parseWhisperCppTranscriptJson;
|
|
37
|
+
exports.createWhisperCppTranscriber = createWhisperCppTranscriber;
|
|
38
|
+
const fs = __importStar(require("fs/promises"));
|
|
39
|
+
const os = __importStar(require("os"));
|
|
40
|
+
const path = __importStar(require("path"));
|
|
41
|
+
const runtime_1 = require("../../nerves/runtime");
|
|
42
|
+
const transcript_1 = require("./transcript");
|
|
43
|
+
/**
 * Extracts the transcript text from whisper.cpp JSON output.
 *
 * A top-level string `text` is used when present. Otherwise, when
 * `transcription` is an array, the string `text` of each entry is trimmed and
 * the non-empty pieces are joined with single spaces.
 *
 * JSON `null` as the document, and `null` entries inside `transcription`, are
 * treated as "no text" instead of raising a raw `TypeError`.
 *
 * @param {string} raw - JSON document text.
 * @returns {string} the trimmed, non-empty transcript text.
 * @throws {Error} "invalid whisper.cpp JSON: …" (with the parser error as
 *   `cause`) when `raw` is not valid JSON; "empty whisper.cpp transcript" when
 *   no text can be extracted.
 */
function parseWhisperCppTranscriptJson(raw) {
    let parsed;
    try {
        parsed = JSON.parse(raw);
    }
    catch (error) {
        throw new Error(`invalid whisper.cpp JSON: ${String(error)}`, { cause: error });
    }
    // Optional chaining keeps `null` documents and `null` segments from throwing.
    const topLevelText = parsed?.text;
    const segments = parsed?.transcription;
    let text = "";
    if (typeof topLevelText === "string") {
        text = topLevelText.trim();
    }
    else if (Array.isArray(segments)) {
        text = segments
            .map((entry) => (typeof entry?.text === "string" ? entry.text.trim() : ""))
            .filter(Boolean)
            .join(" ")
            .trim();
    }
    if (!text) {
        throw new Error("empty whisper.cpp transcript");
    }
    return text;
}
|
|
65
|
+
/**
 * Creates a fresh, uniquely named directory under the OS temp directory whose
 * name starts with "ouro-voice-whisper-", and resolves with its path.
 */
async function defaultMakeTempDir() {
    const prefix = path.join(os.tmpdir(), "ouro-voice-whisper-");
    return fs.mkdtemp(prefix);
}
|
|
68
|
+
/**
 * Deletes a directory and everything inside it. Uses `force`, so a path that
 * does not exist is not an error.
 */
async function defaultRemoveDir(dir) {
    const removal = { recursive: true, force: true };
    await fs.rm(dir, removal);
}
|
|
71
|
+
/**
 * Creates a transcriber that runs the whisper.cpp CLI on an audio file and
 * turns its JSON output into a voice transcript.
 *
 * Fixes relative to the previous implementation:
 *  - a failure while removing the temporary work directory no longer replaces
 *    a successful transcript (or the real transcription error); it is reported
 *    as an event instead;
 *  - the wrapped "whisper.cpp transcription failed" error keeps the original
 *    error as its `cause`.
 *
 * @param {object} options
 * @param {string} options.whisperCliPath - executable handed to `processRunner`.
 * @param {string} options.modelPath - passed to the CLI after `-m`.
 * @param {Function} options.processRunner - called as
 *   `(whisperCliPath, args, { timeoutMs })`; its result is checked for a
 *   non-zero numeric `exitCode`, with `stderr` appended to the error when set.
 * @param {number} [options.timeoutMs=120000]
 * @param {Function} [options.readFile] - defaults to `fs.readFile`.
 * @param {Function} [options.makeTempDir] - defaults to `defaultMakeTempDir`.
 * @param {Function} [options.removeDir] - defaults to `defaultRemoveDir`.
 * @returns {{ transcribe: Function }} `transcribe({ utteranceId, audioPath, language })`
 *   resolves with the transcript built by `buildVoiceTranscript` and rejects
 *   with an `Error` when the CLI, the output read, or the parse fails.
 */
function createWhisperCppTranscriber(options) {
    const timeoutMs = options.timeoutMs ?? 120_000;
    const readFile = options.readFile ?? fs.readFile;
    const makeTempDir = options.makeTempDir ?? defaultMakeTempDir;
    const removeDir = options.removeDir ?? defaultRemoveDir;
    return {
        async transcribe(request) {
            const workDir = await makeTempDir();
            // `-oj -of <base>` asks the CLI for JSON output; the result is read
            // back from `<base>.json` below.
            const outputBase = path.join(workDir, "transcript");
            const args = [
                "-m",
                options.modelPath,
                "-f",
                request.audioPath,
                "-oj",
                "-of",
                outputBase,
                ...(request.language ? ["-l", request.language] : []),
            ];
            (0, runtime_1.emitNervesEvent)({
                component: "senses",
                event: "senses.voice_stt_start",
                message: "starting Whisper.cpp transcription",
                meta: { utteranceId: request.utteranceId, audioPath: request.audioPath },
            });
            try {
                const result = await options.processRunner(options.whisperCliPath, args, { timeoutMs });
                if (typeof result.exitCode === "number" && result.exitCode !== 0) {
                    throw new Error(`exit ${result.exitCode}${result.stderr ? `: ${result.stderr}` : ""}`);
                }
                const raw = await readFile(`${outputBase}.json`, "utf8");
                const text = parseWhisperCppTranscriptJson(raw);
                const transcript = (0, transcript_1.buildVoiceTranscript)({
                    utteranceId: request.utteranceId,
                    text,
                    audioPath: request.audioPath,
                    language: request.language,
                    source: "whisper.cpp",
                });
                (0, runtime_1.emitNervesEvent)({
                    component: "senses",
                    event: "senses.voice_stt_end",
                    message: "finished Whisper.cpp transcription",
                    meta: { utteranceId: request.utteranceId, length: transcript.text.length },
                });
                return transcript;
            }
            catch (error) {
                const detail = error instanceof Error ? error.message : String(error);
                (0, runtime_1.emitNervesEvent)({
                    level: "error",
                    component: "senses",
                    event: "senses.voice_stt_error",
                    message: "Whisper.cpp transcription failed",
                    meta: { utteranceId: request.utteranceId, error: detail },
                });
                throw new Error(`whisper.cpp transcription failed: ${detail}`, { cause: error });
            }
            finally {
                // Cleanup is best-effort: a throw here would override the
                // transcript or error produced above.
                try {
                    await removeDir(workDir);
                }
                catch (cleanupError) {
                    (0, runtime_1.emitNervesEvent)({
                        level: "error",
                        component: "senses",
                        event: "senses.voice_stt_cleanup_error",
                        message: "failed to remove Whisper.cpp work directory",
                        meta: {
                            utteranceId: request.utteranceId,
                            workDir,
                            error: cleanupError instanceof Error ? cleanupError.message : String(cleanupError),
                        },
                    });
                }
            }
        },
    };
}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
// Pre-boot guard: `--agent <name>` is resolved from argv before any project
// module is required, so a missing flag exits with a usage message first.
const agentFlagPosition = process.argv.indexOf("--agent");
const agentName = agentFlagPosition === -1 ? undefined : process.argv[agentFlagPosition + 1];
if (!agentName) {
    // eslint-disable-next-line no-console -- pre-boot guard: --agent check before imports
    console.error("Missing required --agent <name> argument.\nUsage: node dist/senses/voice-entry.js --agent ouroboros");
    process.exit(1);
}
const runtime_logging_1 = require("../heart/daemon/runtime-logging");
const runtime_1 = require("../nerves/runtime");
(0, runtime_logging_1.configureDaemonRuntimeLogger)("voice");
(0, runtime_1.emitNervesEvent)({
    component: "senses",
    event: "senses.entry_boot",
    message: "booting Voice entrypoint",
    meta: { entry: "voice", agentName },
});
/**
 * Boots the voice sense process: waits for the runtime credential bootstrap,
 * loads the machine identity, refreshes the machine credential config when the
 * cached read is not ok, starts a background refresh of the runtime credential
 * config, announces readiness, and then keeps the process alive.
 */
async function bootVoiceEntry() {
    // Loaded lazily (in a later microtask), after logging has been configured.
    const { readMachineRuntimeCredentialConfig, refreshMachineRuntimeCredentialConfig, refreshRuntimeCredentialConfig, waitForRuntimeCredentialBootstrap, } = await Promise.resolve().then(() => __importStar(require("../heart/runtime-credentials")));
    const ignoreRefreshFailure = () => undefined;
    await waitForRuntimeCredentialBootstrap(agentName);
    const { loadOrCreateMachineIdentity } = await Promise.resolve().then(() => __importStar(require("../heart/machine-identity")));
    const machine = loadOrCreateMachineIdentity();
    const machineConfig = readMachineRuntimeCredentialConfig(agentName);
    if (!machineConfig.ok) {
        // Refresh failures are deliberately ignored; boot continues either way.
        await refreshMachineRuntimeCredentialConfig(agentName, machine.machineId, { preserveCachedOnFailure: true }).catch(ignoreRefreshFailure);
    }
    // Fire-and-forget: the runtime config refresh is not awaited.
    void refreshRuntimeCredentialConfig(agentName, { preserveCachedOnFailure: true }).catch(ignoreRefreshFailure);
    (0, runtime_1.emitNervesEvent)({
        component: "senses",
        event: "senses.voice_entry_ready",
        message: "Voice entrypoint is ready for managed voice turns",
        meta: { entry: "voice", agentName, machineId: machine.machineId },
    });
    // A no-op timer keeps the event loop, and therefore the process, alive.
    setInterval(() => undefined, 60_000);
}
bootVoiceEntry().catch((error) => {
    const reason = error instanceof Error ? error.message : String(error);
    (0, runtime_1.emitNervesEvent)({
        level: "error",
        component: "senses",
        event: "senses.entry_error",
        message: "Voice entrypoint failed",
        meta: { entry: "voice", agentName, error: reason },
    });
    // eslint-disable-next-line no-console -- fatal startup guard for sense process
    console.error(error instanceof Error ? error.message : String(error));
    process.exit(1);
});
|