@ouro.bot/cli 0.1.0-alpha.560 → 0.1.0-alpha.561
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/changelog.json +8 -0
- package/dist/heart/daemon/cli-exec.js +1 -1
- package/dist/mind/prompt.js +1 -1
- package/dist/senses/shared-turn.js +4 -1
- package/dist/senses/voice/audio-routing.js +119 -0
- package/dist/senses/voice/elevenlabs.js +54 -1
- package/dist/senses/voice/golden-path.js +116 -0
- package/dist/senses/voice/index.js +4 -0
- package/dist/senses/voice/meeting.js +113 -0
- package/dist/senses/voice/playback.js +139 -0
- package/dist/senses/voice/whisper.js +29 -1
- package/package.json +1 -1
package/changelog.json
CHANGED
@@ -1,6 +1,14 @@
 {
   "_note": "This changelog is maintained as part of the PR/version-bump workflow. Agent-curated, not auto-generated. Agents read this file directly via read_file to understand what changed between versions.",
   "versions": [
+    {
+      "version": "0.1.0-alpha.561",
+      "changes": [
+        "Voice golden-path orchestration now accepts meeting URLs, checks local BlackHole/Multi-Output readiness, runs Whisper.cpp STT, routes text through ordinary `voice` sessions, streams ElevenLabs TTS, and writes playback-ready audio artifacts.",
+        "Voice runtime edges now include default Node Whisper.cpp process execution, default Node WebSocket adaptation for ElevenLabs, and tested playback artifact handling while keeping credentials injected at runtime.",
+        "Shared sense turns now store sessions under the explicit agent's bundle instead of depending on process argv for the session path, and voice setup guidance now reflects meeting URL intake plus live-browser handoff limits truthfully."
+      ]
+    },
     {
       "version": "0.1.0-alpha.560",
       "changes": [
package/dist/heart/daemon/cli-exec.js
CHANGED
@@ -4276,7 +4276,7 @@ async function executeConnectVoice(agent, deps) {
         ` ouro vault config set --agent ${agent} --scope machine --key voice.whisperCliPath`,
         ` ouro vault config set --agent ${agent} --scope machine --key voice.whisperModelPath`,
         "Then enable agent.json: senses.voice.enabled = true and restart with `ouro up`.",
-        "Meeting-
+        "Meeting links use URL intake plus BlackHole/Multi-Output readiness checks. Live browser join/injection is a handoff edge until provider automation lands.",
     ].join("\n");
     deps.writeStdout(message);
     return message;
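The guidance's `senses.voice.enabled = true` step maps to a small agent.json fragment. A sketch of just that toggle; the surrounding structure is assumed rather than taken from this diff:

    {
      "senses": {
        "voice": {
          "enabled": true
        }
      }
    }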
package/dist/mind/prompt.js
CHANGED
@@ -502,7 +502,7 @@ function senseRuntimeGuidance(channel, preReadStatusLines) {
     lines.push("mail validation diagnostics: health checks, bounded mail tools, access logs, and UI inspection can support validation, but they are evidence inside those paths, not additional paths. If asked to name golden paths, do not include diagnostic commands, tool names, or status checks in the answer.");
     lines.push("mail diagnostic naming: `ouro doctor` is installation-wide; do not invent `ouro doctor --agent <agent>`.");
     lines.push("mail setup boundaries: do not invent `ouro auth verify --provider mail`, HEY OAuth, HEY IMAP, `ouro mcp call mail ...`, policy flags, autonomous sending, destructive mail actions, or production MX/DNS/forwarding changes. HEY export, HEY forwarding, DNS, MX cutover, sending, and destructive actions require explicit human confirmation.");
-    lines.push("voice setup truth: voice sessions are transcript-first local sessions. ElevenLabs credentials belong in portable runtime/config at `integrations.elevenLabsApiKey`; Whisper.cpp CLI/model paths belong in the machine runtime item under `voice.whisperCliPath` and `voice.whisperModelPath`. Meeting
+    lines.push("voice setup truth: voice sessions are transcript-first local sessions. ElevenLabs credentials belong in portable runtime/config at `integrations.elevenLabsApiKey`; Whisper.cpp CLI/model paths belong in the machine runtime item under `voice.whisperCliPath` and `voice.whisperModelPath`. Meeting links now have URL intake and local BlackHole/Multi-Output readiness checks; live browser join/injection remains an explicit handoff edge until provider automation lands.");
     if (channel === "cli") {
         lines.push("cli is interactive: it is available when the user opens it, not something `ouro up` daemonizes.");
     }
package/dist/senses/shared-turn.js
CHANGED
@@ -43,6 +43,7 @@ exports.stripThinkBlocks = stripThinkBlocks;
 exports.runSenseTurn = runSenseTurn;
 const os = __importStar(require("os"));
 const path = __importStar(require("path"));
+const fs = __importStar(require("fs"));
 const core_1 = require("../heart/core");
 const identity_1 = require("../heart/identity");
 const config_1 = require("../heart/config");
@@ -130,7 +131,9 @@ async function runSenseTurn(options) {
     // Initialize MCP manager so MCP tools appear as first-class tools in the agent's tool list
     const mcpManager = await (0, mcp_manager_1.getSharedMcpManager)() ?? undefined;
     // Session path and loading
-    const
+    const sessionDir = path.join(agentRoot, "state", "sessions", friendId, channel);
+    fs.mkdirSync(sessionDir, { recursive: true });
+    const sessPath = path.join(sessionDir, `${(0, config_1.sanitizeKey)(sessionKey)}.json`);
     const existing = (0, context_1.loadSession)(sessPath);
     let sessionState = existing?.state;
     let persistPromise;
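The rewritten session path is built entirely from the agent bundle root, the friend id, and the channel, so processes launched with different argv now resolve the same session file. A sketch of the resolution with hypothetical values; only the join order comes from the diff, the bundle root shown is an assumption:

    const path = require("path");
    const agentRoot = "/home/me/.ouro/agents/demo"; // hypothetical bundle root
    const sessionDir = path.join(agentRoot, "state", "sessions", "friend-42", "voice");
    // => /home/me/.ouro/agents/demo/state/sessions/friend-42/voice/<sanitized-session-key>.json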
package/dist/senses/voice/audio-routing.js
ADDED
@@ -0,0 +1,119 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.createNodeVoiceCommandRunner = createNodeVoiceCommandRunner;
+exports.inspectVoiceAudioRouting = inspectVoiceAudioRouting;
+const child_process_1 = require("child_process");
+const runtime_1 = require("../../nerves/runtime");
+function createNodeVoiceCommandRunner() {
+    return (command, args, options) => new Promise((resolve, reject) => {
+        const child = (0, child_process_1.spawn)(command, args, { stdio: ["ignore", "pipe", "pipe"] });
+        const stdout = [];
+        const stderr = [];
+        const timer = setTimeout(() => {
+            child.kill("SIGTERM");
+            reject(new Error(`command timed out after ${options.timeoutMs}ms`));
+        }, options.timeoutMs);
+        child.stdout.on("data", (chunk) => stdout.push(chunk));
+        child.stderr.on("data", (chunk) => stderr.push(chunk));
+        child.on("error", (error) => {
+            clearTimeout(timer);
+            reject(error);
+        });
+        child.on("close", (exitCode) => {
+            clearTimeout(timer);
+            resolve({
+                stdout: Buffer.concat(stdout).toString("utf8"),
+                stderr: Buffer.concat(stderr).toString("utf8"),
+                exitCode: exitCode ?? 0,
+            });
+        });
+    });
+}
+function parseDeviceLines(output) {
+    return output
+        .split(/\r?\n/)
+        .map((line) => line.trim())
+        .filter(Boolean);
+}
+function commandFailureMessage(exitCode, result) {
+    const stderr = result.stderr?.trim();
+    if (stderr)
+        return stderr;
+    const stdout = result.stdout?.trim();
+    if (stdout)
+        return stdout;
+    return `exit ${exitCode}`;
+}
+function setupGuidance(missing, currentOutput, outputDeviceName) {
+    const guidance = missing.map((device) => `Install or configure the local audio device: ${device}.`);
+    if (currentOutput && currentOutput !== outputDeviceName) {
+        guidance.push(`Browser meeting audio should be routed through ${outputDeviceName}; current output is ${currentOutput}.`);
+    }
+    return guidance;
+}
+async function inspectVoiceAudioRouting(options = {}) {
+    const commandRunner = options.commandRunner ?? createNodeVoiceCommandRunner();
+    const switchAudioSourcePath = options.switchAudioSourcePath ?? "SwitchAudioSource";
+    const captureDeviceName = options.captureDeviceName ?? "BlackHole 2ch";
+    const outputDeviceName = options.outputDeviceName ?? "Multi-Output Device";
+    const timeoutMs = options.timeoutMs ?? 5_000;
+    try {
+        const devicesResult = await commandRunner(switchAudioSourcePath, ["-a"], { timeoutMs });
+        if (typeof devicesResult.exitCode === "number" && devicesResult.exitCode !== 0) {
+            throw new Error(commandFailureMessage(devicesResult.exitCode, devicesResult));
+        }
+        const currentResult = await commandRunner(switchAudioSourcePath, ["-c"], { timeoutMs });
+        if (typeof currentResult.exitCode === "number" && currentResult.exitCode !== 0) {
+            throw new Error(commandFailureMessage(currentResult.exitCode, currentResult));
+        }
+        const devices = parseDeviceLines(devicesResult.stdout ?? "");
+        const currentOutput = parseDeviceLines(currentResult.stdout ?? "")[0] ?? null;
+        const hasCaptureDevice = devices.includes(captureDeviceName);
+        const hasOutputDevice = devices.includes(outputDeviceName);
+        const missing = [
+            ...(hasCaptureDevice ? [] : [captureDeviceName]),
+            ...(hasOutputDevice ? [] : [outputDeviceName]),
+        ];
+        const result = {
+            status: missing.length === 0 ? "ready" : "needs_setup",
+            hasCaptureDevice,
+            hasOutputDevice,
+            currentOutput,
+            missing,
+            guidance: setupGuidance(missing, currentOutput, outputDeviceName),
+        };
+        (0, runtime_1.emitNervesEvent)({
+            component: "senses",
+            event: "senses.voice_audio_routing_checked",
+            message: "voice audio routing readiness checked",
+            meta: {
+                status: result.status,
+                hasCaptureDevice,
+                hasOutputDevice,
+                currentOutput,
+                missing,
+            },
+        });
+        return result;
+    }
+    catch (error) {
+        const message = error instanceof Error ? error.message : String(error);
+        const result = {
+            status: "unknown",
+            hasCaptureDevice: false,
+            hasOutputDevice: false,
+            currentOutput: null,
+            missing: [captureDeviceName, outputDeviceName],
+            guidance: setupGuidance([captureDeviceName, outputDeviceName], null, outputDeviceName),
+            error: message,
+        };
+        (0, runtime_1.emitNervesEvent)({
+            level: "error",
+            component: "senses",
+            event: "senses.voice_audio_routing_error",
+            message: "voice audio routing readiness check failed",
+            meta: { error: message, missing: result.missing },
+        });
+        return result;
+    }
+}
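The readiness check shells out to the macOS `SwitchAudioSource` CLI twice (`-a` lists devices, `-c` prints the current output) and compares against the expected BlackHole capture and Multi-Output playback device names. A caller sketch; the require path is an assumption based on the dist layout in this diff, the option and field names come from the code above:

    const { inspectVoiceAudioRouting } = require("@ouro.bot/cli/dist/senses/voice/audio-routing");

    (async () => {
        const readiness = await inspectVoiceAudioRouting({ timeoutMs: 5000 });
        // readiness.status is "ready", "needs_setup", or "unknown"
        console.log(readiness.status, readiness.missing, readiness.guidance);
    })();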
package/dist/senses/voice/elevenlabs.js
CHANGED
@@ -1,6 +1,7 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.DEFAULT_ELEVENLABS_MIME_TYPE = exports.DEFAULT_ELEVENLABS_OUTPUT_FORMAT = exports.DEFAULT_ELEVENLABS_MODEL_ID = void 0;
+exports.createNodeElevenLabsSocketFactory = createNodeElevenLabsSocketFactory;
 exports.createElevenLabsTtsClient = createElevenLabsTtsClient;
 const runtime_1 = require("../../nerves/runtime");
 exports.DEFAULT_ELEVENLABS_MODEL_ID = "eleven_flash_v2_5";
@@ -14,15 +15,67 @@ function elevenLabsStreamUrl(voiceId, modelId, outputFormat) {
     return `wss://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(voiceId)}/stream-input?${params.toString()}`;
 }
 function payloadText(payload) {
+    if (payload && typeof payload === "object" && "data" in payload) {
+        return payloadText(payload.data);
+    }
     if (typeof payload === "string")
         return payload;
     if (Buffer.isBuffer(payload))
         return payload.toString("utf8");
     return String(payload ?? "");
 }
+function createNodeElevenLabsSocketFactory(webSocketConstructor) {
+    const WebSocketConstructor = webSocketConstructor
+        ?? globalThis.WebSocket;
+    if (!WebSocketConstructor) {
+        throw new Error("global WebSocket is unavailable; inject an ElevenLabs socketFactory");
+    }
+    return (url) => {
+        const socket = new WebSocketConstructor(url);
+        const handlers = {
+            open: [],
+            message: [],
+            error: [],
+            close: [],
+        };
+        const emit = (event, payload) => {
+            for (const handler of handlers[event]) {
+                handler(payload);
+            }
+        };
+        const attach = (event) => {
+            const listener = (payload) => emit(event, payload);
+            if (typeof socket.addEventListener === "function") {
+                socket.addEventListener(event, listener);
+            }
+            else if (typeof socket.on === "function") {
+                socket.on(event, listener);
+            }
+            else {
+                socket[`on${event}`] = listener;
+            }
+        };
+        attach("open");
+        attach("message");
+        attach("error");
+        attach("close");
+        return {
+            on(event, handler) {
+                handlers[event].push(handler);
+            },
+            send(payload) {
+                socket.send(payload);
+            },
+            close() {
+                socket.close();
+            },
+        };
+    };
+}
 function createElevenLabsTtsClient(options) {
     const modelId = options.modelId ?? exports.DEFAULT_ELEVENLABS_MODEL_ID;
     const outputFormat = options.outputFormat ?? exports.DEFAULT_ELEVENLABS_OUTPUT_FORMAT;
+    const socketFactory = options.socketFactory ?? createNodeElevenLabsSocketFactory();
     const mimeType = outputFormat === exports.DEFAULT_ELEVENLABS_OUTPUT_FORMAT
         ? exports.DEFAULT_ELEVENLABS_MIME_TYPE
         : "audio/mpeg";
@@ -40,7 +93,7 @@ function createElevenLabsTtsClient(options) {
                 throw new Error("voice TTS text is empty");
             }
             const url = elevenLabsStreamUrl(options.voiceId, modelId, outputFormat);
-            const socket =
+            const socket = socketFactory(url);
             const chunks = [];
             (0, runtime_1.emitNervesEvent)({
                 component: "senses",
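createNodeElevenLabsSocketFactory normalizes whatever WebSocket implementation is available (addEventListener-style, `.on`-style, or `onmessage`-style) behind a minimal on/send/close surface, which is also the seam tests can use to avoid real network traffic. A sketch of a fake socket injected through that seam; the fake itself is hypothetical, only the on/send/close shape comes from the adapter above:

    // hypothetical in-memory socket matching the adapter's surface
    const fakeSocketFactory = (url) => {
        const listeners = { open: [], message: [], error: [], close: [] };
        queueMicrotask(() => listeners.open.forEach((h) => h()));
        return {
            on: (event, handler) => listeners[event].push(handler),
            send: (payload) => { /* record outbound frames here instead of sending */ },
            close: () => listeners.close.forEach((h) => h()),
        };
    };
    // pass as createElevenLabsTtsClient({ ..., socketFactory: fakeSocketFactory })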
package/dist/senses/voice/golden-path.js
ADDED
@@ -0,0 +1,116 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.runVoiceGoldenPath = runVoiceGoldenPath;
+const runtime_1 = require("../../nerves/runtime");
+const audio_routing_1 = require("./audio-routing");
+const meeting_1 = require("./meeting");
+const playback_1 = require("./playback");
+const turn_1 = require("./turn");
+const defaultVoiceMeetingJoiner = {
+    async join(request) {
+        const result = {
+            status: "manual_required",
+            detail: `Open ${request.meeting.redactedUrl} in a browser profile whose meeting audio is routed through Multi-Output Device.`,
+        };
+        (0, runtime_1.emitNervesEvent)({
+            component: "senses",
+            event: "senses.voice_meeting_join_manual_required",
+            message: "voice meeting join requires browser handoff",
+            meta: {
+                provider: request.meeting.provider,
+                sessionKey: request.meeting.sessionKey,
+                routingStatus: request.audioRouting.status,
+            },
+        });
+        return result;
+    },
+};
+function defaultUtteranceId() {
+    return `voice-${Date.now().toString(36)}`;
+}
+function playbackSkipped() {
+    (0, runtime_1.emitNervesEvent)({
+        component: "senses",
+        event: "senses.voice_golden_path_playback_skipped",
+        message: "voice golden path skipped playback because TTS failed",
+        meta: { reason: "tts_failed" },
+    });
+    return {
+        status: "skipped",
+        reason: "tts_failed",
+        playbackAttempted: false,
+    };
+}
+async function runVoiceGoldenPath(options) {
+    (0, runtime_1.emitNervesEvent)({
+        component: "senses",
+        event: "senses.voice_golden_path_start",
+        message: "starting voice golden path",
+        meta: { agentName: options.agentName, friendId: options.friendId },
+    });
+    try {
+        const meeting = (0, meeting_1.parseVoiceMeetingUrl)(options.meetingUrl);
+        const audioRouting = await (options.inspectAudioRouting ?? audio_routing_1.inspectVoiceAudioRouting)();
+        const joiner = options.meetingJoiner ?? defaultVoiceMeetingJoiner;
+        const join = await joiner.join({ meeting, audioRouting });
+        const utteranceId = options.utteranceId ?? defaultUtteranceId();
+        const transcript = await options.transcriber.transcribe({
+            utteranceId,
+            audioPath: options.audioPath,
+            language: options.language,
+        });
+        const sessionKey = options.sessionKey ?? meeting.sessionKey;
+        const runSenseTurn = options.runSenseTurn;
+        const turn = await (0, turn_1.runVoiceLoopbackTurn)({
+            agentName: options.agentName,
+            friendId: options.friendId,
+            sessionKey,
+            transcript,
+            tts: options.tts,
+            runSenseTurn,
+        });
+        const writePlaybackArtifact = options.writePlaybackArtifact ?? playback_1.writeVoicePlaybackArtifact;
+        const playback = turn.tts.status === "delivered"
+            ? await writePlaybackArtifact({
+                utteranceId,
+                delivery: turn.tts,
+                outputDir: options.outputDir,
+                playAudio: options.playAudio ?? false,
+            })
+            : playbackSkipped();
+        const result = {
+            meeting,
+            audioRouting,
+            join,
+            transcript,
+            responseText: turn.responseText,
+            ponderDeferred: turn.ponderDeferred,
+            tts: turn.tts,
+            playback,
+            sessionKey,
+        };
+        (0, runtime_1.emitNervesEvent)({
+            component: "senses",
+            event: "senses.voice_golden_path_end",
+            message: "finished voice golden path",
+            meta: {
+                sessionKey,
+                joinStatus: join.status,
+                ttsStatus: turn.tts.status,
+                playbackStatus: playback.status,
+            },
+        });
+        return result;
+    }
+    catch (error) {
+        const message = error instanceof Error ? error.message : String(error);
+        (0, runtime_1.emitNervesEvent)({
+            level: "error",
+            component: "senses",
+            event: "senses.voice_golden_path_error",
+            message: "voice golden path failed",
+            meta: { error: message },
+        });
+        throw new Error(`voice golden path failed: ${message}`);
+    }
+}
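Read top to bottom, runVoiceGoldenPath fixes the pipeline order: parse the meeting URL, inspect audio routing, hand off the browser join, transcribe the captured audio, run the ordinary sense turn, then write (and optionally play) the TTS artifact. A call sketch using only option names visible above; the transcriber, tts, and runSenseTurn values are stand-ins:

    const result = await runVoiceGoldenPath({
        agentName: "demo",
        friendId: "friend-42",
        meetingUrl: "https://riverside.fm/studio/example-room",
        audioPath: "/tmp/utterance.wav",  // captured meeting audio
        outputDir: "/tmp/voice-artifacts",
        transcriber,                       // e.g. createWhisperCppTranscriber(...)
        tts,                               // e.g. createElevenLabsTtsClient(...)
        runSenseTurn,                      // the shared sense-turn entry point
    });
    // result.join.status === "manual_required" with the default joiner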
package/dist/senses/voice/index.js
CHANGED
@@ -20,3 +20,7 @@ __exportStar(require("./transcript"), exports);
 __exportStar(require("./whisper"), exports);
 __exportStar(require("./elevenlabs"), exports);
 __exportStar(require("./turn"), exports);
+__exportStar(require("./meeting"), exports);
+__exportStar(require("./audio-routing"), exports);
+__exportStar(require("./playback"), exports);
+__exportStar(require("./golden-path"), exports);
package/dist/senses/voice/meeting.js
ADDED
@@ -0,0 +1,113 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.redactVoiceMeetingUrl = redactVoiceMeetingUrl;
+exports.parseVoiceMeetingUrl = parseVoiceMeetingUrl;
+const crypto_1 = require("crypto");
+const runtime_1 = require("../../nerves/runtime");
+function isRiversideHost(host) {
+    return host === "riverside.fm" || host === "riverside.com";
+}
+function stableMeetingHash(provider, url) {
+    return (0, crypto_1.createHash)("sha256")
+        .update(`${provider}:${url.protocol}//${url.host}${url.pathname}`)
+        .digest("hex")
+        .slice(0, 12);
+}
+function redactPath(pathname) {
+    const parts = pathname.split("/").filter(Boolean);
+    if (parts.length === 0)
+        return "/";
+    if (parts.length === 1)
+        return "/:redacted";
+    return `/${parts[0]}/:redacted`;
+}
+function redactVoiceMeetingUrl(input) {
+    try {
+        const url = new URL(input);
+        return `${url.protocol}//${url.host}${redactPath(url.pathname)}`;
+    }
+    catch {
+        return ":invalid";
+    }
+}
+function parseVoiceMeetingUrl(input) {
+    const trimmed = input.trim();
+    if (!trimmed) {
+        (0, runtime_1.emitNervesEvent)({
+            level: "error",
+            component: "senses",
+            event: "senses.voice_meeting_rejected",
+            message: "voice meeting URL is empty",
+            meta: { reason: "empty" },
+        });
+        throw new Error("voice meeting URL is empty");
+    }
+    let url;
+    try {
+        url = new URL(trimmed);
+    }
+    catch (error) {
+        (0, runtime_1.emitNervesEvent)({
+            level: "error",
+            component: "senses",
+            event: "senses.voice_meeting_rejected",
+            message: "voice meeting URL is invalid",
+            meta: { reason: "invalid_url" },
+        });
+        throw new Error("voice meeting URL is invalid");
+    }
+    if (url.protocol !== "http:" && url.protocol !== "https:") {
+        (0, runtime_1.emitNervesEvent)({
+            level: "error",
+            component: "senses",
+            event: "senses.voice_meeting_rejected",
+            message: "voice meeting URL must be http or https",
+            meta: { protocol: url.protocol },
+        });
+        throw new Error("voice meeting URL must be http or https");
+    }
+    const host = url.hostname.toLowerCase();
+    const provider = isRiversideHost(host) ? "riverside" : "generic";
+    if (provider === "riverside" && !url.pathname.startsWith("/studio/")) {
+        (0, runtime_1.emitNervesEvent)({
+            level: "error",
+            component: "senses",
+            event: "senses.voice_meeting_rejected",
+            message: "Riverside voice meeting URLs must use /studio/",
+            meta: { host, redactedUrl: redactVoiceMeetingUrl(trimmed) },
+        });
+        throw new Error("Riverside voice meeting URLs must use /studio/");
+    }
+    if (provider === "generic" && url.protocol !== "https:") {
+        (0, runtime_1.emitNervesEvent)({
+            level: "error",
+            component: "senses",
+            event: "senses.voice_meeting_rejected",
+            message: "generic voice meeting URLs must use https",
+            meta: { host, redactedUrl: redactVoiceMeetingUrl(trimmed) },
+        });
+        throw new Error("generic voice meeting URLs must use https");
+    }
+    const hash = stableMeetingHash(provider, url);
+    const meeting = {
+        originalUrl: trimmed,
+        provider,
+        host,
+        pathname: url.pathname,
+        redactedUrl: redactVoiceMeetingUrl(trimmed),
+        sessionKey: `voice-${provider}-${hash}`,
+        requiresBrowserJoin: true,
+    };
+    (0, runtime_1.emitNervesEvent)({
+        component: "senses",
+        event: "senses.voice_meeting_parsed",
+        message: "voice meeting URL parsed",
+        meta: {
+            provider: meeting.provider,
+            host: meeting.host,
+            sessionKey: meeting.sessionKey,
+            redactedUrl: meeting.redactedUrl,
+        },
+    });
+    return meeting;
+}
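parseVoiceMeetingUrl keeps the room slug and query string out of logs: events carry only a redacted URL, and the session key is derived from a 12-character SHA-256 prefix over provider, scheme, host, and path. A sketch of what it produces for a hypothetical studio URL:

    const meeting = parseVoiceMeetingUrl("https://riverside.fm/studio/example-room?token=abc");
    // meeting.provider    => "riverside"
    // meeting.redactedUrl => "https://riverside.fm/studio/:redacted"  (slug and query never logged)
    // meeting.sessionKey  => "voice-riverside-" + first 12 hex chars of the sha256 hash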
package/dist/senses/voice/playback.js
ADDED
@@ -0,0 +1,139 @@
+"use strict";
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) {
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || (function () {
+    var ownKeys = function(o) {
+        ownKeys = Object.getOwnPropertyNames || function (o) {
+            var ar = [];
+            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
+            return ar;
+        };
+        return ownKeys(o);
+    };
+    return function (mod) {
+        if (mod && mod.__esModule) return mod;
+        var result = {};
+        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
+        __setModuleDefault(result, mod);
+        return result;
+    };
+})();
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.writeVoicePlaybackArtifact = writeVoicePlaybackArtifact;
+const fs = __importStar(require("fs/promises"));
+const path = __importStar(require("path"));
+const runtime_1 = require("../../nerves/runtime");
+const audio_routing_1 = require("./audio-routing");
+function audioExtension(mimeType) {
+    if (mimeType === "audio/mpeg")
+        return "mp3";
+    if (mimeType === "audio/wav" || mimeType === "audio/x-wav")
+        return "wav";
+    if (mimeType.startsWith("audio/pcm"))
+        return "pcm";
+    return "audio";
+}
+function safeFileStem(input) {
+    const stem = input
+        .trim()
+        .toLowerCase()
+        .replace(/[^a-z0-9]+/g, "-")
+        .replace(/^-+|-+$/g, "");
+    return stem || "utterance";
+}
+function commandFailureMessage(exitCode, result) {
+    const stderr = result.stderr?.trim();
+    if (stderr)
+        return `exit ${exitCode}: ${stderr}`;
+    const stdout = result.stdout?.trim();
+    if (stdout)
+        return `exit ${exitCode}: ${stdout}`;
+    return `exit ${exitCode}`;
+}
+async function writeVoicePlaybackArtifact(request) {
+    const mkdir = request.mkdir ?? fs.mkdir;
+    const writeFile = request.writeFile ?? fs.writeFile;
+    const commandRunner = request.commandRunner ?? (0, audio_routing_1.createNodeVoiceCommandRunner)();
+    const timeoutMs = request.timeoutMs ?? 30_000;
+    const playbackCommandPath = request.playbackCommandPath ?? "afplay";
+    const audioPath = path.join(request.outputDir, `${safeFileStem(request.utteranceId)}.${audioExtension(request.delivery.mimeType)}`);
+    await mkdir(request.outputDir, { recursive: true });
+    await writeFile(audioPath, request.delivery.audio);
+    (0, runtime_1.emitNervesEvent)({
+        component: "senses",
+        event: "senses.voice_playback_artifact_written",
+        message: "voice playback artifact written",
+        meta: {
+            utteranceId: request.utteranceId,
+            audioPath,
+            byteLength: request.delivery.byteLength,
+            mimeType: request.delivery.mimeType,
+        },
+    });
+    if (request.playAudio !== true) {
+        return {
+            status: "written",
+            audioPath,
+            byteLength: request.delivery.byteLength,
+            mimeType: request.delivery.mimeType,
+            playbackAttempted: false,
+        };
+    }
+    (0, runtime_1.emitNervesEvent)({
+        component: "senses",
+        event: "senses.voice_playback_start",
+        message: "starting voice playback",
+        meta: { utteranceId: request.utteranceId, audioPath, playbackCommandPath },
+    });
+    try {
+        const result = await commandRunner(playbackCommandPath, [audioPath], { timeoutMs });
+        if (typeof result.exitCode === "number" && result.exitCode !== 0) {
+            throw new Error(commandFailureMessage(result.exitCode, result));
+        }
+        (0, runtime_1.emitNervesEvent)({
+            component: "senses",
+            event: "senses.voice_playback_end",
+            message: "finished voice playback",
+            meta: { utteranceId: request.utteranceId, audioPath },
+        });
+        return {
+            status: "played",
+            audioPath,
+            byteLength: request.delivery.byteLength,
+            mimeType: request.delivery.mimeType,
+            playbackAttempted: true,
+        };
+    }
+    catch (error) {
+        const message = error instanceof Error ? error.message : String(error);
+        (0, runtime_1.emitNervesEvent)({
+            level: "error",
+            component: "senses",
+            event: "senses.voice_playback_error",
+            message: "voice playback failed",
+            meta: { utteranceId: request.utteranceId, audioPath, error: message },
+        });
+        return {
+            status: "failed",
+            audioPath,
+            byteLength: request.delivery.byteLength,
+            mimeType: request.delivery.mimeType,
+            playbackAttempted: true,
+            error: message,
+        };
+    }
+}
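Artifact names are derived defensively: safeFileStem lowercases the utterance id and collapses anything outside [a-z0-9] to hyphens, and the extension follows the delivery mime type. A sketch of the resulting path with hypothetical values:

    // utteranceId "Voice Turn #7!" with mimeType "audio/mpeg" and outputDir "/tmp/voice"
    // safeFileStem => "voice-turn-7", audioExtension => "mp3"
    // audioPath    => "/tmp/voice/voice-turn-7.mp3"
    // playback runs `afplay` only when playAudio === true; otherwise status is "written"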
package/dist/senses/voice/whisper.js
CHANGED
@@ -34,10 +34,12 @@ var __importStar = (this && this.__importStar) || (function () {
 })();
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.parseWhisperCppTranscriptJson = parseWhisperCppTranscriptJson;
+exports.createNodeWhisperCppProcessRunner = createNodeWhisperCppProcessRunner;
 exports.createWhisperCppTranscriber = createWhisperCppTranscriber;
 const fs = __importStar(require("fs/promises"));
 const os = __importStar(require("os"));
 const path = __importStar(require("path"));
+const child_process_1 = require("child_process");
 const runtime_1 = require("../../nerves/runtime");
 const transcript_1 = require("./transcript");
 function parseWhisperCppTranscriptJson(raw) {
@@ -68,11 +70,37 @@ async function defaultMakeTempDir() {
 async function defaultRemoveDir(dir) {
     await fs.rm(dir, { recursive: true, force: true });
 }
+function createNodeWhisperCppProcessRunner() {
+    return (command, args, options) => new Promise((resolve, reject) => {
+        const child = (0, child_process_1.spawn)(command, args, { stdio: ["ignore", "pipe", "pipe"] });
+        const stdout = [];
+        const stderr = [];
+        const timer = setTimeout(() => {
+            child.kill("SIGTERM");
+            reject(new Error(`command timed out after ${options.timeoutMs}ms`));
+        }, options.timeoutMs);
+        child.stdout.on("data", (chunk) => stdout.push(chunk));
+        child.stderr.on("data", (chunk) => stderr.push(chunk));
+        child.on("error", (error) => {
+            clearTimeout(timer);
+            reject(error);
+        });
+        child.on("close", (exitCode) => {
+            clearTimeout(timer);
+            resolve({
+                stdout: Buffer.concat(stdout).toString("utf8"),
+                stderr: Buffer.concat(stderr).toString("utf8"),
+                exitCode: exitCode ?? 0,
+            });
+        });
+    });
+}
 function createWhisperCppTranscriber(options) {
     const timeoutMs = options.timeoutMs ?? 120_000;
     const readFile = options.readFile ?? fs.readFile;
     const makeTempDir = options.makeTempDir ?? defaultMakeTempDir;
     const removeDir = options.removeDir ?? defaultRemoveDir;
+    const processRunner = options.processRunner ?? createNodeWhisperCppProcessRunner();
     return {
         async transcribe(request) {
             const workDir = await makeTempDir();
@@ -94,7 +122,7 @@ function createWhisperCppTranscriber(options) {
                 meta: { utteranceId: request.utteranceId, audioPath: request.audioPath },
             });
             try {
-                const result = await
+                const result = await processRunner(options.whisperCliPath, args, { timeoutMs });
                 if (typeof result.exitCode === "number" && result.exitCode !== 0) {
                     throw new Error(`exit ${result.exitCode}${result.stderr ? `: ${result.stderr}` : ""}`);
                 }