osborn 0.9.39 → 0.9.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +385 -31
- package/dist/recall-client.d.ts +17 -1
- package/dist/recall-client.js +56 -7
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
// Load environment variables FIRST before any other imports
|
|
2
2
|
import 'dotenv/config';
|
|
3
3
|
import { voice, initializeLogger } from '@livekit/agents';
|
|
4
|
-
import { Room, RoomEvent } from '@livekit/rtc-node';
|
|
4
|
+
import { Room, RoomEvent, AudioSource, AudioFrame, LocalAudioTrack, TrackPublishOptions, TrackSource, } from '@livekit/rtc-node';
|
|
5
5
|
import { AccessToken } from 'livekit-server-sdk';
|
|
6
6
|
// Initialize logger before anything else
|
|
7
7
|
initializeLogger({ pretty: true, level: 'info' });
|
|
@@ -149,6 +149,12 @@ process.on('uncaughtException', (error) => {
|
|
|
149
149
|
let currentRoomCode = null;
|
|
150
150
|
// Meeting output WebSocket — module-level so both startApiServer and main() can access it
|
|
151
151
|
let meetingOutputWs = null;
|
|
152
|
+
// Module-level AgentSession reference so /meeting-audio-in WS handler can switch
|
|
153
|
+
// the RoomIO-linked participant when meeting audio starts/stops (B2 design).
|
|
154
|
+
let activeAgentSession = null;
|
|
155
|
+
// Identity of the local user participant the session was originally listening to
|
|
156
|
+
// — captured at the moment we switch to the meeting publisher, restored on cleanup.
|
|
157
|
+
let preMeetingUserIdentity = null;
|
|
152
158
|
function sendToMeetingOutput(msg) {
|
|
153
159
|
if (meetingOutputWs && meetingOutputWs.readyState === WebSocket.OPEN) {
|
|
154
160
|
try {
|
|
@@ -960,20 +966,235 @@ function startApiServer(workingDir, port) {
|
|
|
960
966
|
cleanStaleUploadDirs();
|
|
961
967
|
setInterval(cleanStaleUploadDirs, 10 * 60 * 1000);
|
|
962
968
|
// ============================================================
|
|
963
|
-
// Meeting Output WebSocket — /meeting-audio
|
|
969
|
+
// Meeting Output WebSocket — /meeting-audio (LEGACY)
|
|
964
970
|
// ============================================================
|
|
965
|
-
// Recall's headless browser
|
|
966
|
-
//
|
|
971
|
+
// Recall's headless browser used to open meeting-output.html which connects
|
|
972
|
+
// here. With the new /meeting-bot Next.js page (Phase 2 + LiveKit), Recall
|
|
973
|
+
// points at frontend/meeting-bot instead — this handler exists only for
|
|
974
|
+
// backwards-compat with old machine images still serving the legacy path.
|
|
967
975
|
const meetingOutputWss = new WebSocketServer({ noServer: true });
|
|
968
976
|
meetingOutputWss.on('connection', (ws) => {
|
|
969
|
-
console.log('📺 Meeting output browser connected');
|
|
977
|
+
console.log('📺 Meeting output browser connected (legacy /meeting-audio)');
|
|
970
978
|
meetingOutputWs = ws;
|
|
971
979
|
ws.on('close', () => {
|
|
972
|
-
console.log('📺 Meeting output browser disconnected');
|
|
980
|
+
console.log('📺 Meeting output browser disconnected (legacy)');
|
|
973
981
|
if (meetingOutputWs === ws)
|
|
974
982
|
meetingOutputWs = null;
|
|
975
983
|
});
|
|
976
984
|
});
|
|
985
|
+
// ============================================================
|
|
986
|
+
// Recall.ai meeting-audio-in WebSocket — /meeting-audio-in
|
|
987
|
+
// ============================================================
|
|
988
|
+
// Recall.ai's per-participant real-time audio protocol. Bot is configured
|
|
989
|
+
// (in recall-client.ts joinMeeting) with audio_separate_raw + a realtime
|
|
990
|
+
// endpoint pointing at this URL. Recall sends JSON events containing
|
|
991
|
+
// base64-encoded PCM (S16LE, 16kHz, mono) for every meeting participant
|
|
992
|
+
// (bot's own audio NOT included by default — no feedback loop possible).
|
|
993
|
+
//
|
|
994
|
+
// Flow: Recall → /meeting-audio-in → open a SECOND LiveKit connection from
|
|
995
|
+
// this agent process as a publisher participant → publish PCM as an
|
|
996
|
+
// audio track in the same LiveKit room → the existing AgentSession's
|
|
997
|
+
// STT subscribes to it as a remote track → routes to currentLLM.chat()
|
|
998
|
+
// via the same pipeline as voice-native user mic.
|
|
999
|
+
//
|
|
1000
|
+
// The advantage of this design vs a parallel STT pipeline: meeting audio
|
|
1001
|
+
// becomes "just another participant" in the LiveKit room — same end-of-turn
|
|
1002
|
+
// detection, same interrupt handling, same conversation context, no parallel
|
|
1003
|
+
// chat() paths to maintain.
|
|
1004
|
+
//
|
|
1005
|
+
// Wait until activeAgentSession._roomIO exists AND the publisher participant
|
|
1006
|
+
// is visible to the agent's room. Both can race against join_meeting:
|
|
1007
|
+
// - Agent session may still be starting up when Recall connects.
|
|
1008
|
+
// - LiveKit takes a moment to propagate the publisher's join to the agent
|
|
1009
|
+
// side after publishTrack() returns on our side.
|
|
1010
|
+
// Bounded poll (200ms cadence) avoids both timing gaps.
|
|
1011
|
+
/**
 * Poll until the active AgentSession exposes a usable RoomIO AND the publisher
 * participant is listed among the agent room's remote participants.
 *
 * The state is always inspected at least once, even when `timeoutMs` is zero,
 * negative or NaN, so a caller with an exhausted budget still gets an accurate
 * snapshot instead of a blind "not ready".
 *
 * @param {string} publisherIdentity Identity to look for in
 *   `roomIO.rtcRoom.remoteParticipants`.
 * @param {number} timeoutMs Maximum time to keep polling, in milliseconds.
 * @param {number} [pollIntervalMs=200] Delay between checks, in milliseconds.
 *   The final sleep is shortened so the deadline is not overshot.
 * @returns {Promise<{ roomIO: object | null, participantVisible: boolean }>}
 *   `roomIO` is non-null only when it exposes a callable `setParticipant`, so
 *   the caller can invoke it without re-checking. `participantVisible` is true
 *   only when the publisher was found before the deadline.
 */
async function waitForRoomIOAndParticipant(publisherIdentity, timeoutMs, pollIntervalMs = 200) {
    const deadline = Date.now() + timeoutMs;
    for (;;) {
        // Re-read on every pass: the session reference is reassigned elsewhere
        // in this module while we are waiting.
        const candidate = activeAgentSession?._roomIO;
        const roomIO = candidate && typeof candidate.setParticipant === 'function' ? candidate : null;
        if (roomIO) {
            const remotes = roomIO.rtcRoom?.remoteParticipants;
            if (remotes && typeof remotes.values === 'function') {
                for (const participant of remotes.values()) {
                    if (participant?.identity === publisherIdentity) {
                        return { roomIO, participantVisible: true };
                    }
                }
            }
        }
        const remainingMs = deadline - Date.now();
        // `!(x > 0)` rather than `x <= 0` so a NaN timeout terminates instead of spinning.
        if (!(remainingMs > 0)) {
            // Timed out — hand back whatever we have. Caller decides whether to proceed.
            return { roomIO, participantVisible: false };
        }
        await new Promise((resolve) => setTimeout(resolve, Math.min(pollIntervalMs, remainingMs)));
    }
}
|
|
1036
|
+
const meetingAudioInWss = new WebSocketServer({ noServer: true });
// Per-connection handler for Recall.ai's raw-audio WebSocket.
//
// For each connection it: mints a publish-only LiveKit token, joins the current
// room as a second participant, publishes an audio track fed from Recall's PCM
// frames, and points the active AgentSession's RoomIO at that participant.
// When the socket closes (or setup fails) everything is torn down and the
// RoomIO input is restored — but only if THIS connection switched it.
//
// Socket listeners are attached before the first `await`, so a socket that
// closes while setup is still in flight is noticed and cleaned up rather than
// leaking a LiveKit connection.
meetingAudioInWss.on('connection', async (recallWs) => {
    console.log('🎙️ Recall audio-in WebSocket connected — setting up LiveKit publisher');
    const livekitUrl = process.env.LIVEKIT_URL;
    const apiKey = process.env.LIVEKIT_API_KEY;
    const apiSecret = process.env.LIVEKIT_API_SECRET;
    if (!livekitUrl || !apiKey || !apiSecret) {
        console.warn('⚠️ LIVEKIT_URL / LIVEKIT_API_KEY / LIVEKIT_API_SECRET not set — meeting audio publisher disabled');
        recallWs.close();
        return;
    }
    if (!currentRoomCode) {
        console.warn('⚠️ No active LiveKit room (currentRoomCode null) — meeting audio publisher cannot attach');
        recallWs.close();
        return;
    }
    const roomName = `osborn-${currentRoomCode}`;
    const identity = `meeting-audio-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
    // Recall sends S16LE PCM at 16kHz mono; the AudioSource is created to match.
    const SAMPLE_RATE = 16000;
    const NUM_CHANNELS = 1;
    const describe = (err) => (err instanceof Error ? err.message : err);
    let room = null;
    let source = null;
    let track = null;
    let publishing = false; // true once the track is published and frames may be captured
    let switchedInput = false; // true once THIS connection re-pointed the RoomIO input
    let settingUp = true; // while true, the setup path owns teardown
    let closed = false; // set by the socket's 'close' event
    // Safe to call more than once and from overlapping callers: resources are
    // snapshotted and cleared synchronously, so a second call finds nothing to do.
    const cleanup = async () => {
        const doomedTrack = track;
        const doomedSource = source;
        const doomedRoom = room;
        track = null;
        source = null;
        room = null;
        publishing = false;
        // Restore the AgentSession STT input BEFORE tearing down the publisher
        // track, otherwise the session is left waiting on a participant that is
        // about to disappear. Skipped when this connection never switched the
        // input, so a failed setup cannot unlink the real user.
        if (switchedInput) {
            switchedInput = false;
            try {
                const roomIO = activeAgentSession?._roomIO;
                if (roomIO && typeof roomIO.setParticipant === 'function') {
                    if (preMeetingUserIdentity) {
                        roomIO.setParticipant(preMeetingUserIdentity);
                        console.log(`🔁 Restored AgentSession STT input to user: ${preMeetingUserIdentity}`);
                    }
                    else {
                        roomIO.unsetParticipant();
                        console.log('🔁 Cleared AgentSession STT input (no original user to restore)');
                    }
                }
            }
            catch (err) {
                console.warn('⚠️ Failed to restore RoomIO participant on cleanup:', describe(err));
            }
            preMeetingUserIdentity = null;
        }
        // Teardown is best-effort: one failing step must not block the others.
        try {
            if (doomedTrack)
                await doomedTrack.close(true);
        }
        catch (err) {
            console.warn('⚠️ Failed to close meeting audio track:', describe(err));
        }
        try {
            if (doomedSource)
                await doomedSource.close();
        }
        catch (err) {
            console.warn('⚠️ Failed to close meeting audio source:', describe(err));
        }
        try {
            if (doomedRoom)
                await doomedRoom.disconnect();
        }
        catch (err) {
            console.warn('⚠️ Failed to disconnect meeting audio publisher room:', describe(err));
        }
    };
    // Recall → us: JSON events with base64-encoded PCM. Payload shape from
    // docs.recall.ai/docs/how-to-get-separate-audio-per-participant-realtime:
    //   { event: 'audio_separate_raw.data', data: { data: { buffer: '<base64>', ... }, participant: {...} } }
    // Frames that arrive before the track is published are dropped.
    recallWs.on('message', async (raw) => {
        const sink = source;
        if (!publishing || !sink)
            return;
        let msg;
        try {
            msg = JSON.parse(raw.toString());
        }
        catch {
            // Not JSON — deliberately ignored without logging; Recall may send
            // other traffic on this channel and per-frame logs would be noisy.
            return;
        }
        if (msg?.event !== 'audio_separate_raw.data')
            return;
        const b64 = msg.data?.data?.buffer;
        if (typeof b64 !== 'string' || b64.length === 0)
            return;
        try {
            const pcmBuf = Buffer.from(b64, 'base64');
            // Whole 16-bit samples only; a trailing odd byte is discarded.
            const samplesPerChannel = Math.floor(pcmBuf.byteLength / 2);
            if (samplesPerChannel === 0)
                return;
            // Copy into a dedicated ArrayBuffer instead of viewing pcmBuf.buffer:
            // small Buffers are slices of Node's shared pool with a non-zero
            // byteOffset, and a consumer that reads `data.buffer` from offset 0
            // would pick up unrelated bytes.
            // NOTE(review): the LiveKit frame-marshalling path is believed to do
            // exactly that — confirm against @livekit/rtc-node.
            // Like the original view, this assumes a little-endian host.
            const start = pcmBuf.byteOffset;
            const int16 = new Int16Array(pcmBuf.buffer.slice(start, start + samplesPerChannel * 2));
            const frame = new AudioFrame(int16, SAMPLE_RATE, NUM_CHANNELS, samplesPerChannel);
            await sink.captureFrame(frame);
        }
        catch (err) {
            // Capture errors racing with teardown are expected; stay quiet for those.
            if (closed)
                return;
            console.warn('⚠️ meeting audio capture error:', describe(err));
        }
    });
    recallWs.on('close', async () => {
        closed = true;
        console.log('🎙️ Recall audio-in WebSocket closed — tearing down LiveKit publisher');
        // While setup is still running it owns the resources; it checks `closed`
        // after every await and tears down itself, so we never run concurrently.
        if (!settingUp)
            await cleanup();
    });
    recallWs.on('error', (err) => {
        console.warn('⚠️ Recall WS error:', describe(err));
    });
    const abortIfClosed = () => {
        if (closed)
            throw new Error('Recall WebSocket closed during setup');
    };
    try {
        // Mint a publisher token via livekit-server-sdk. Long TTL — meetings can
        // run for hours. Kept inside the try so a signing failure is reported
        // here rather than surfacing as an unhandled rejection.
        const at = new AccessToken(apiKey, apiSecret, {
            identity,
            ttl: 14400, // 4 hours
            metadata: JSON.stringify({ role: 'meeting-audio-publisher' }),
        });
        at.addGrant({ roomJoin: true, room: roomName, canPublish: true, canSubscribe: false });
        const token = await at.toJwt();
        abortIfClosed();
        room = new Room();
        await room.connect(livekitUrl, token);
        abortIfClosed();
        if (!room.localParticipant)
            throw new Error('LiveKit connected but localParticipant missing');
        source = new AudioSource(SAMPLE_RATE, NUM_CHANNELS);
        track = LocalAudioTrack.createAudioTrack('meeting-audio', source);
        await room.localParticipant.publishTrack(track, new TrackPublishOptions({ source: TrackSource.SOURCE_MICROPHONE }));
        abortIfClosed();
        publishing = true;
        console.log(`🎙️ Meeting audio publisher connected to ${roomName} as ${identity}`);
        // B2 — switch the existing AgentSession's RoomIO input from the local user
        // to this meeting-audio publisher. The original identity is stashed so
        // cleanup() can restore it.
        //
        // 15s timeout accommodates: session-start race (agent still booting when
        // user clicks "join meeting"), LiveKit participant-join propagation
        // (~hundreds of ms), and Fly cold-path latency on first request.
        try {
            const { roomIO, participantVisible } = await waitForRoomIOAndParticipant(identity, 15000);
            if (closed) {
                // Socket went away while we were waiting — do not touch the session.
            }
            else if (!roomIO) {
                console.warn('⚠️ Timed out waiting for AgentSession._roomIO (15s) — meeting audio published but STT not switched. Meeting audio will be ignored until a session starts.');
            }
            else {
                preMeetingUserIdentity = roomIO.linkedParticipant?.identity ?? null;
                // Mark before the call: if setParticipant throws part-way, cleanup
                // still restores the stashed identity.
                switchedInput = true;
                roomIO.setParticipant(identity);
                if (participantVisible) {
                    console.log(`🔁 Switched AgentSession STT input: ${preMeetingUserIdentity ?? '(none)'} → ${identity}`);
                }
                else {
                    // RoomIO exists but our publisher hasn't propagated to the agent's
                    // room view yet. Per the original design note, setParticipant stores
                    // the identity and links when the participant-connected event arrives.
                    console.log(`🔁 Switched AgentSession STT input (publisher not yet visible — will link on connect): ${preMeetingUserIdentity ?? '(none)'} → ${identity}`);
                }
            }
        }
        catch (err) {
            console.warn('⚠️ Failed to switch RoomIO participant:', describe(err));
        }
    }
    catch (err) {
        if (closed) {
            console.log('🎙️ Meeting audio publisher setup abandoned — Recall WebSocket closed first');
        }
        else {
            console.error('❌ Failed to set up LiveKit publisher for meeting audio:', describe(err));
        }
        settingUp = false;
        try {
            recallWs.close();
        }
        catch {
            // Best-effort: the socket may already be closing or closed.
        }
        await cleanup();
        return;
    }
    settingUp = false;
    // The socket closed during the tail of setup; the 'close' handler deferred to us.
    if (closed)
        await cleanup();
});
|
|
977
1198
|
server.on('upgrade', (req, socket, head) => {
|
|
978
1199
|
const url = new URL(req.url || '/', `http://localhost:${port}`);
|
|
979
1200
|
if (url.pathname === '/meeting-audio') {
|
|
@@ -981,6 +1202,11 @@ function startApiServer(workingDir, port) {
|
|
|
981
1202
|
meetingOutputWss.emit('connection', ws, req);
|
|
982
1203
|
});
|
|
983
1204
|
}
|
|
1205
|
+
else if (url.pathname === '/meeting-audio-in') {
|
|
1206
|
+
meetingAudioInWss.handleUpgrade(req, socket, head, (ws) => {
|
|
1207
|
+
meetingAudioInWss.emit('connection', ws, req);
|
|
1208
|
+
});
|
|
1209
|
+
}
|
|
984
1210
|
else {
|
|
985
1211
|
socket.destroy();
|
|
986
1212
|
}
|
|
@@ -1224,28 +1450,46 @@ async function main() {
|
|
|
1224
1450
|
let lastCompletedResearch = null;
|
|
1225
1451
|
// No manual queuing — the Claude SDK handles sequential queries internally
|
|
1226
1452
|
// ============================================================
|
|
1227
|
-
// Recall.ai — Meeting Transcript
|
|
1453
|
+
// Recall.ai — Meeting Transcript Listener
|
|
1228
1454
|
// ============================================================
|
|
1455
|
+
// NOTE: LLM-forwarding via Recall webhook STT was DISABLED in the Phase 2
|
|
1456
|
+
// LiveKit-based meeting-bot migration. Reason: Recall sends transcripts as
|
|
1457
|
+
// sentence-level fragments (e.g. "transcript.data" events fire ~once per
|
|
1458
|
+
// sentence). The old code below called currentLLM.chat() PER FRAGMENT, which
|
|
1459
|
+
// meant the agent fired ~10 chat() calls during a single user utterance —
|
|
1460
|
+
// each one prompting a separate response. The agent ended up speaking over
|
|
1461
|
+
// itself answering partial fragments.
|
|
1462
|
+
//
|
|
1463
|
+
// Phase 2 routes meeting audio through LiveKit instead (see
|
|
1464
|
+
// frontend/src/app/meeting-bot/page.tsx). The agent's existing Deepgram Flux
|
|
1465
|
+
// STT processes that audio via end-of-turn detection — ONE chat() call per
|
|
1466
|
+
// actual completed utterance, no fragment storms.
|
|
1467
|
+
//
|
|
1468
|
+
// We keep the listener registered so we have a hook for future work (e.g.
|
|
1469
|
+
// forwarding the live transcript to the frontend chat panel as a read-only
|
|
1470
|
+
// "what was said in the meeting" display, separate from the LLM input path).
|
|
1229
1471
|
const recall = getRecallClient();
|
|
1230
1472
|
if (recall) {
|
|
1231
|
-
console.log('🎥 Recall.ai client initialized (
|
|
1473
|
+
console.log('🎥 Recall.ai client initialized (webhook STT receiver — LLM forwarding disabled, see meeting-bot Phase 2)');
|
|
1232
1474
|
recall.on('transcript', ({ botId, speaker, text }) => {
|
|
1233
1475
|
console.log(`📝 Meeting transcript [${speaker}]: ${text}`);
|
|
1234
|
-
//
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1476
|
+
// INTENTIONALLY DISABLED — see comment above. Audio path is now LiveKit
|
|
1477
|
+
// → meeting-bot page publishes meeting audio → agent STT processes it.
|
|
1478
|
+
// The line below is preserved as a reference for future re-enablement
|
|
1479
|
+
// (e.g. as a display-only feature, NOT as LLM input).
|
|
1480
|
+
//
|
|
1481
|
+
// if (currentLLM && currentSession) {
|
|
1482
|
+
// const meetingText = `[Meeting — ${speaker}]: ${text}`
|
|
1483
|
+
// try {
|
|
1484
|
+
// if (currentVoiceMode === 'pipeline' || currentVoiceMode === 'direct') {
|
|
1485
|
+
// const chatCtx = new llm.ChatContext()
|
|
1486
|
+
// chatCtx.addMessage({ role: 'user', content: meetingText })
|
|
1487
|
+
// ;(currentLLM as any).chat({ chatCtx })
|
|
1488
|
+
// }
|
|
1489
|
+
// } catch (err) {
|
|
1490
|
+
// console.error('❌ Failed to route meeting transcript:', err)
|
|
1491
|
+
// }
|
|
1492
|
+
// }
|
|
1249
1493
|
});
|
|
1250
1494
|
}
|
|
1251
1495
|
// ============================================================
|
|
@@ -1656,9 +1900,34 @@ async function main() {
|
|
|
1656
1900
|
skipTTSQueue: true,
|
|
1657
1901
|
onCompactionEvent: (event) => {
|
|
1658
1902
|
try {
|
|
1659
|
-
// Forward
|
|
1660
|
-
// Spread covers compaction_started/progress/complete (different fields per type).
|
|
1903
|
+
// Forward the raw event so the dedicated banner UI can render it (if/when fixed).
|
|
1661
1904
|
sendToFrontend({ ...event });
|
|
1905
|
+
// ALSO emit as a claude_output chat bubble — reuses the existing message path
|
|
1906
|
+
// that's already working end-to-end. PreCompact → in-progress bubble.
|
|
1907
|
+
// PostCompact → completion bubble with the skills summary. The dedicated
|
|
1908
|
+
// banner has been unreliable in production (data path works on backend, banner
|
|
1909
|
+
// never appears on iPad/iPhone where dev tools aren't accessible). Chat bubbles
|
|
1910
|
+
// are visible without dev tools.
|
|
1911
|
+
if (event.type === 'compaction_started') {
|
|
1912
|
+
const triggerLabel = event.trigger ? ` (${event.trigger})` : '';
|
|
1913
|
+
sendToFrontend({
|
|
1914
|
+
type: 'claude_output',
|
|
1915
|
+
text: `🧠 _Crystallizing session memory…_${triggerLabel}`,
|
|
1916
|
+
agentRole: 'direct',
|
|
1917
|
+
});
|
|
1918
|
+
}
|
|
1919
|
+
else if (event.type === 'compaction_complete') {
|
|
1920
|
+
const ev = event;
|
|
1921
|
+
const n = ev.skillsWritten ?? 0;
|
|
1922
|
+
const names = Array.isArray(ev.skillNames) && ev.skillNames.length > 0
|
|
1923
|
+
? ` — ${ev.skillNames.join(', ')}`
|
|
1924
|
+
: '';
|
|
1925
|
+
sendToFrontend({
|
|
1926
|
+
type: 'claude_output',
|
|
1927
|
+
text: `🧠 Memory crystallized — ${n} skill${n === 1 ? '' : 's'} updated${names}.`,
|
|
1928
|
+
agentRole: 'direct',
|
|
1929
|
+
});
|
|
1930
|
+
}
|
|
1662
1931
|
}
|
|
1663
1932
|
catch { /* non-fatal */ }
|
|
1664
1933
|
},
|
|
@@ -1862,14 +2131,17 @@ async function main() {
|
|
|
1862
2131
|
const sayId = Date.now(); // simple ID to correlate start/end logs
|
|
1863
2132
|
console.log(`🗣️ [${sayId}] session.say START (${data.text.length} chars): "${data.text}"`);
|
|
1864
2133
|
// Forward spoken text + audio to meeting output page when bot is in a meeting.
|
|
1865
|
-
//
|
|
1866
|
-
//
|
|
2134
|
+
// Uses DIRECT_MODE_TTS (same OpenAI fable voice as the live session) — was
|
|
2135
|
+
// previously using directConfig.tts which falls back to DEFAULT_CONFIG.direct.tts
|
|
2136
|
+
// (Deepgram aura-2-asteria-en) when no user config exists, producing a different
|
|
2137
|
+
// voice in the meeting than what the user hears in voice-native. Both paths now
|
|
2138
|
+
// share the single source of truth.
|
|
1867
2139
|
// PCM frames are WAV-encoded and pushed as binary WebSocket frames.
|
|
1868
2140
|
// Recall captures the browser page's audio output and injects it into the meeting.
|
|
1869
2141
|
if (activeMeetingBotId) {
|
|
1870
2142
|
sendToMeetingOutput({ type: 'speak', text: data.text });
|
|
1871
2143
|
if (meetingOutputWs) {
|
|
1872
|
-
synthesizeForMeeting(data.text,
|
|
2144
|
+
synthesizeForMeeting(data.text, DIRECT_MODE_TTS).catch((err) => console.warn('⚠️ Meeting TTS error:', err));
|
|
1873
2145
|
}
|
|
1874
2146
|
}
|
|
1875
2147
|
try {
|
|
@@ -2011,9 +2283,34 @@ async function main() {
|
|
|
2011
2283
|
resumeSessionId,
|
|
2012
2284
|
onCompactionEvent: (event) => {
|
|
2013
2285
|
try {
|
|
2014
|
-
// Forward
|
|
2015
|
-
// Spread covers compaction_started/progress/complete (different fields per type).
|
|
2286
|
+
// Forward the raw event so the dedicated banner UI can render it (if/when fixed).
|
|
2016
2287
|
sendToFrontend({ ...event });
|
|
2288
|
+
// ALSO emit as a claude_output chat bubble — reuses the existing message path
|
|
2289
|
+
// that's already working end-to-end. PreCompact → in-progress bubble.
|
|
2290
|
+
// PostCompact → completion bubble with the skills summary. The dedicated
|
|
2291
|
+
// banner has been unreliable in production (data path works on backend, banner
|
|
2292
|
+
// never appears on iPad/iPhone where dev tools aren't accessible). Chat bubbles
|
|
2293
|
+
// are visible without dev tools.
|
|
2294
|
+
if (event.type === 'compaction_started') {
|
|
2295
|
+
const triggerLabel = event.trigger ? ` (${event.trigger})` : '';
|
|
2296
|
+
sendToFrontend({
|
|
2297
|
+
type: 'claude_output',
|
|
2298
|
+
text: `🧠 _Crystallizing session memory…_${triggerLabel}`,
|
|
2299
|
+
agentRole: 'direct',
|
|
2300
|
+
});
|
|
2301
|
+
}
|
|
2302
|
+
else if (event.type === 'compaction_complete') {
|
|
2303
|
+
const ev = event;
|
|
2304
|
+
const n = ev.skillsWritten ?? 0;
|
|
2305
|
+
const names = Array.isArray(ev.skillNames) && ev.skillNames.length > 0
|
|
2306
|
+
? ` — ${ev.skillNames.join(', ')}`
|
|
2307
|
+
: '';
|
|
2308
|
+
sendToFrontend({
|
|
2309
|
+
type: 'claude_output',
|
|
2310
|
+
text: `🧠 Memory crystallized — ${n} skill${n === 1 ? '' : 's'} updated${names}.`,
|
|
2311
|
+
agentRole: 'direct',
|
|
2312
|
+
});
|
|
2313
|
+
}
|
|
2017
2314
|
}
|
|
2018
2315
|
catch { /* non-fatal */ }
|
|
2019
2316
|
},
|
|
@@ -2559,6 +2856,7 @@ async function main() {
|
|
|
2559
2856
|
}
|
|
2560
2857
|
lastCompletedResearch = null;
|
|
2561
2858
|
currentSession = null;
|
|
2859
|
+
activeAgentSession = null;
|
|
2562
2860
|
currentAgent = null;
|
|
2563
2861
|
// Same disconnect-leak fix as the other two cleanup sites — kill the Claude SDK
|
|
2564
2862
|
// subprocess BEFORE dropping the reference. See killCurrentLLM() for full context.
|
|
@@ -2604,6 +2902,7 @@ async function main() {
|
|
|
2604
2902
|
}
|
|
2605
2903
|
catch { }
|
|
2606
2904
|
currentSession = null;
|
|
2905
|
+
activeAgentSession = null;
|
|
2607
2906
|
currentAgent = null;
|
|
2608
2907
|
// Same disconnect-leak fix — kill the previous user's Claude subprocess
|
|
2609
2908
|
// before binding currentLLM to the new user's session below.
|
|
@@ -2758,6 +3057,7 @@ async function main() {
|
|
|
2758
3057
|
agent = result.agent;
|
|
2759
3058
|
}
|
|
2760
3059
|
currentSession = session;
|
|
3060
|
+
activeAgentSession = session;
|
|
2761
3061
|
currentAgent = agent; // Store for updateChatCtx() context injection
|
|
2762
3062
|
// ============================================================
|
|
2763
3063
|
// Session event wiring — extracted into function for auto-recovery
|
|
@@ -2917,6 +3217,7 @@ async function main() {
|
|
|
2917
3217
|
}
|
|
2918
3218
|
catch { }
|
|
2919
3219
|
currentSession = null;
|
|
3220
|
+
activeAgentSession = null;
|
|
2920
3221
|
currentAgent = null;
|
|
2921
3222
|
// Clear stale state from crashed session
|
|
2922
3223
|
voiceQueue.length = 0;
|
|
@@ -2978,6 +3279,7 @@ async function main() {
|
|
|
2978
3279
|
const newSession = result.session;
|
|
2979
3280
|
const newAgent = result.agent;
|
|
2980
3281
|
currentSession = newSession;
|
|
3282
|
+
activeAgentSession = newSession;
|
|
2981
3283
|
currentAgent = newAgent;
|
|
2982
3284
|
// Re-wire event listeners on the new session
|
|
2983
3285
|
wireSessionEvents(newSession, newAgent);
|
|
@@ -3034,6 +3336,7 @@ async function main() {
|
|
|
3034
3336
|
}
|
|
3035
3337
|
catch { }
|
|
3036
3338
|
currentSession = null;
|
|
3339
|
+
activeAgentSession = null;
|
|
3037
3340
|
currentAgent = null;
|
|
3038
3341
|
// Clear voice queue — stale injections from the crashed session
|
|
3039
3342
|
voiceQueue.length = 0;
|
|
@@ -3057,6 +3360,7 @@ async function main() {
|
|
|
3057
3360
|
const newSession = result.session;
|
|
3058
3361
|
const newAgent = result.agent;
|
|
3059
3362
|
currentSession = newSession;
|
|
3363
|
+
activeAgentSession = newSession;
|
|
3060
3364
|
currentAgent = newAgent;
|
|
3061
3365
|
// Re-wire event listeners on the new session
|
|
3062
3366
|
wireSessionEvents(newSession, newAgent);
|
|
@@ -3251,6 +3555,7 @@ async function main() {
|
|
|
3251
3555
|
if (currentSession) {
|
|
3252
3556
|
const sessionToClose = currentSession;
|
|
3253
3557
|
currentSession = null;
|
|
3558
|
+
activeAgentSession = null;
|
|
3254
3559
|
// Track async close so new connections can wait for byte stream handler to be released
|
|
3255
3560
|
pendingSessionClose = (async () => {
|
|
3256
3561
|
try {
|
|
@@ -3855,8 +4160,57 @@ async function main() {
|
|
|
3855
4160
|
(process.env.FLY_APP_NAME
|
|
3856
4161
|
? `https://${process.env.FLY_APP_NAME}.fly.dev`
|
|
3857
4162
|
: `http://localhost:${apiPort}`);
|
|
4163
|
+
// Try to mint a LiveKit bot token + construct the frontend-hosted
|
|
4164
|
+
// meeting-bot page URL. The bot page joins the same LiveKit room
|
|
4165
|
+
// as this agent so meeting audio flows through LiveKit directly
|
|
4166
|
+
// (no agent-side WebSocket+WAV pipe). Falls back to the legacy
|
|
4167
|
+
// /meeting-output webpage if no frontend URL is resolvable, so
|
|
4168
|
+
// the old code path keeps working during the migration window.
|
|
4169
|
+
//
|
|
4170
|
+
// Frontend URL resolution (in priority order):
|
|
4171
|
+
// 1. data.frontendBase — the public URL the user's browser is on,
|
|
4172
|
+
// passed through the join_meeting data channel message. Works
|
|
4173
|
+
// automatically for localhost dev + production without any
|
|
4174
|
+
// env var.
|
|
4175
|
+
// 2. OSBORN_FRONTEND_URL — existing convention from sprites.ts
|
|
4176
|
+
// (frontend/src/lib/sprites.ts:241) that injects the public
|
|
4177
|
+
// frontend URL into sandbox env vars. Defense in depth.
|
|
4178
|
+
//
|
|
4179
|
+
// Auth: the endpoint uses LiveKit room-presence as the auth check
|
|
4180
|
+
// — no shared secret needed. The agent must already be in the
|
|
4181
|
+
// requested room (which it is by this point) for the mint to
|
|
4182
|
+
// succeed.
|
|
4183
|
+
// Resolve the page Recall's bot should open. When a frontend URL is known, ask
// it to mint a LiveKit token and build the /meeting-bot URL with the token and
// room as query params. On ANY failure outputPageUrl stays undefined, which
// makes joinMeeting fall back to its legacy default page.
let outputPageUrl;
const frontendUrl = data.frontendBase
    || process.env.OSBORN_FRONTEND_URL;
if (frontendUrl) {
    try {
        // Strip trailing slashes so "https://host/" does not yield "//api/...".
        const frontendRoot = String(frontendUrl).replace(/\/+$/, '');
        const botLkId = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
        const tokenRes = await fetch(`${frontendRoot}/api/meeting-bot-token`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ botId: botLkId, roomName }),
            // Bound the request: an unresponsive frontend must not stall the
            // whole join flow. A timeout rejects and lands in the catch below.
            signal: AbortSignal.timeout(10000),
        });
        if (tokenRes.ok) {
            const payload = (await tokenRes.json()) ?? {};
            const { token, url } = payload;
            if (typeof token === 'string' && token.length > 0 && typeof url === 'string' && url.length > 0) {
                const params = new URLSearchParams({ token, url, room: roomName, botId: botLkId });
                outputPageUrl = `${frontendRoot}/meeting-bot?${params.toString()}`;
                console.log(`🎫 Meeting-bot token minted for room=${roomName} bot=${botLkId}`);
            }
            else {
                // Without this check a missing field would be serialized as the
                // literal string "undefined" and the bot page would fail to connect.
                console.warn('⚠️ meeting-bot-token response missing token/url — falling back to legacy /meeting-output path');
            }
        }
        else {
            const errText = await tokenRes.text().catch(() => '');
            console.warn(`⚠️ meeting-bot-token mint failed (HTTP ${tokenRes.status}: ${errText.substring(0, 120)}) — falling back to legacy /meeting-output path`);
        }
    }
    catch (mintErr) {
        console.warn(`⚠️ meeting-bot-token mint threw — falling back: ${mintErr instanceof Error ? mintErr.message : mintErr}`);
    }
}
else {
    console.log('ℹ️ No frontend URL (data.frontendBase + OSBORN_FRONTEND_URL both empty) — using legacy /meeting-output path');
}
|
|
3858
4212
|
await sendToFrontend({ type: 'meeting_joining', message: 'Osborn is joining your meeting...' });
|
|
3859
|
-
const botId = await recallJoin.joinMeeting(meetingUrl, webhookBase);
|
|
4213
|
+
const botId = await recallJoin.joinMeeting(meetingUrl, webhookBase, { outputPageUrl });
|
|
3860
4214
|
const sessionId = currentLLM?.sessionId || currentResumeSessionId || 'default';
|
|
3861
4215
|
recallJoin.registerBot(botId, sessionId);
|
|
3862
4216
|
activeMeetingBotId = botId;
|
package/dist/recall-client.d.ts
CHANGED
|
@@ -36,7 +36,23 @@ export interface TranscriptPayload {
|
|
|
36
36
|
export declare class RecallClient extends EventEmitter {
|
|
37
37
|
#private;
|
|
38
38
|
constructor(apiKey: string);
|
|
39
|
-
|
|
39
|
+
/**
|
|
40
|
+
* Join a meeting via Recall.ai.
|
|
41
|
+
*
|
|
42
|
+
* @param meetingUrl Zoom / Google Meet / Teams URL the bot should dial in to
|
|
43
|
+
* @param webhookBaseUrl Base URL for the agent's HTTP endpoints (transcript webhook)
|
|
44
|
+
* @param opts.outputPageUrl Full URL for the bot's camera/audio page. If provided,
|
|
45
|
+
* replaces the default `${webhookBaseUrl}/meeting-output`.
|
|
46
|
+
* Used to point at the frontend-hosted /meeting-bot page
|
|
47
|
+
* with token + room embedded as query params, so the page
|
|
48
|
+
* connects to LiveKit and audio flows through the same
|
|
49
|
+
* room as the osborn agent (no separate WebSocket+WAV pipe).
|
|
50
|
+
* @param opts.botName Display name of the bot in the meeting
|
|
51
|
+
*/
|
|
52
|
+
joinMeeting(meetingUrl: string, webhookBaseUrl: string, opts?: {
|
|
53
|
+
outputPageUrl?: string;
|
|
54
|
+
botName?: string;
|
|
55
|
+
}): Promise<string>;
|
|
40
56
|
leaveMeeting(botId: string): Promise<void>;
|
|
41
57
|
getBotStatus(botId: string): Promise<string>;
|
|
42
58
|
handleWebhook(payload: TranscriptPayload): void;
|
package/dist/recall-client.js
CHANGED
|
@@ -8,7 +8,22 @@ export class RecallClient extends EventEmitter {
|
|
|
8
8
|
super();
|
|
9
9
|
this.#apiKey = apiKey;
|
|
10
10
|
}
|
|
11
|
-
|
|
11
|
+
/**
|
|
12
|
+
* Join a meeting via Recall.ai.
|
|
13
|
+
*
|
|
14
|
+
* @param meetingUrl Zoom / Google Meet / Teams URL the bot should dial in to
|
|
15
|
+
* @param webhookBaseUrl Base URL for the agent's HTTP endpoints (transcript webhook)
|
|
16
|
+
* @param opts.outputPageUrl Full URL for the bot's camera/audio page. If provided,
|
|
17
|
+
* replaces the default `${webhookBaseUrl}/meeting-output`.
|
|
18
|
+
* Used to point at the frontend-hosted /meeting-bot page
|
|
19
|
+
* with token + room embedded as query params, so the page
|
|
20
|
+
* connects to LiveKit and audio flows through the same
|
|
21
|
+
* room as the osborn agent (no separate WebSocket+WAV pipe).
|
|
22
|
+
* @param opts.botName Display name of the bot in the meeting
|
|
23
|
+
*/
|
|
24
|
+
async joinMeeting(meetingUrl, webhookBaseUrl, opts) {
|
|
25
|
+
const botName = opts?.botName ?? 'Osborn';
|
|
26
|
+
const outputPageUrl = opts?.outputPageUrl ?? `${webhookBaseUrl}/meeting-output`;
|
|
12
27
|
// Authoritative structure per https://docs.recall.ai/reference/bot_create
|
|
13
28
|
// and https://docs.recall.ai/docs/real-time-transcription:
|
|
14
29
|
//
|
|
@@ -20,6 +35,23 @@ export class RecallClient extends EventEmitter {
|
|
|
20
35
|
// - `url` and `events` are flat on the endpoint object (NOT nested under `config`)
|
|
21
36
|
// - `transcription_options` does NOT exist — use `transcript.provider`
|
|
22
37
|
// - Both transcript.provider AND realtime_endpoints must be set, or no events delivered
|
|
38
|
+
//
|
|
39
|
+
// ARCHITECTURE (post-2026-05-22 redesign):
|
|
40
|
+
// Input (meeting → osborn): Recall's documented WebSocket audio protocol.
|
|
41
|
+
// `audio_separate_raw` config + websocket realtime endpoint streams
|
|
42
|
+
// per-participant PCM (S16LE 16kHz mono, base64 in JSON) to the agent's
|
|
43
|
+
// /meeting-audio-in WS handler. Bot's own audio is excluded by default
|
|
44
|
+
// → zero possibility of feedback loop, no echo cancellation needed.
|
|
45
|
+
// Output (osborn → meeting): webpage output_media (LiveKit-on-page). Bot
|
|
46
|
+
// page subscribes to osborn's LiveKit audio track and plays it via
|
|
47
|
+
// track.attach(); Recall captures the page's audio output and injects
|
|
48
|
+
// into the meeting.
|
|
49
|
+
// Webhook transcripts (transcript.data): retained as a SECONDARY signal —
|
|
50
|
+
// the agent index.ts handler for this event currently logs but does NOT
|
|
51
|
+
// forward to the LLM (intentionally disabled). The Deepgram WS path
|
|
52
|
+
// above is the LLM input.
|
|
53
|
+
const httpBase = webhookBaseUrl.replace(/\/$/, '');
|
|
54
|
+
const wsBase = httpBase.replace(/^https?:\/\//, m => m === 'https://' ? 'wss://' : 'ws://');
|
|
23
55
|
const res = await fetch(`${RECALL_BASE_URL}/bot`, {
|
|
24
56
|
method: 'POST',
|
|
25
57
|
headers: {
|
|
@@ -34,25 +66,42 @@ export class RecallClient extends EventEmitter {
|
|
|
34
66
|
provider: {
|
|
35
67
|
// recallai_streaming is built-in — no external API key needed,
|
|
36
68
|
// low-latency, works across all meeting platforms.
|
|
69
|
+
// Kept for the secondary webhook signal (display / future use);
|
|
70
|
+
// LLM input now comes from the Deepgram WS pipe below.
|
|
37
71
|
recallai_streaming: {
|
|
38
72
|
mode: 'prioritize_low_latency',
|
|
39
73
|
language_code: 'en',
|
|
40
74
|
},
|
|
41
75
|
},
|
|
42
76
|
},
|
|
43
|
-
|
|
77
|
+
// Per-participant raw PCM audio stream. Bot's own audio is excluded
|
|
78
|
+
// (we don't set include_bot_in_recording.audio:true).
|
|
79
|
+
audio_separate_raw: {},
|
|
80
|
+
realtime_endpoints: [
|
|
81
|
+
{
|
|
82
|
+
// Transcript webhook (secondary signal; LLM forwarding disabled).
|
|
44
83
|
type: 'webhook',
|
|
45
|
-
url: `${
|
|
84
|
+
url: `${httpBase}/webhook/recall`,
|
|
46
85
|
events: ['transcript.data'],
|
|
47
|
-
}
|
|
86
|
+
},
|
|
87
|
+
{
|
|
88
|
+
// Per-participant PCM audio → agent's Deepgram STT pipe.
|
|
89
|
+
type: 'websocket',
|
|
90
|
+
url: `${wsBase}/meeting-audio-in`,
|
|
91
|
+
events: ['audio_separate_raw.data'],
|
|
92
|
+
},
|
|
93
|
+
],
|
|
48
94
|
},
|
|
49
95
|
output_media: {
|
|
50
96
|
camera: {
|
|
51
97
|
// `kind` (not `type`) — confirmed from prior debugging.
|
|
52
|
-
//
|
|
98
|
+
// The page Recall renders connects to LiveKit and plays osborn's
|
|
99
|
+
// TTS audio via track.attach(); Recall captures the page audio.
|
|
100
|
+
// The page does NOT call getUserMedia anymore — input now comes
|
|
101
|
+
// from the audio_separate_raw WebSocket above.
|
|
53
102
|
kind: 'webpage',
|
|
54
103
|
config: {
|
|
55
|
-
url:
|
|
104
|
+
url: outputPageUrl,
|
|
56
105
|
},
|
|
57
106
|
},
|
|
58
107
|
},
|
|
@@ -63,7 +112,7 @@ export class RecallClient extends EventEmitter {
|
|
|
63
112
|
throw new Error(`Recall.ai join failed: ${res.status} ${err}`);
|
|
64
113
|
}
|
|
65
114
|
const bot = (await res.json());
|
|
66
|
-
console.log(`🤖 Recall.ai bot joined meeting: ${bot.id}`);
|
|
115
|
+
console.log(`🤖 Recall.ai bot joined meeting: ${bot.id} (output page: ${outputPageUrl})`);
|
|
67
116
|
return bot.id;
|
|
68
117
|
}
|
|
69
118
|
async leaveMeeting(botId) {
|