osborn 0.9.38 → 0.9.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +164 -78
- package/dist/recall-client.d.ts +17 -1
- package/dist/recall-client.js +22 -4
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
// Load environment variables FIRST before any other imports
|
|
2
2
|
import 'dotenv/config';
|
|
3
3
|
import { voice, initializeLogger } from '@livekit/agents';
|
|
4
|
-
import { Room, RoomEvent, RemoteParticipant } from '@livekit/rtc-node';
|
|
4
|
+
import { Room, RoomEvent } from '@livekit/rtc-node';
|
|
5
5
|
import { AccessToken } from 'livekit-server-sdk';
|
|
6
6
|
// Initialize logger before anything else
|
|
7
7
|
initializeLogger({ pretty: true, level: 'info' });
|
|
@@ -1224,28 +1224,46 @@ async function main() {
|
|
|
1224
1224
|
let lastCompletedResearch = null;
|
|
1225
1225
|
// No manual queuing — the Claude SDK handles sequential queries internally
|
|
1226
1226
|
// ============================================================
|
|
1227
|
-
// Recall.ai — Meeting Transcript
|
|
1227
|
+
// Recall.ai — Meeting Transcript Listener
|
|
1228
1228
|
// ============================================================
|
|
1229
|
+
// NOTE: LLM-forwarding via Recall webhook STT was DISABLED in the Phase 2
|
|
1230
|
+
// LiveKit-based meeting-bot migration. Reason: Recall sends transcripts as
|
|
1231
|
+
// sentence-level fragments (e.g. "transcript.data" events fire ~once per
|
|
1232
|
+
// sentence). The old code below called currentLLM.chat() PER FRAGMENT, which
|
|
1233
|
+
// meant the agent fired ~10 chat() calls during a single user utterance —
|
|
1234
|
+
// each one prompting a separate response. The agent ended up speaking over
|
|
1235
|
+
// itself answering partial fragments.
|
|
1236
|
+
//
|
|
1237
|
+
// Phase 2 routes meeting audio through LiveKit instead (see
|
|
1238
|
+
// frontend/src/app/meeting-bot/page.tsx). The agent's existing Deepgram Flux
|
|
1239
|
+
// STT processes that audio via end-of-turn detection — ONE chat() call per
|
|
1240
|
+
// actual completed utterance, no fragment storms.
|
|
1241
|
+
//
|
|
1242
|
+
// We keep the listener registered so we have a hook for future work (e.g.
|
|
1243
|
+
// forwarding the live transcript to the frontend chat panel as a read-only
|
|
1244
|
+
// "what was said in the meeting" display, separate from the LLM input path).
|
|
1229
1245
|
const recall = getRecallClient();
|
|
1230
1246
|
if (recall) {
|
|
1231
|
-
console.log('🎥 Recall.ai client initialized (
|
|
1247
|
+
console.log('🎥 Recall.ai client initialized (webhook STT receiver — LLM forwarding disabled, see meeting-bot Phase 2)');
|
|
1232
1248
|
recall.on('transcript', ({ botId, speaker, text }) => {
|
|
1233
1249
|
console.log(`📝 Meeting transcript [${speaker}]: ${text}`);
|
|
1234
|
-
//
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1250
|
+
// INTENTIONALLY DISABLED — see comment above. Audio path is now LiveKit
|
|
1251
|
+
// → meeting-bot page publishes meeting audio → agent STT processes it.
|
|
1252
|
+
// The line below is preserved as a reference for future re-enablement
|
|
1253
|
+
// (e.g. as a display-only feature, NOT as LLM input).
|
|
1254
|
+
//
|
|
1255
|
+
// if (currentLLM && currentSession) {
|
|
1256
|
+
// const meetingText = `[Meeting — ${speaker}]: ${text}`
|
|
1257
|
+
// try {
|
|
1258
|
+
// if (currentVoiceMode === 'pipeline' || currentVoiceMode === 'direct') {
|
|
1259
|
+
// const chatCtx = new llm.ChatContext()
|
|
1260
|
+
// chatCtx.addMessage({ role: 'user', content: meetingText })
|
|
1261
|
+
// ;(currentLLM as any).chat({ chatCtx })
|
|
1262
|
+
// }
|
|
1263
|
+
// } catch (err) {
|
|
1264
|
+
// console.error('❌ Failed to route meeting transcript:', err)
|
|
1265
|
+
// }
|
|
1266
|
+
// }
|
|
1249
1267
|
});
|
|
1250
1268
|
}
|
|
1251
1269
|
// ============================================================
|
|
@@ -1656,9 +1674,34 @@ async function main() {
|
|
|
1656
1674
|
skipTTSQueue: true,
|
|
1657
1675
|
onCompactionEvent: (event) => {
|
|
1658
1676
|
try {
|
|
1659
|
-
// Forward
|
|
1660
|
-
// Spread covers compaction_started/progress/complete (different fields per type).
|
|
1677
|
+
// Forward the raw event so the dedicated banner UI can render it (if/when fixed).
|
|
1661
1678
|
sendToFrontend({ ...event });
|
|
1679
|
+
// ALSO emit as a claude_output chat bubble — reuses the existing message path
|
|
1680
|
+
// that's already working end-to-end. PreCompact → in-progress bubble.
|
|
1681
|
+
// PostCompact → completion bubble with the skills summary. The dedicated
|
|
1682
|
+
// banner has been unreliable in production (data path works on backend, banner
|
|
1683
|
+
// never appears on iPad/iPhone where dev tools aren't accessible). Chat bubbles
|
|
1684
|
+
// are visible without dev tools.
|
|
1685
|
+
if (event.type === 'compaction_started') {
|
|
1686
|
+
const triggerLabel = event.trigger ? ` (${event.trigger})` : '';
|
|
1687
|
+
sendToFrontend({
|
|
1688
|
+
type: 'claude_output',
|
|
1689
|
+
text: `🧠 _Crystallizing session memory…_${triggerLabel}`,
|
|
1690
|
+
agentRole: 'direct',
|
|
1691
|
+
});
|
|
1692
|
+
}
|
|
1693
|
+
else if (event.type === 'compaction_complete') {
|
|
1694
|
+
const ev = event;
|
|
1695
|
+
const n = ev.skillsWritten ?? 0;
|
|
1696
|
+
const names = Array.isArray(ev.skillNames) && ev.skillNames.length > 0
|
|
1697
|
+
? ` — ${ev.skillNames.join(', ')}`
|
|
1698
|
+
: '';
|
|
1699
|
+
sendToFrontend({
|
|
1700
|
+
type: 'claude_output',
|
|
1701
|
+
text: `🧠 Memory crystallized — ${n} skill${n === 1 ? '' : 's'} updated${names}.`,
|
|
1702
|
+
agentRole: 'direct',
|
|
1703
|
+
});
|
|
1704
|
+
}
|
|
1662
1705
|
}
|
|
1663
1706
|
catch { /* non-fatal */ }
|
|
1664
1707
|
},
|
|
@@ -1862,14 +1905,17 @@ async function main() {
|
|
|
1862
1905
|
const sayId = Date.now(); // simple ID to correlate start/end logs
|
|
1863
1906
|
console.log(`🗣️ [${sayId}] session.say START (${data.text.length} chars): "${data.text}"`);
|
|
1864
1907
|
// Forward spoken text + audio to meeting output page when bot is in a meeting.
|
|
1865
|
-
//
|
|
1866
|
-
//
|
|
1908
|
+
// Uses DIRECT_MODE_TTS (same OpenAI fable voice as the live session) — was
|
|
1909
|
+
// previously using directConfig.tts which falls back to DEFAULT_CONFIG.direct.tts
|
|
1910
|
+
// (Deepgram aura-2-asteria-en) when no user config exists, producing a different
|
|
1911
|
+
// voice in the meeting than what the user hears in voice-native. Both paths now
|
|
1912
|
+
// share the single source of truth.
|
|
1867
1913
|
// PCM frames are WAV-encoded and pushed as binary WebSocket frames.
|
|
1868
1914
|
// Recall captures the browser page's audio output and injects it into the meeting.
|
|
1869
1915
|
if (activeMeetingBotId) {
|
|
1870
1916
|
sendToMeetingOutput({ type: 'speak', text: data.text });
|
|
1871
1917
|
if (meetingOutputWs) {
|
|
1872
|
-
synthesizeForMeeting(data.text, directConfig.tts).catch((err) => console.warn('⚠️ Meeting TTS error:', err));
|
|
1918
|
+
synthesizeForMeeting(data.text, DIRECT_MODE_TTS).catch((err) => console.warn('⚠️ Meeting TTS error:', err));
|
|
1873
1919
|
}
|
|
1874
1920
|
}
|
|
1875
1921
|
try {
|
|
@@ -2011,9 +2057,34 @@ async function main() {
|
|
|
2011
2057
|
resumeSessionId,
|
|
2012
2058
|
onCompactionEvent: (event) => {
|
|
2013
2059
|
try {
|
|
2014
|
-
// Forward
|
|
2015
|
-
// Spread covers compaction_started/progress/complete (different fields per type).
|
|
2060
|
+
// Forward the raw event so the dedicated banner UI can render it (if/when fixed).
|
|
2016
2061
|
sendToFrontend({ ...event });
|
|
2062
|
+
// ALSO emit as a claude_output chat bubble — reuses the existing message path
|
|
2063
|
+
// that's already working end-to-end. PreCompact → in-progress bubble.
|
|
2064
|
+
// PostCompact → completion bubble with the skills summary. The dedicated
|
|
2065
|
+
// banner has been unreliable in production (data path works on backend, banner
|
|
2066
|
+
// never appears on iPad/iPhone where dev tools aren't accessible). Chat bubbles
|
|
2067
|
+
// are visible without dev tools.
|
|
2068
|
+
if (event.type === 'compaction_started') {
|
|
2069
|
+
const triggerLabel = event.trigger ? ` (${event.trigger})` : '';
|
|
2070
|
+
sendToFrontend({
|
|
2071
|
+
type: 'claude_output',
|
|
2072
|
+
text: `🧠 _Crystallizing session memory…_${triggerLabel}`,
|
|
2073
|
+
agentRole: 'direct',
|
|
2074
|
+
});
|
|
2075
|
+
}
|
|
2076
|
+
else if (event.type === 'compaction_complete') {
|
|
2077
|
+
const ev = event;
|
|
2078
|
+
const n = ev.skillsWritten ?? 0;
|
|
2079
|
+
const names = Array.isArray(ev.skillNames) && ev.skillNames.length > 0
|
|
2080
|
+
? ` — ${ev.skillNames.join(', ')}`
|
|
2081
|
+
: '';
|
|
2082
|
+
sendToFrontend({
|
|
2083
|
+
type: 'claude_output',
|
|
2084
|
+
text: `🧠 Memory crystallized — ${n} skill${n === 1 ? '' : 's'} updated${names}.`,
|
|
2085
|
+
agentRole: 'direct',
|
|
2086
|
+
});
|
|
2087
|
+
}
|
|
2017
2088
|
}
|
|
2018
2089
|
catch { /* non-fatal */ }
|
|
2019
2090
|
},
|
|
@@ -2530,51 +2601,16 @@ async function main() {
|
|
|
2530
2601
|
console.log('✅ Connected to room:', roomName);
|
|
2531
2602
|
localParticipant = room.localParticipant;
|
|
2532
2603
|
});
|
|
2533
|
-
//
|
|
2534
|
-
//
|
|
2535
|
-
//
|
|
2536
|
-
//
|
|
2537
|
-
//
|
|
2538
|
-
//
|
|
2539
|
-
//
|
|
2540
|
-
//
|
|
2541
|
-
//
|
|
2542
|
-
//
|
|
2543
|
-
//
|
|
2544
|
-
// Filter is `instanceof RemoteParticipant`. The agent IS the LocalParticipant in this
|
|
2545
|
-
// room, and when its TTS plays it appears in the active-speakers list too. An earlier
|
|
2546
|
-
// attempt that compared `s.identity !== room.localParticipant?.identity` failed because
|
|
2547
|
-
// localParticipant.identity could be undefined at event-fire time, letting the agent's
|
|
2548
|
-
// own speech trigger a self-interrupt. The type check is bulletproof.
|
|
2549
|
-
//
|
|
2550
|
-
// Realtime mode skipped — the SDK handles interruption internally there, and manual
|
|
2551
|
-
// interrupt for Gemini realtime crashes its state machine (code 1008, memory v0.4.5).
|
|
2552
|
-
let lastActiveSpeakerInterruptAt = 0;
|
|
2553
|
-
room.on(RoomEvent.ActiveSpeakersChanged, (speakers) => {
|
|
2554
|
-
if (!Array.isArray(speakers) || speakers.length === 0)
|
|
2555
|
-
return;
|
|
2556
|
-
const remoteSpeakers = speakers.filter((s) => s instanceof RemoteParticipant);
|
|
2557
|
-
if (remoteSpeakers.length === 0)
|
|
2558
|
-
return;
|
|
2559
|
-
if (currentVoiceMode === 'realtime')
|
|
2560
|
-
return;
|
|
2561
|
-
if (agentState !== 'speaking')
|
|
2562
|
-
return;
|
|
2563
|
-
const now = Date.now();
|
|
2564
|
-
const debounced = now - lastActiveSpeakerInterruptAt < 1000;
|
|
2565
|
-
lastActiveSpeakerInterruptAt = now;
|
|
2566
|
-
try {
|
|
2567
|
-
if (!debounced) {
|
|
2568
|
-
const ids = remoteSpeakers.map((s) => s.identity).join(',');
|
|
2569
|
-
console.log(`🎤 ActiveSpeakersChanged: remote speakers [${ids}] + agent speaking → interrupting TTS`);
|
|
2570
|
-
}
|
|
2571
|
-
currentSession?.interrupt();
|
|
2572
|
-
}
|
|
2573
|
-
catch (err) {
|
|
2574
|
-
if (!debounced)
|
|
2575
|
-
console.warn('⚠️ active-speaker interrupt failed:', err instanceof Error ? err.message : err);
|
|
2576
|
-
}
|
|
2577
|
-
});
|
|
2604
|
+
// NOTE: previously this section also had a RoomEvent.ActiveSpeakersChanged
|
|
2605
|
+
// handler that interrupted TTS on any sustained audio activity (~50ms after
|
|
2606
|
+
// mic onset). That fired too eagerly — coughs, paper rustles, the agent's
|
|
2607
|
+
// own TTS bleeding through the mic, and other non-speech sounds tripped it
|
|
2608
|
+
// ~10-15% of the time, leaving the agent silent with no recovery path
|
|
2609
|
+
// (because no STT transcript would follow). Dropped in favor of the
|
|
2610
|
+
// user_state_changed → 'speaking' handler below, which is fed by Deepgram
|
|
2611
|
+
// Flux STT's speech-vs-noise classification: slower (~100-300ms) but
|
|
2612
|
+
// confidence-aware. The latency tradeoff is worth eliminating the false
|
|
2613
|
+
// interrupts at the root.
|
|
2578
2614
|
room.on(RoomEvent.Disconnected, () => {
|
|
2579
2615
|
console.log('👋 Disconnected from room');
|
|
2580
2616
|
// Clean up active research and voice queue
|
|
@@ -2868,19 +2904,20 @@ async function main() {
|
|
|
2868
2904
|
}
|
|
2869
2905
|
});
|
|
2870
2906
|
// User state tracking — prevents queue from colliding with server-side VAD.
|
|
2871
|
-
// Also
|
|
2872
|
-
//
|
|
2873
|
-
//
|
|
2874
|
-
//
|
|
2875
|
-
//
|
|
2876
|
-
//
|
|
2907
|
+
// Also the PRIMARY interrupt trigger now that the over-eager ActiveSpeakersChanged
|
|
2908
|
+
// path is gone. Fires when Deepgram Flux STT classifies frames as speech (not noise)
|
|
2909
|
+
// and propagates via agent_activity.onStartOfSpeech → _updateUserState('speaking').
|
|
2910
|
+
// Latency ~100-300ms after mic onset, which is the cost of confidence-aware
|
|
2911
|
+
// detection — vs the prior ActiveSpeakers handler that fired at ~50ms on any audio
|
|
2912
|
+
// activity and tripped ~10-15% false interrupts on coughs, paper rustle, agent's
|
|
2913
|
+
// own TTS bleeding through the mic, etc.
|
|
2877
2914
|
sess.on('user_state_changed', (ev) => {
|
|
2878
2915
|
const prev = userState;
|
|
2879
2916
|
userState = ev.newState;
|
|
2880
2917
|
console.log(`👤 User state: ${prev} → ${ev.newState} (agent: ${agentState})`);
|
|
2881
2918
|
if (ev.newState === 'speaking' && agentState === 'speaking' && sessionVoiceMode !== 'realtime') {
|
|
2882
2919
|
try {
|
|
2883
|
-
console.log('🎤 user_state_changed=speaking + agent speaking → interrupting TTS
|
|
2920
|
+
console.log('🎤 user_state_changed=speaking + agent speaking → interrupting TTS');
|
|
2884
2921
|
currentSession?.interrupt();
|
|
2885
2922
|
}
|
|
2886
2923
|
catch (err) {
|
|
@@ -3889,8 +3926,57 @@ async function main() {
|
|
|
3889
3926
|
(process.env.FLY_APP_NAME
|
|
3890
3927
|
? `https://${process.env.FLY_APP_NAME}.fly.dev`
|
|
3891
3928
|
: `http://localhost:${apiPort}`);
|
|
3929
|
+
// Try to mint a LiveKit bot token + construct the frontend-hosted
|
|
3930
|
+
// meeting-bot page URL. The bot page joins the same LiveKit room
|
|
3931
|
+
// as this agent so meeting audio flows through LiveKit directly
|
|
3932
|
+
// (no agent-side WebSocket+WAV pipe). Falls back to the legacy
|
|
3933
|
+
// /meeting-output webpage if no frontend URL is resolvable, so
|
|
3934
|
+
// the old code path keeps working during the migration window.
|
|
3935
|
+
//
|
|
3936
|
+
// Frontend URL resolution (in priority order):
|
|
3937
|
+
// 1. data.frontendBase — the public URL the user's browser is on,
|
|
3938
|
+
// passed through the join_meeting data channel message. Works
|
|
3939
|
+
// automatically for localhost dev + production without any
|
|
3940
|
+
// env var.
|
|
3941
|
+
// 2. OSBORN_FRONTEND_URL — existing convention from sprites.ts
|
|
3942
|
+
// (frontend/src/lib/sprites.ts:241) that injects the public
|
|
3943
|
+
// frontend URL into sandbox env vars. Defense in depth.
|
|
3944
|
+
//
|
|
3945
|
+
// Auth: the endpoint uses LiveKit room-presence as the auth check
|
|
3946
|
+
// — no shared secret needed. The agent must already be in the
|
|
3947
|
+
// requested room (which it is by this point) for the mint to
|
|
3948
|
+
// succeed.
|
|
3949
|
+
let outputPageUrl;
|
|
3950
|
+
const frontendUrl = data.frontendBase
|
|
3951
|
+
|| process.env.OSBORN_FRONTEND_URL;
|
|
3952
|
+
if (frontendUrl) {
|
|
3953
|
+
try {
|
|
3954
|
+
const botLkId = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
|
|
3955
|
+
const tokenRes = await fetch(`${frontendUrl}/api/meeting-bot-token`, {
|
|
3956
|
+
method: 'POST',
|
|
3957
|
+
headers: { 'Content-Type': 'application/json' },
|
|
3958
|
+
body: JSON.stringify({ botId: botLkId, roomName }),
|
|
3959
|
+
});
|
|
3960
|
+
if (tokenRes.ok) {
|
|
3961
|
+
const { token, url } = await tokenRes.json();
|
|
3962
|
+
const params = new URLSearchParams({ token, url, room: roomName, botId: botLkId });
|
|
3963
|
+
outputPageUrl = `${frontendUrl}/meeting-bot?${params.toString()}`;
|
|
3964
|
+
console.log(`🎫 Meeting-bot token minted for room=${roomName} bot=${botLkId}`);
|
|
3965
|
+
}
|
|
3966
|
+
else {
|
|
3967
|
+
const errText = await tokenRes.text().catch(() => '');
|
|
3968
|
+
console.warn(`⚠️ meeting-bot-token mint failed (HTTP ${tokenRes.status}: ${errText.substring(0, 120)}) — falling back to legacy /meeting-output path`);
|
|
3969
|
+
}
|
|
3970
|
+
}
|
|
3971
|
+
catch (mintErr) {
|
|
3972
|
+
console.warn(`⚠️ meeting-bot-token mint threw — falling back: ${mintErr.message}`);
|
|
3973
|
+
}
|
|
3974
|
+
}
|
|
3975
|
+
else {
|
|
3976
|
+
console.log('ℹ️ No frontend URL (data.frontendBase + OSBORN_FRONTEND_URL both empty) — using legacy /meeting-output path');
|
|
3977
|
+
}
|
|
3892
3978
|
await sendToFrontend({ type: 'meeting_joining', message: 'Osborn is joining your meeting...' });
|
|
3893
|
-
const botId = await recallJoin.joinMeeting(meetingUrl, webhookBase);
|
|
3979
|
+
const botId = await recallJoin.joinMeeting(meetingUrl, webhookBase, { outputPageUrl });
|
|
3894
3980
|
const sessionId = currentLLM?.sessionId || currentResumeSessionId || 'default';
|
|
3895
3981
|
recallJoin.registerBot(botId, sessionId);
|
|
3896
3982
|
activeMeetingBotId = botId;
|
package/dist/recall-client.d.ts
CHANGED
|
@@ -36,7 +36,23 @@ export interface TranscriptPayload {
|
|
|
36
36
|
export declare class RecallClient extends EventEmitter {
|
|
37
37
|
#private;
|
|
38
38
|
constructor(apiKey: string);
|
|
39
|
-
|
|
39
|
+
/**
|
|
40
|
+
* Join a meeting via Recall.ai.
|
|
41
|
+
*
|
|
42
|
+
* @param meetingUrl Zoom / Google Meet / Teams URL the bot should dial in to
|
|
43
|
+
* @param webhookBaseUrl Base URL for the agent's HTTP endpoints (transcript webhook)
|
|
44
|
+
* @param opts.outputPageUrl Full URL for the bot's camera/audio page. If provided,
|
|
45
|
+
* replaces the default `${webhookBaseUrl}/meeting-output`.
|
|
46
|
+
* Used to point at the frontend-hosted /meeting-bot page
|
|
47
|
+
* with token + room embedded as query params, so the page
|
|
48
|
+
* connects to LiveKit and audio flows through the same
|
|
49
|
+
* room as the osborn agent (no separate WebSocket+WAV pipe).
|
|
50
|
+
* @param opts.botName Display name of the bot in the meeting
|
|
51
|
+
*/
|
|
52
|
+
joinMeeting(meetingUrl: string, webhookBaseUrl: string, opts?: {
|
|
53
|
+
outputPageUrl?: string;
|
|
54
|
+
botName?: string;
|
|
55
|
+
}): Promise<string>;
|
|
40
56
|
leaveMeeting(botId: string): Promise<void>;
|
|
41
57
|
getBotStatus(botId: string): Promise<string>;
|
|
42
58
|
handleWebhook(payload: TranscriptPayload): void;
|
package/dist/recall-client.js
CHANGED
|
@@ -8,7 +8,22 @@ export class RecallClient extends EventEmitter {
|
|
|
8
8
|
super();
|
|
9
9
|
this.#apiKey = apiKey;
|
|
10
10
|
}
|
|
11
|
-
|
|
11
|
+
/**
|
|
12
|
+
* Join a meeting via Recall.ai.
|
|
13
|
+
*
|
|
14
|
+
* @param meetingUrl Zoom / Google Meet / Teams URL the bot should dial in to
|
|
15
|
+
* @param webhookBaseUrl Base URL for the agent's HTTP endpoints (transcript webhook)
|
|
16
|
+
* @param opts.outputPageUrl Full URL for the bot's camera/audio page. If provided,
|
|
17
|
+
* replaces the default `${webhookBaseUrl}/meeting-output`.
|
|
18
|
+
* Used to point at the frontend-hosted /meeting-bot page
|
|
19
|
+
* with token + room embedded as query params, so the page
|
|
20
|
+
* connects to LiveKit and audio flows through the same
|
|
21
|
+
* room as the osborn agent (no separate WebSocket+WAV pipe).
|
|
22
|
+
* @param opts.botName Display name of the bot in the meeting
|
|
23
|
+
*/
|
|
24
|
+
async joinMeeting(meetingUrl, webhookBaseUrl, opts) {
|
|
25
|
+
const botName = opts?.botName ?? 'Osborn';
|
|
26
|
+
const outputPageUrl = opts?.outputPageUrl ?? `${webhookBaseUrl}/meeting-output`;
|
|
12
27
|
// Authoritative structure per https://docs.recall.ai/reference/bot_create
|
|
13
28
|
// and https://docs.recall.ai/docs/real-time-transcription:
|
|
14
29
|
//
|
|
@@ -49,10 +64,13 @@ export class RecallClient extends EventEmitter {
|
|
|
49
64
|
output_media: {
|
|
50
65
|
camera: {
|
|
51
66
|
// `kind` (not `type`) — confirmed from prior debugging.
|
|
52
|
-
//
|
|
67
|
+
// The page Recall renders is responsible for joining the same LiveKit
|
|
68
|
+
// room as the osborn agent: meeting audio captured via getUserMedia is
|
|
69
|
+
// published into the room; osborn's TTS audio (already in the room) is
|
|
70
|
+
// played by the page and captured by Recall as the bot's mic output.
|
|
53
71
|
kind: 'webpage',
|
|
54
72
|
config: {
|
|
55
|
-
url: `${webhookBaseUrl}/meeting-output`,
|
|
73
|
+
url: outputPageUrl,
|
|
56
74
|
},
|
|
57
75
|
},
|
|
58
76
|
},
|
|
@@ -63,7 +81,7 @@ export class RecallClient extends EventEmitter {
|
|
|
63
81
|
throw new Error(`Recall.ai join failed: ${res.status} ${err}`);
|
|
64
82
|
}
|
|
65
83
|
const bot = (await res.json());
|
|
66
|
-
console.log(`🤖 Recall.ai bot joined meeting: ${bot.id}`);
|
|
84
|
+
console.log(`🤖 Recall.ai bot joined meeting: ${bot.id} (output page: ${outputPageUrl})`);
|
|
67
85
|
return bot.id;
|
|
68
86
|
}
|
|
69
87
|
async leaveMeeting(botId) {
|