osborn 0.9.39 → 0.9.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +145 -25
- package/dist/recall-client.d.ts +17 -1
- package/dist/recall-client.js +22 -4
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1224,28 +1224,46 @@ async function main() {
|
|
|
1224
1224
|
let lastCompletedResearch = null;
|
|
1225
1225
|
// No manual queuing — the Claude SDK handles sequential queries internally
|
|
1226
1226
|
// ============================================================
|
|
1227
|
-
// Recall.ai — Meeting Transcript
|
|
1227
|
+
// Recall.ai — Meeting Transcript Listener
|
|
1228
1228
|
// ============================================================
|
|
1229
|
+
// NOTE: LLM-forwarding via Recall webhook STT was DISABLED in the Phase 2
|
|
1230
|
+
// LiveKit-based meeting-bot migration. Reason: Recall sends transcripts as
|
|
1231
|
+
// sentence-level fragments (e.g. "transcript.data" events fire ~once per
|
|
1232
|
+
// sentence). The old code below called currentLLM.chat() PER FRAGMENT, which
|
|
1233
|
+
// meant the agent fired ~10 chat() calls during a single user utterance —
|
|
1234
|
+
// each one prompting a separate response. The agent ended up speaking over
|
|
1235
|
+
// itself answering partial fragments.
|
|
1236
|
+
//
|
|
1237
|
+
// Phase 2 routes meeting audio through LiveKit instead (see
|
|
1238
|
+
// frontend/src/app/meeting-bot/page.tsx). The agent's existing Deepgram Flux
|
|
1239
|
+
// STT processes that audio via end-of-turn detection — ONE chat() call per
|
|
1240
|
+
// actual completed utterance, no fragment storms.
|
|
1241
|
+
//
|
|
1242
|
+
// We keep the listener registered so we have a hook for future work (e.g.
|
|
1243
|
+
// forwarding the live transcript to the frontend chat panel as a read-only
|
|
1244
|
+
// "what was said in the meeting" display, separate from the LLM input path).
|
|
1229
1245
|
const recall = getRecallClient();
|
|
1230
1246
|
if (recall) {
|
|
1231
|
-
console.log('🎥 Recall.ai client initialized (
|
|
1247
|
+
console.log('🎥 Recall.ai client initialized (webhook STT receiver — LLM forwarding disabled, see meeting-bot Phase 2)');
|
|
1232
1248
|
recall.on('transcript', ({ botId, speaker, text }) => {
|
|
1233
1249
|
console.log(`📝 Meeting transcript [${speaker}]: ${text}`);
|
|
1234
|
-
//
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1250
|
+
// INTENTIONALLY DISABLED — see comment above. Audio path is now LiveKit
|
|
1251
|
+
// → meeting-bot page publishes meeting audio → agent STT processes it.
|
|
1252
|
+
// The commented-out block below is preserved as a reference for future re-enablement
|
|
1253
|
+
// (e.g. as a display-only feature, NOT as LLM input).
|
|
1254
|
+
//
|
|
1255
|
+
// if (currentLLM && currentSession) {
|
|
1256
|
+
// const meetingText = `[Meeting — ${speaker}]: ${text}`
|
|
1257
|
+
// try {
|
|
1258
|
+
// if (currentVoiceMode === 'pipeline' || currentVoiceMode === 'direct') {
|
|
1259
|
+
// const chatCtx = new llm.ChatContext()
|
|
1260
|
+
// chatCtx.addMessage({ role: 'user', content: meetingText })
|
|
1261
|
+
// ;(currentLLM as any).chat({ chatCtx })
|
|
1262
|
+
// }
|
|
1263
|
+
// } catch (err) {
|
|
1264
|
+
// console.error('❌ Failed to route meeting transcript:', err)
|
|
1265
|
+
// }
|
|
1266
|
+
// }
|
|
1249
1267
|
});
|
|
1250
1268
|
}
|
|
1251
1269
|
// ============================================================
|
|
@@ -1656,9 +1674,34 @@ async function main() {
|
|
|
1656
1674
|
skipTTSQueue: true,
|
|
1657
1675
|
onCompactionEvent: (event) => {
|
|
1658
1676
|
try {
|
|
1659
|
-
// Forward
|
|
1660
|
-
// Spread covers compaction_started/progress/complete (different fields per type).
|
|
1677
|
+
// Forward the raw event so the dedicated banner UI can render it (if/when fixed).
|
|
1661
1678
|
sendToFrontend({ ...event });
|
|
1679
|
+
// ALSO emit as a claude_output chat bubble — reuses the existing message path
|
|
1680
|
+
// that's already working end-to-end. PreCompact → in-progress bubble.
|
|
1681
|
+
// PostCompact → completion bubble with the skills summary. The dedicated
|
|
1682
|
+
// banner has been unreliable in production (data path works on backend, banner
|
|
1683
|
+
// never appears on iPad/iPhone where dev tools aren't accessible). Chat bubbles
|
|
1684
|
+
// are visible without dev tools.
|
|
1685
|
+
if (event.type === 'compaction_started') {
|
|
1686
|
+
const triggerLabel = event.trigger ? ` (${event.trigger})` : '';
|
|
1687
|
+
sendToFrontend({
|
|
1688
|
+
type: 'claude_output',
|
|
1689
|
+
text: `🧠 _Crystallizing session memory…_${triggerLabel}`,
|
|
1690
|
+
agentRole: 'direct',
|
|
1691
|
+
});
|
|
1692
|
+
}
|
|
1693
|
+
else if (event.type === 'compaction_complete') {
|
|
1694
|
+
const ev = event;
|
|
1695
|
+
const n = ev.skillsWritten ?? 0;
|
|
1696
|
+
const names = Array.isArray(ev.skillNames) && ev.skillNames.length > 0
|
|
1697
|
+
? ` — ${ev.skillNames.join(', ')}`
|
|
1698
|
+
: '';
|
|
1699
|
+
sendToFrontend({
|
|
1700
|
+
type: 'claude_output',
|
|
1701
|
+
text: `🧠 Memory crystallized — ${n} skill${n === 1 ? '' : 's'} updated${names}.`,
|
|
1702
|
+
agentRole: 'direct',
|
|
1703
|
+
});
|
|
1704
|
+
}
|
|
1662
1705
|
}
|
|
1663
1706
|
catch { /* non-fatal */ }
|
|
1664
1707
|
},
|
|
@@ -1862,14 +1905,17 @@ async function main() {
|
|
|
1862
1905
|
const sayId = Date.now(); // simple ID to correlate start/end logs
|
|
1863
1906
|
console.log(`🗣️ [${sayId}] session.say START (${data.text.length} chars): "${data.text}"`);
|
|
1864
1907
|
// Forward spoken text + audio to meeting output page when bot is in a meeting.
|
|
1865
|
-
//
|
|
1866
|
-
//
|
|
1908
|
+
// Uses DIRECT_MODE_TTS (same OpenAI fable voice as the live session) — was
|
|
1909
|
+
// previously using directConfig.tts which falls back to DEFAULT_CONFIG.direct.tts
|
|
1910
|
+
// (Deepgram aura-2-asteria-en) when no user config exists, producing a different
|
|
1911
|
+
// voice in the meeting than what the user hears in voice-native. Both paths now
|
|
1912
|
+
// share the single source of truth.
|
|
1867
1913
|
// PCM frames are WAV-encoded and pushed as binary WebSocket frames.
|
|
1868
1914
|
// Recall captures the browser page's audio output and injects it into the meeting.
|
|
1869
1915
|
if (activeMeetingBotId) {
|
|
1870
1916
|
sendToMeetingOutput({ type: 'speak', text: data.text });
|
|
1871
1917
|
if (meetingOutputWs) {
|
|
1872
|
-
synthesizeForMeeting(data.text,
|
|
1918
|
+
synthesizeForMeeting(data.text, DIRECT_MODE_TTS).catch((err) => console.warn('⚠️ Meeting TTS error:', err));
|
|
1873
1919
|
}
|
|
1874
1920
|
}
|
|
1875
1921
|
try {
|
|
@@ -2011,9 +2057,34 @@ async function main() {
|
|
|
2011
2057
|
resumeSessionId,
|
|
2012
2058
|
onCompactionEvent: (event) => {
|
|
2013
2059
|
try {
|
|
2014
|
-
// Forward
|
|
2015
|
-
// Spread covers compaction_started/progress/complete (different fields per type).
|
|
2060
|
+
// Forward the raw event so the dedicated banner UI can render it (if/when fixed).
|
|
2016
2061
|
sendToFrontend({ ...event });
|
|
2062
|
+
// ALSO emit as a claude_output chat bubble — reuses the existing message path
|
|
2063
|
+
// that's already working end-to-end. PreCompact → in-progress bubble.
|
|
2064
|
+
// PostCompact → completion bubble with the skills summary. The dedicated
|
|
2065
|
+
// banner has been unreliable in production (data path works on backend, banner
|
|
2066
|
+
// never appears on iPad/iPhone where dev tools aren't accessible). Chat bubbles
|
|
2067
|
+
// are visible without dev tools.
|
|
2068
|
+
if (event.type === 'compaction_started') {
|
|
2069
|
+
const triggerLabel = event.trigger ? ` (${event.trigger})` : '';
|
|
2070
|
+
sendToFrontend({
|
|
2071
|
+
type: 'claude_output',
|
|
2072
|
+
text: `🧠 _Crystallizing session memory…_${triggerLabel}`,
|
|
2073
|
+
agentRole: 'direct',
|
|
2074
|
+
});
|
|
2075
|
+
}
|
|
2076
|
+
else if (event.type === 'compaction_complete') {
|
|
2077
|
+
const ev = event;
|
|
2078
|
+
const n = ev.skillsWritten ?? 0;
|
|
2079
|
+
const names = Array.isArray(ev.skillNames) && ev.skillNames.length > 0
|
|
2080
|
+
? ` — ${ev.skillNames.join(', ')}`
|
|
2081
|
+
: '';
|
|
2082
|
+
sendToFrontend({
|
|
2083
|
+
type: 'claude_output',
|
|
2084
|
+
text: `🧠 Memory crystallized — ${n} skill${n === 1 ? '' : 's'} updated${names}.`,
|
|
2085
|
+
agentRole: 'direct',
|
|
2086
|
+
});
|
|
2087
|
+
}
|
|
2017
2088
|
}
|
|
2018
2089
|
catch { /* non-fatal */ }
|
|
2019
2090
|
},
|
|
@@ -3855,8 +3926,57 @@ async function main() {
|
|
|
3855
3926
|
(process.env.FLY_APP_NAME
|
|
3856
3927
|
? `https://${process.env.FLY_APP_NAME}.fly.dev`
|
|
3857
3928
|
: `http://localhost:${apiPort}`);
|
|
3929
|
+
// Try to mint a LiveKit bot token + construct the frontend-hosted
|
|
3930
|
+
// meeting-bot page URL. The bot page joins the same LiveKit room
|
|
3931
|
+
// as this agent so meeting audio flows through LiveKit directly
|
|
3932
|
+
// (no agent-side WebSocket+WAV pipe). Falls back to the legacy
|
|
3933
|
+
// /meeting-output webpage if no frontend URL is resolvable, so
|
|
3934
|
+
// the old code path keeps working during the migration window.
|
|
3935
|
+
//
|
|
3936
|
+
// Frontend URL resolution (in priority order):
|
|
3937
|
+
// 1. data.frontendBase — the public URL the user's browser is on,
|
|
3938
|
+
// passed through the join_meeting data channel message. Works
|
|
3939
|
+
// automatically for localhost dev + production without any
|
|
3940
|
+
// env var.
|
|
3941
|
+
// 2. OSBORN_FRONTEND_URL — existing convention from sprites.ts
|
|
3942
|
+
// (frontend/src/lib/sprites.ts:241) that injects the public
|
|
3943
|
+
// frontend URL into sandbox env vars. Used as the fallback when data.frontendBase is absent.
|
|
3944
|
+
//
|
|
3945
|
+
// Auth: the endpoint uses LiveKit room-presence as the auth check
|
|
3946
|
+
// — no shared secret needed. The agent must already be in the
|
|
3947
|
+
// requested room (which it is by this point) for the mint to
|
|
3948
|
+
// succeed.
|
|
3949
|
+
let outputPageUrl;
|
|
3950
|
+
const frontendUrl = data.frontendBase
|
|
3951
|
+
|| process.env.OSBORN_FRONTEND_URL;
|
|
3952
|
+
if (frontendUrl) {
|
|
3953
|
+
try {
|
|
3954
|
+
const botLkId = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
|
|
3955
|
+
const tokenRes = await fetch(`${frontendUrl}/api/meeting-bot-token`, {
|
|
3956
|
+
method: 'POST',
|
|
3957
|
+
headers: { 'Content-Type': 'application/json' },
|
|
3958
|
+
body: JSON.stringify({ botId: botLkId, roomName }),
|
|
3959
|
+
});
|
|
3960
|
+
if (tokenRes.ok) {
|
|
3961
|
+
const { token, url } = await tokenRes.json();
|
|
3962
|
+
const params = new URLSearchParams({ token, url, room: roomName, botId: botLkId });
|
|
3963
|
+
outputPageUrl = `${frontendUrl}/meeting-bot?${params.toString()}`;
|
|
3964
|
+
console.log(`🎫 Meeting-bot token minted for room=${roomName} bot=${botLkId}`);
|
|
3965
|
+
}
|
|
3966
|
+
else {
|
|
3967
|
+
const errText = await tokenRes.text().catch(() => '');
|
|
3968
|
+
console.warn(`⚠️ meeting-bot-token mint failed (HTTP ${tokenRes.status}: ${errText.substring(0, 120)}) — falling back to legacy /meeting-output path`);
|
|
3969
|
+
}
|
|
3970
|
+
}
|
|
3971
|
+
catch (mintErr) {
|
|
3972
|
+
console.warn(`⚠️ meeting-bot-token mint threw — falling back: ${mintErr.message}`);
|
|
3973
|
+
}
|
|
3974
|
+
}
|
|
3975
|
+
else {
|
|
3976
|
+
console.log('ℹ️ No frontend URL (data.frontendBase + OSBORN_FRONTEND_URL both empty) — using legacy /meeting-output path');
|
|
3977
|
+
}
|
|
3858
3978
|
await sendToFrontend({ type: 'meeting_joining', message: 'Osborn is joining your meeting...' });
|
|
3859
|
-
const botId = await recallJoin.joinMeeting(meetingUrl, webhookBase);
|
|
3979
|
+
const botId = await recallJoin.joinMeeting(meetingUrl, webhookBase, { outputPageUrl });
|
|
3860
3980
|
const sessionId = currentLLM?.sessionId || currentResumeSessionId || 'default';
|
|
3861
3981
|
recallJoin.registerBot(botId, sessionId);
|
|
3862
3982
|
activeMeetingBotId = botId;
|
package/dist/recall-client.d.ts
CHANGED
|
@@ -36,7 +36,23 @@ export interface TranscriptPayload {
|
|
|
36
36
|
export declare class RecallClient extends EventEmitter {
|
|
37
37
|
#private;
|
|
38
38
|
constructor(apiKey: string);
|
|
39
|
-
|
|
39
|
+
/**
|
|
40
|
+
* Join a meeting via Recall.ai.
|
|
41
|
+
*
|
|
42
|
+
* @param meetingUrl Zoom / Google Meet / Teams URL the bot should dial in to
|
|
43
|
+
* @param webhookBaseUrl Base URL for the agent's HTTP endpoints (transcript webhook)
|
|
44
|
+
* @param opts.outputPageUrl Full URL for the bot's camera/audio page. If provided,
|
|
45
|
+
* replaces the default `${webhookBaseUrl}/meeting-output`.
|
|
46
|
+
* Used to point at the frontend-hosted /meeting-bot page
|
|
47
|
+
* with token + room embedded as query params, so the page
|
|
48
|
+
* connects to LiveKit and audio flows through the same
|
|
49
|
+
* room as the osborn agent (no separate WebSocket+WAV pipe).
|
|
50
|
+
* @param opts.botName Display name of the bot in the meeting
|
|
51
|
+
*/
|
|
52
|
+
joinMeeting(meetingUrl: string, webhookBaseUrl: string, opts?: {
|
|
53
|
+
outputPageUrl?: string;
|
|
54
|
+
botName?: string;
|
|
55
|
+
}): Promise<string>;
|
|
40
56
|
leaveMeeting(botId: string): Promise<void>;
|
|
41
57
|
getBotStatus(botId: string): Promise<string>;
|
|
42
58
|
handleWebhook(payload: TranscriptPayload): void;
|
package/dist/recall-client.js
CHANGED
|
@@ -8,7 +8,22 @@ export class RecallClient extends EventEmitter {
|
|
|
8
8
|
super();
|
|
9
9
|
this.#apiKey = apiKey;
|
|
10
10
|
}
|
|
11
|
-
|
|
11
|
+
/**
|
|
12
|
+
* Join a meeting via Recall.ai.
|
|
13
|
+
*
|
|
14
|
+
* @param meetingUrl Zoom / Google Meet / Teams URL the bot should dial in to
|
|
15
|
+
* @param webhookBaseUrl Base URL for the agent's HTTP endpoints (transcript webhook)
|
|
16
|
+
* @param opts.outputPageUrl Full URL for the bot's camera/audio page. If provided,
|
|
17
|
+
* replaces the default `${webhookBaseUrl}/meeting-output`.
|
|
18
|
+
* Used to point at the frontend-hosted /meeting-bot page
|
|
19
|
+
* with token + room embedded as query params, so the page
|
|
20
|
+
* connects to LiveKit and audio flows through the same
|
|
21
|
+
* room as the osborn agent (no separate WebSocket+WAV pipe).
|
|
22
|
+
* @param opts.botName Display name of the bot in the meeting
|
|
23
|
+
*/
|
|
24
|
+
async joinMeeting(meetingUrl, webhookBaseUrl, opts) {
|
|
25
|
+
const botName = opts?.botName ?? 'Osborn';
|
|
26
|
+
const outputPageUrl = opts?.outputPageUrl ?? `${webhookBaseUrl}/meeting-output`;
|
|
12
27
|
// Authoritative structure per https://docs.recall.ai/reference/bot_create
|
|
13
28
|
// and https://docs.recall.ai/docs/real-time-transcription:
|
|
14
29
|
//
|
|
@@ -49,10 +64,13 @@ export class RecallClient extends EventEmitter {
|
|
|
49
64
|
output_media: {
|
|
50
65
|
camera: {
|
|
51
66
|
// `kind` (not `type`) — confirmed from prior debugging.
|
|
52
|
-
//
|
|
67
|
+
// The page Recall renders is responsible for joining the same LiveKit
|
|
68
|
+
// room as the osborn agent: meeting audio captured via getUserMedia is
|
|
69
|
+
// published into the room; osborn's TTS audio (already in the room) is
|
|
70
|
+
// played by the page and captured by Recall as the bot's mic output.
|
|
53
71
|
kind: 'webpage',
|
|
54
72
|
config: {
|
|
55
|
-
url:
|
|
73
|
+
url: outputPageUrl,
|
|
56
74
|
},
|
|
57
75
|
},
|
|
58
76
|
},
|
|
@@ -63,7 +81,7 @@ export class RecallClient extends EventEmitter {
|
|
|
63
81
|
throw new Error(`Recall.ai join failed: ${res.status} ${err}`);
|
|
64
82
|
}
|
|
65
83
|
const bot = (await res.json());
|
|
66
|
-
console.log(`🤖 Recall.ai bot joined meeting: ${bot.id}`);
|
|
84
|
+
console.log(`🤖 Recall.ai bot joined meeting: ${bot.id} (output page: ${outputPageUrl})`);
|
|
67
85
|
return bot.id;
|
|
68
86
|
}
|
|
69
87
|
async leaveMeeting(botId) {
|