osborn 0.9.41 → 0.9.42

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/index.js +38 -3
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -1161,23 +1161,57 @@ function startApiServer(workingDir, port) {
1161
1161
  // Payload shape from
1162
1162
  // docs.recall.ai/docs/how-to-get-separate-audio-per-participant-realtime:
1163
1163
  // { event: 'audio_separate_raw.data', data: { data: { buffer: '<base64>', ... }, participant: {...} } }
1164
+ //
1165
+ // Diagnostic counters so we can tell from prod logs whether (a) Recall is
1166
+ // streaming any frames at all, (b) they're decoding correctly, and (c)
1167
+ // captureFrame is succeeding. Logged every 100 frames (~2s at 50fps).
1168
+ let totalMessages = 0;
1169
+ let audioFrames = 0;
1170
+ let bytesIn = 0;
1171
+ let lastSpeakerSeen;
1172
+ const startTs = Date.now();
1164
1173
  recallWs.on('message', async (raw) => {
1174
+ totalMessages++;
1165
1175
  if (!source)
1166
1176
  return;
1167
1177
  try {
1168
1178
  const msg = JSON.parse(raw.toString());
1169
- if (msg.event !== 'audio_separate_raw.data')
1179
+ if (msg.event !== 'audio_separate_raw.data') {
1180
+ // Early event-type diagnostic — log non-audio event types seen within the
1181
+ // first 3 messages so we know if Recall's payload shape changed
1182
+ if (totalMessages <= 3) {
1183
+ console.log(`[meeting-audio-in] non-audio event: ${msg.event}`);
1184
+ }
1170
1185
  return;
1186
+ }
1171
1187
  const b64 = msg.data?.data?.buffer;
1172
- if (!b64)
1188
+ if (!b64) {
1189
+ if (audioFrames === 0) {
1190
+ console.warn(`[meeting-audio-in] first audio event had no buffer field. payload keys=${Object.keys(msg.data?.data ?? {}).join(',')}`);
1191
+ }
1173
1192
  return;
1193
+ }
1174
1194
  const pcmBuf = Buffer.from(b64, 'base64');
1195
+ bytesIn += pcmBuf.byteLength;
1196
+ const speakerName = msg.data?.data?.participant?.name || msg.data?.participant?.name;
1197
+ if (speakerName && speakerName !== lastSpeakerSeen) {
1198
+ console.log(`[meeting-audio-in] now hearing: ${speakerName}`);
1199
+ lastSpeakerSeen = speakerName;
1200
+ }
1175
1201
  // AudioFrame expects Int16Array. The PCM buffer is S16LE — view it
1176
1202
  // directly without copy. Length / 2 = samples (each sample 2 bytes).
1177
1203
  const samplesPerChannel = pcmBuf.byteLength / 2;
1178
1204
  const int16 = new Int16Array(pcmBuf.buffer, pcmBuf.byteOffset, samplesPerChannel);
1179
1205
  const frame = new AudioFrame(int16, 16000, 1, samplesPerChannel);
1180
1206
  await source.captureFrame(frame);
1207
+ audioFrames++;
1208
+ if (audioFrames === 1) {
1209
+ console.log(`[meeting-audio-in] FIRST audio frame captured (${pcmBuf.byteLength} bytes, ${samplesPerChannel} samples)`);
1210
+ }
1211
+ if (audioFrames % 100 === 0) {
1212
+ const elapsed = ((Date.now() - startTs) / 1000).toFixed(1);
1213
+ console.log(`[meeting-audio-in] heartbeat: ${audioFrames} frames, ${(bytesIn / 1024).toFixed(1)} KB in ${elapsed}s (last speaker: ${lastSpeakerSeen ?? 'unknown'})`);
1214
+ }
1181
1215
  }
1182
1216
  catch (err) {
1183
1217
  // Don't log every frame parse failure — could be noisy if Recall sends
@@ -1188,7 +1222,8 @@ function startApiServer(workingDir, port) {
1188
1222
  }
1189
1223
  });
1190
1224
  recallWs.on('close', async () => {
1191
- console.log('🎙️ Recall audio-in WebSocket closed — tearing down LiveKit publisher');
1225
+ const elapsed = ((Date.now() - startTs) / 1000).toFixed(1);
1226
+ console.log(`🎙️ Recall audio-in WebSocket closed — tearing down LiveKit publisher. Total: ${audioFrames} audio frames / ${totalMessages} messages / ${(bytesIn / 1024).toFixed(1)} KB over ${elapsed}s`);
1192
1227
  await cleanup();
1193
1228
  });
1194
1229
  recallWs.on('error', (err) => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "osborn",
3
- "version": "0.9.41",
3
+ "version": "0.9.42",
4
4
  "description": "Voice AI coding assistant - local agent that connects to Osborn frontend",
5
5
  "type": "module",
6
6
  "bin": {