osborn 0.8.28 → 0.8.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +51 -3
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1109,13 +1109,61 @@ async function main() {
|
|
|
1109
1109
|
if (handle && typeof handle.addDoneCallback === 'function') {
|
|
1110
1110
|
// SpeechHandle — track it and register interruption callback
|
|
1111
1111
|
currentSpeechHandle = handle;
|
|
1112
|
+
// Wall-clock timer: capture when audio actually starts playing (first frame)
|
|
1113
|
+
// Used as fallback if LiveKit's playbackPosition is 0 (race condition)
|
|
1114
|
+
let playbackStartedAt = null;
|
|
1115
|
+
const audioOutputRef = currentSession?._activity?.agentSession?.output?.audio;
|
|
1116
|
+
if (audioOutputRef && typeof audioOutputRef.on === 'function') {
|
|
1117
|
+
const onPlaybackStarted = () => {
|
|
1118
|
+
playbackStartedAt = Date.now();
|
|
1119
|
+
console.log(`🔊 [${sayId}] audio first frame out (playbackStarted)`);
|
|
1120
|
+
audioOutputRef.off('playbackStarted', onPlaybackStarted);
|
|
1121
|
+
};
|
|
1122
|
+
audioOutputRef.on('playbackStarted', onPlaybackStarted);
|
|
1123
|
+
}
|
|
1112
1124
|
handle.addDoneCallback((sh) => {
|
|
1113
1125
|
if (sh.interrupted) {
|
|
1114
1126
|
console.log(`🔇 [${sayId}] session.say INTERRUPTED`);
|
|
1115
1127
|
const audioOutput = currentSession?._activity?.agentSession?.output?.audio;
|
|
1116
|
-
const
|
|
1117
|
-
const
|
|
1118
|
-
|
|
1128
|
+
const sdkTranscript = audioOutput?.lastPlaybackEvent?.synchronizedTranscript;
|
|
1129
|
+
const sdkPlaybackSec = audioOutput?.lastPlaybackEvent?.playbackPosition ?? 0;
|
|
1130
|
+
let spokenText;
|
|
1131
|
+
let method;
|
|
1132
|
+
if (sdkTranscript) {
|
|
1133
|
+
// Best case: LiveKit gave us word-accurate transcript (requires alignedTranscript TTS)
|
|
1134
|
+
spokenText = sdkTranscript;
|
|
1135
|
+
method = 'sdk-transcript';
|
|
1136
|
+
}
|
|
1137
|
+
else if (sdkPlaybackSec > 0) {
|
|
1138
|
+
// Second: LiveKit gave us playback duration — estimate chars from it
|
|
1139
|
+
const CHARS_PER_SEC = 14;
|
|
1140
|
+
const charCount = Math.min(Math.round(sdkPlaybackSec * CHARS_PER_SEC), data.text.length);
|
|
1141
|
+
const slicePoint = data.text.lastIndexOf(' ', charCount) || charCount;
|
|
1142
|
+
spokenText = slicePoint > 0 ? data.text.slice(0, slicePoint) : data.text;
|
|
1143
|
+
method = 'sdk-position';
|
|
1144
|
+
}
|
|
1145
|
+
else if (playbackStartedAt !== null) {
|
|
1146
|
+
// Third: use our wall-clock timer from first audio frame
|
|
1147
|
+
const elapsedSec = (Date.now() - playbackStartedAt) / 1000;
|
|
1148
|
+
const CHARS_PER_SEC = 14;
|
|
1149
|
+
const charCount = Math.min(Math.round(elapsedSec * CHARS_PER_SEC), data.text.length);
|
|
1150
|
+
const slicePoint = data.text.lastIndexOf(' ', charCount) || charCount;
|
|
1151
|
+
spokenText = slicePoint > 0 ? data.text.slice(0, slicePoint) : data.text;
|
|
1152
|
+
method = 'wall-clock';
|
|
1153
|
+
}
|
|
1154
|
+
else {
|
|
1155
|
+
// Fallback: interrupt fired before first frame — pass full block
|
|
1156
|
+
spokenText = data.text;
|
|
1157
|
+
method = 'full-block-fallback';
|
|
1158
|
+
}
|
|
1159
|
+
console.log('🔇 Interruption estimate:', JSON.stringify({
|
|
1160
|
+
method,
|
|
1161
|
+
sdkPlaybackSec,
|
|
1162
|
+
isSynced: !!sdkTranscript,
|
|
1163
|
+
spokenChars: spokenText.length,
|
|
1164
|
+
fullChars: data.text.length,
|
|
1165
|
+
heard: spokenText.slice(0, 80) + (spokenText.length > 80 ? '...' : '')
|
|
1166
|
+
}));
|
|
1119
1167
|
handleSpeechDone(sh, spokenText, data.text);
|
|
1120
1168
|
}
|
|
1121
1169
|
else {
|