osborn 0.8.28 → 0.8.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# Learned Behaviors
|
|
2
|
+
|
|
3
|
+
Auto-extracted from voice sessions via PreCompact.
|
|
4
|
+
Last updated: 2026-05-01 | Session: unknown...
|
|
5
|
+
|
|
6
|
+
USER CORRECTIONS:
|
|
7
|
+
- First write attempt for interruption timer was DENIED because it replaced existing LiveKit code instead of adding on top — always ADD fallbacks, never replace existing mechanisms; user said "you're basically deleting what we have"
|
|
8
|
+
- User denied the permission dialog for a file edit because they wanted to verify the approach before committing — ask "should I build this?" before delegating write tasks on significant changes
|
|
9
|
+
- When proposing a solution approach, verify it against the LiveKit SDK source AND docs before implementing — user asked "can you verify that we're doing it the right way in terms of live kits" ("live kits" = LiveKit)
|
|
10
|
+
|
|
11
|
+
USER PREFERENCES:
|
|
12
|
+
- Voice transcription: "MPM"=npm, "mobile publish"=npm publish, "live gate"=LiveKit, "left kit"=LiveKit, "basketball man"=bash command, "Osborne"=osborn — interpret flexibly
|
|
13
|
+
- Sprite is the publish path for npm going forward (not Mac), set up with automation token
|
|
14
|
+
- Prefers additive code changes: keep existing mechanisms as primary, add new approaches as fallback tiers
|
|
15
|
+
- Wants to understand the "why" before approving implementation — explain the mechanism, not just the solution
|
|
16
|
+
- Wants confirmation before committing to solutions when uncertain: "before we commit to doing this"
|
|
17
|
+
|
|
18
|
+
DOMAIN KNOWLEDGE:
|
|
19
|
+
- `osborn` has no `--version` flag — running `osborn --version` starts a new LiveKit-connected instance; use `npm list -g osborn` or `npm list -g osborn --json` to check the installed version
|
|
20
|
+
- npm automation token (not web login token) required for headless publish — web login token still requires 2FA for publish
|
|
21
|
+
- `playbackPosition` is 0 because the interrupt often fires before TTS audio frames start flowing (`pushedDuration = 0`); there is also a race condition between `addDoneCallback` and `waitForPlayoutTask`
|
|
22
|
+
- `skipTTSQueue: true` does NOT bypass audio frame tracking — `ttsTask` code path is identical
|
|
23
|
+
- `EVENT_PLAYBACK_STARTED` ('playbackStarted') is public, fires on first audio frame, accessible via `audioOutput.on()`
|
|
24
|
+
- OpenAI TTS-1 rate: ~14 chars/second confirmed from community benchmarks
|
|
25
|
+
- `check-version` in sandbox/route.ts was incorrectly using `osborn --version` (which starts a new instance); fixed to use `npm list -g osborn --json`
|
|
26
|
+
- execInSprite works any time the Sprite container is running, independent of osborn agent health
|
|
27
|
+
- `checkVersion()` already existed in dashboard/page.tsx (lines 325-338); `installedVersion` and `latestVersion` state already existed
|
|
28
|
+
- Dashboard handleRestart/handleUpdate were not reading API response bodies — server returns rich data but client discarded it
|
|
29
|
+
- Railway auto-deploys from GitHub main branch push
|
|
30
|
+
|
|
31
|
+
EFFECTIVE PATTERNS:
|
|
32
|
+
- Layered fallback approach: preserve existing SDK mechanisms, add custom approach only as fallback — explicit user preference confirmed
|
|
33
|
+
- Taking checkpoint before significant changes gives confidence to experiment
|
|
34
|
+
- Reading API response bodies and surfacing errors/success messages is a simple fix for "silent" operations
|
|
35
|
+
|
|
36
|
+
ANTI-PATTERNS:
|
|
37
|
+
- Running `osborn --version` on the Sprite created stray processes connected to LiveKit room — never run the osborn binary to check version
|
|
38
|
+
- Replacing existing LiveKit code with custom implementation instead of adding on top — user explicitly denied and corrected this
|
|
39
|
+
- Delegating write task without confirming approach with user first on novel/uncertain changes
|
|
40
|
+
- Not reading the API response body after `await fetch()` — silent failure mode
|
|
41
|
+
</summary>
|
package/dist/index.js
CHANGED
|
@@ -1109,13 +1109,61 @@ async function main() {
|
|
|
1109
1109
|
if (handle && typeof handle.addDoneCallback === 'function') {
|
|
1110
1110
|
// SpeechHandle — track it and register interruption callback
|
|
1111
1111
|
currentSpeechHandle = handle;
|
|
1112
|
+
// Wall-clock timer: capture when audio actually starts playing (first frame)
|
|
1113
|
+
// Used as fallback if LiveKit's playbackPosition is 0 (race condition)
|
|
1114
|
+
let playbackStartedAt = null;
|
|
1115
|
+
const audioOutputRef = currentSession?.output?.audio;
|
|
1116
|
+
if (audioOutputRef && typeof audioOutputRef.on === 'function') {
|
|
1117
|
+
const onPlaybackStarted = () => {
|
|
1118
|
+
playbackStartedAt = Date.now();
|
|
1119
|
+
console.log(`🔊 [${sayId}] audio first frame out (playbackStarted)`);
|
|
1120
|
+
audioOutputRef.off('playbackStarted', onPlaybackStarted);
|
|
1121
|
+
};
|
|
1122
|
+
audioOutputRef.on('playbackStarted', onPlaybackStarted);
|
|
1123
|
+
}
|
|
1112
1124
|
handle.addDoneCallback((sh) => {
|
|
1113
1125
|
if (sh.interrupted) {
|
|
1114
1126
|
console.log(`🔇 [${sayId}] session.say INTERRUPTED`);
|
|
1115
|
-
const audioOutput = currentSession?.
|
|
1116
|
-
const
|
|
1117
|
-
const
|
|
1118
|
-
|
|
1127
|
+
const audioOutput = currentSession?.output?.audio;
|
|
1128
|
+
const sdkTranscript = audioOutput?.lastPlaybackEvent?.synchronizedTranscript;
|
|
1129
|
+
const sdkPlaybackSec = audioOutput?.lastPlaybackEvent?.playbackPosition ?? 0;
|
|
1130
|
+
let spokenText;
|
|
1131
|
+
let method;
|
|
1132
|
+
if (sdkTranscript) {
|
|
1133
|
+
// Best case: LiveKit gave us word-accurate transcript (requires alignedTranscript TTS)
|
|
1134
|
+
spokenText = sdkTranscript;
|
|
1135
|
+
method = 'sdk-transcript';
|
|
1136
|
+
}
|
|
1137
|
+
else if (sdkPlaybackSec > 0) {
|
|
1138
|
+
// Second: LiveKit gave us playback duration — estimate chars from it
|
|
1139
|
+
const CHARS_PER_SEC = 14;
|
|
1140
|
+
const charCount = Math.min(Math.round(sdkPlaybackSec * CHARS_PER_SEC), data.text.length);
|
|
1141
|
+
const slicePoint = data.text.lastIndexOf(' ', charCount) || charCount;
|
|
1142
|
+
spokenText = slicePoint > 0 ? data.text.slice(0, slicePoint) : data.text;
|
|
1143
|
+
method = 'sdk-position';
|
|
1144
|
+
}
|
|
1145
|
+
else if (playbackStartedAt !== null) {
|
|
1146
|
+
// Third: use our wall-clock timer from first audio frame
|
|
1147
|
+
const elapsedSec = (Date.now() - playbackStartedAt) / 1000;
|
|
1148
|
+
const CHARS_PER_SEC = 14;
|
|
1149
|
+
const charCount = Math.min(Math.round(elapsedSec * CHARS_PER_SEC), data.text.length);
|
|
1150
|
+
const slicePoint = data.text.lastIndexOf(' ', charCount) || charCount;
|
|
1151
|
+
spokenText = slicePoint > 0 ? data.text.slice(0, slicePoint) : data.text;
|
|
1152
|
+
method = 'wall-clock';
|
|
1153
|
+
}
|
|
1154
|
+
else {
|
|
1155
|
+
// Fallback: interrupt fired before first frame — pass full block
|
|
1156
|
+
spokenText = data.text;
|
|
1157
|
+
method = 'full-block-fallback';
|
|
1158
|
+
}
|
|
1159
|
+
console.log('🔇 Interruption estimate:', JSON.stringify({
|
|
1160
|
+
method,
|
|
1161
|
+
sdkPlaybackSec,
|
|
1162
|
+
isSynced: !!sdkTranscript,
|
|
1163
|
+
spokenChars: spokenText.length,
|
|
1164
|
+
fullChars: data.text.length,
|
|
1165
|
+
heard: spokenText.slice(0, 80) + (spokenText.length > 80 ? '...' : '')
|
|
1166
|
+
}));
|
|
1119
1167
|
handleSpeechDone(sh, spokenText, data.text);
|
|
1120
1168
|
}
|
|
1121
1169
|
else {
|