osborn 0.8.28 → 0.8.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,41 @@
1
+ # Learned Behaviors
2
+
3
+ Auto-extracted from voice sessions via PreCompact.
4
+ Last updated: 2026-05-01 | Session: unknown...
5
+
6
+ USER CORRECTIONS:
7
+ - First write attempt for interruption timer was DENIED because it replaced existing LiveKit code instead of adding on top — always ADD fallbacks, never replace existing mechanisms; user said "you're basically deleting what we have"
8
+ - User denied the permission dialog for a file edit because they wanted to verify the approach before committing — ask "should I build this?" before delegating write tasks on significant changes
9
+ - When proposing solution approach, verify against LiveKit SDK source AND docs before implementing — user asked "can you verify that we're doing it the right way in terms of live kits"
10
+
11
+ USER PREFERENCES:
12
+ - Voice transcription: "MPM"=npm, "mobile publish"=npm publish, "live gate"=LiveKit, "left kit"=LiveKit, "basketball man"=bash command, "Osborne"=osborn — interpret flexibly
13
+ - Sprite is the publish path for npm going forward (not Mac), set up with automation token
14
+ - Prefers additive code changes: keep existing mechanisms as primary, add new approaches as fallback tiers
15
+ - Wants to understand the "why" before approving implementation — explain the mechanism, not just the solution
16
+ - Wants confirmation before committing to solutions when uncertain: "before we commit to doing this"
17
+
18
+ DOMAIN KNOWLEDGE:
19
+ - `osborn` has no `--version` flag — running `osborn --version` starts a new LiveKit-connected instance; use `npm list -g osborn` or `npm list -g osborn --json` to check the version
20
+ - npm automation token (not web login token) required for headless publish — web login token still requires 2FA for publish
21
+ - `playbackPosition` is 0 because the interrupt often fires before TTS audio frames start flowing (`pushedDuration = 0`); there is also a race condition between `addDoneCallback` and `waitForPlayoutTask`
22
+ - `skipTTSQueue: true` does NOT bypass audio frame tracking — `ttsTask` code path is identical
23
+ - `EVENT_PLAYBACK_STARTED` ('playbackStarted') is public, fires on first audio frame, accessible via `audioOutput.on()`
24
+ - OpenAI TTS-1 rate: ~14 chars/second confirmed from community benchmarks
25
+ - `check-version` in sandbox/route.ts was incorrectly using `osborn --version` (starts instance); fixed to `npm list -g osborn --json`
26
+ - execInSprite works any time the Sprite container is running, independent of osborn agent health
27
+ - `checkVersion()` already existed in dashboard/page.tsx (lines 325-338); `installedVersion` and `latestVersion` state already existed
28
+ - Dashboard handleRestart/handleUpdate were not reading API response bodies — server returns rich data but client discarded it
29
+ - Railway auto-deploys from GitHub main branch push
30
+
31
+ EFFECTIVE PATTERNS:
32
+ - Layered fallback approach: preserve existing SDK mechanisms, add custom approach only as fallback — explicit user preference confirmed
33
+ - Taking a checkpoint before significant changes gives confidence to experiment
34
+ - Reading API response bodies and surfacing errors/success messages is a simple fix for "silent" operations
35
+
36
+ ANTI-PATTERNS:
37
+ - Running `osborn --version` on the Sprite created stray processes connected to LiveKit room — never run the osborn binary to check version
38
+ - Replacing existing LiveKit code with custom implementation instead of adding on top — user explicitly denied and corrected this
39
+ - Delegating write task without confirming approach with user first on novel/uncertain changes
40
+ - Not reading the API response body after `await fetch()` — silent failure mode
41
+ </summary>
package/dist/index.js CHANGED
@@ -1109,13 +1109,61 @@ async function main() {
1109
1109
  if (handle && typeof handle.addDoneCallback === 'function') {
1110
1110
  // SpeechHandle — track it and register interruption callback
1111
1111
  currentSpeechHandle = handle;
1112
+ // Wall-clock timer: capture when audio actually starts playing (first frame)
1113
+ // Used as fallback if LiveKit's playbackPosition is 0 (race condition)
1114
+ let playbackStartedAt = null;
1115
+ const audioOutputRef = currentSession?.output?.audio;
1116
+ if (audioOutputRef && typeof audioOutputRef.on === 'function') {
1117
+ const onPlaybackStarted = () => {
1118
+ playbackStartedAt = Date.now();
1119
+ console.log(`🔊 [${sayId}] audio first frame out (playbackStarted)`);
1120
+ audioOutputRef.off('playbackStarted', onPlaybackStarted);
1121
+ };
1122
+ audioOutputRef.on('playbackStarted', onPlaybackStarted);
1123
+ }
1112
1124
  handle.addDoneCallback((sh) => {
1113
1125
  if (sh.interrupted) {
1114
1126
  console.log(`🔇 [${sayId}] session.say INTERRUPTED`);
1115
- const audioOutput = currentSession?._activity?.agentSession?.output?.audio;
1116
- const spokenText = audioOutput?.lastPlaybackEvent?.synchronizedTranscript || data.text;
1117
- const playbackPositionSec = audioOutput?.lastPlaybackEvent?.playbackPosition ?? 0;
1118
- console.log('🔇 Synchronized transcript:', JSON.stringify({ chars: spokenText.length, fullChars: data.text.length, playbackSec: playbackPositionSec, isSynced: !!audioOutput?.lastPlaybackEvent?.synchronizedTranscript }));
1127
+ const audioOutput = currentSession?.output?.audio;
1128
+ const sdkTranscript = audioOutput?.lastPlaybackEvent?.synchronizedTranscript;
1129
+ const sdkPlaybackSec = audioOutput?.lastPlaybackEvent?.playbackPosition ?? 0;
1130
+ let spokenText;
1131
+ let method;
1132
+ if (sdkTranscript) {
1133
+ // Best case: LiveKit gave us word-accurate transcript (requires alignedTranscript TTS)
1134
+ spokenText = sdkTranscript;
1135
+ method = 'sdk-transcript';
1136
+ }
1137
+ else if (sdkPlaybackSec > 0) {
1138
+ // Second: LiveKit gave us playback duration — estimate chars from it
1139
+ const CHARS_PER_SEC = 14;
1140
+ const charCount = Math.min(Math.round(sdkPlaybackSec * CHARS_PER_SEC), data.text.length);
1141
+ const slicePoint = data.text.lastIndexOf(' ', charCount) || charCount;
1142
+ spokenText = slicePoint > 0 ? data.text.slice(0, slicePoint) : data.text;
1143
+ method = 'sdk-position';
1144
+ }
1145
+ else if (playbackStartedAt !== null) {
1146
+ // Third: use our wall-clock timer from first audio frame
1147
+ const elapsedSec = (Date.now() - playbackStartedAt) / 1000;
1148
+ const CHARS_PER_SEC = 14;
1149
+ const charCount = Math.min(Math.round(elapsedSec * CHARS_PER_SEC), data.text.length);
1150
+ const slicePoint = data.text.lastIndexOf(' ', charCount) || charCount;
1151
+ spokenText = slicePoint > 0 ? data.text.slice(0, slicePoint) : data.text;
1152
+ method = 'wall-clock';
1153
+ }
1154
+ else {
1155
+ // Fallback: interrupt fired before first frame — pass full block
1156
+ spokenText = data.text;
1157
+ method = 'full-block-fallback';
1158
+ }
1159
+ console.log('🔇 Interruption estimate:', JSON.stringify({
1160
+ method,
1161
+ sdkPlaybackSec,
1162
+ isSynced: !!sdkTranscript,
1163
+ spokenChars: spokenText.length,
1164
+ fullChars: data.text.length,
1165
+ heard: spokenText.slice(0, 80) + (spokenText.length > 80 ? '...' : '')
1166
+ }));
1119
1167
  handleSpeechDone(sh, spokenText, data.text);
1120
1168
  }
1121
1169
  else {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "osborn",
3
- "version": "0.8.28",
3
+ "version": "0.8.30",
4
4
  "description": "Voice AI coding assistant - local agent that connects to Osborn frontend",
5
5
  "type": "module",
6
6
  "bin": {