osborn 0.9.36 → 0.9.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -31,10 +31,23 @@ RUN npm install -g "osborn@${OSBORN_VERSION}" @anthropic-ai/claude-code
31
31
  # Persistent workspace + claude config dirs
32
32
  RUN mkdir -p /workspace /root/.claude
33
33
 
34
+ # Marker so orchestration (machines.ts isManifestAware) can detect this image
35
+ # supports the manifest-driven update flow. Pre-marker machines fall back to
36
+ # the image-swap update path, which brings them onto a marker-aware image;
37
+ # from then on, all updates use the manifest flow defined in the entrypoint.
38
+ RUN touch /etc/osborn-manifest-aware
39
+
34
40
  ENV OSBORN_CWD=/workspace
35
41
  ENV OSBORN_API_PORT=8741
36
42
  ENV NODE_ENV=production
37
43
 
44
+ # HOME points at the volume so user-space config from any tool that respects
45
+ # HOME (gh, git, ssh, aws, etc.) automatically writes to the persistent
46
+ # volume instead of the ephemeral container overlay. The existing /root/.claude
47
+ # symlink machinery below stays in place — it's redundant with HOME=/workspace
48
+ # but harmless.
49
+ ENV HOME=/workspace
50
+
38
51
  WORKDIR /workspace
39
52
 
40
53
  EXPOSE 8741
@@ -52,6 +65,10 @@ ln -sf /workspace/.claude /root/.claude
52
65
  # Suppress Claude Code interactive onboarding prompts
53
66
  ONBOARDING_JSON='{"numStartups":10,"installMethod":"npm","autoUpdates":false,"hasCompletedOnboarding":true,"hasTrustDialogAccepted":true,"hasTrustDialogHooksAccepted":true,"hasCompletedProjectOnboarding":true,"hasAcknowledgedCostThreshold":true,"effortCalloutV2Dismissed":true,"theme":"dark","projects":{"/workspace":{"hasTrustDialogAccepted":true,"hasTrustDialogHooksAccepted":true,"hasCompletedProjectOnboarding":true}}}'
54
67
  echo "$ONBOARDING_JSON" > /root/.claude.json
68
+ # Additional write at $HOME/.claude.json. With HOME=/workspace this is where
69
+ # Claude Code actually reads its top-level config from; the /root/.claude.json
70
+ # write above becomes dead but is left in place (harmless).
71
+ echo "$ONBOARDING_JSON" > /workspace/.claude.json
55
72
  mkdir -p /workspace/.claude
56
73
  echo "$ONBOARDING_JSON" > /workspace/.claude/.config.json
57
74
  echo "$ONBOARDING_JSON" > /workspace/.claude/claude.json
@@ -79,6 +96,23 @@ if [ -d "$PKG_SKILLS_DIR" ]; then
79
96
  done
80
97
  fi
81
98
 
99
+ # Manifest-driven version check.
100
+ # Orchestration writes /workspace/.osborn-want-version on update (machines.ts
101
+ # updateViaManifest). On every boot we compare to the currently-installed
102
+ # osborn and run `npm install -g osborn@<want>` if they differ. The install
103
+ # lands in the container overlay (default npm prefix) — Fly wipes overlay on
104
+ # stop/start so the install re-runs on the first boot after each stop/start until the base image is
105
+ # rebuilt with that version baked in. Update is fast between Fly restarts;
106
+ # only the first boot after a restart pays the npm install cost.
107
+ WANT=$(cat /workspace/.osborn-want-version 2>/dev/null | tr -d '[:space:]')
108
+ if [ -n "$WANT" ]; then
109
+ CURRENT=$(osborn --version 2>/dev/null | head -1 | tr -d '[:space:]')
110
+ if [ "$WANT" != "$CURRENT" ]; then
111
+ echo "[sandbox] osborn ${CURRENT:-none} → ${WANT} (manifest install)"
112
+ npm install -g "osborn@${WANT}" || echo "[sandbox] install failed — running ${CURRENT:-image-baked} version"
113
+ fi
114
+ fi
115
+
82
116
  exec osborn
83
117
  ENTRYPOINT
84
118
 
package/dist/index.js CHANGED
@@ -1294,9 +1294,34 @@ async function main() {
1294
1294
  console.warn('⚠️ Failed to read JSONL for interruption context:', err);
1295
1295
  }
1296
1296
  }
1297
- // Store — consumed when user's next message arrives via chat()
1298
- lastInterruption = { spokenText: fullText, recentMessages, timestamp: Date.now() };
1299
- console.log(`📋 Interruption context stored (text: ${fullText.length} chars, JSONL: ${recentMessages.length} chars)`);
1297
+ // Store — consumed when user's next message arrives via chat().
1298
+ // Preserve any already-buffered suppressedText (the user may have started speaking
1299
+ // BEFORE the previous TTS completed, and we may have already suppressed in-flight
1300
+ // tts_say events that arrived during that overlap).
1301
+ const carriedSuppressed = lastInterruption?.suppressedText ?? '';
1302
+ lastInterruption = { spokenText: fullText, recentMessages, suppressedText: carriedSuppressed, timestamp: Date.now() };
1303
+ console.log(`📋 Interruption context stored (text: ${fullText.length} chars, JSONL: ${recentMessages.length} chars, suppressed carried: ${carriedSuppressed.length} chars)`);
1304
+ }
1305
+ /**
1306
+ * Buffer text the agent tried to say while the user was speaking, but which we
1307
+ * suppressed at the tts_say gate to avoid talking over them. The trimmed text is
1308
+ * appended (newline-separated) to lastInterruption.suppressedText and the timestamp is refreshed, so it travels to Claude in the next chat() call.
1309
+ * If no interruption context exists yet (e.g. user just started speaking with no
1310
+ * prior TTS interrupt), creates a fresh entry with empty spokenText/recentMessages. Empty or whitespace-only text is ignored.
1311
+ */
1312
+ function appendSuppressedText(text) {
1313
+ const t = text.trim();
1314
+ if (!t)
1315
+ return;
1316
+ if (lastInterruption) {
1317
+ const sep = lastInterruption.suppressedText ? '\n' : '';
1318
+ lastInterruption.suppressedText = lastInterruption.suppressedText + sep + t;
1319
+ lastInterruption.timestamp = Date.now();
1320
+ }
1321
+ else {
1322
+ lastInterruption = { spokenText: '', recentMessages: '', suppressedText: t, timestamp: Date.now() };
1323
+ }
1324
+ console.log(`🤐 Suppressed text buffered (+${t.length} chars, total ${lastInterruption.suppressedText.length}): "${t.substring(0, 80)}${t.length > 80 ? '...' : ''}"`);
1300
1325
  }
1301
1326
  /**
1302
1327
  * Callback for PipelineDirectLLM — returns pending interruption context and clears it.
@@ -1311,7 +1336,11 @@ async function main() {
1311
1336
  lastInterruption = null;
1312
1337
  return null;
1313
1338
  }
1314
- const ctx = { spokenText: lastInterruption.spokenText, recentMessages: lastInterruption.recentMessages };
1339
+ const ctx = {
1340
+ spokenText: lastInterruption.spokenText,
1341
+ recentMessages: lastInterruption.recentMessages,
1342
+ suppressedText: lastInterruption.suppressedText,
1343
+ };
1315
1344
  lastInterruption = null;
1316
1345
  return ctx;
1317
1346
  }
@@ -1821,6 +1850,15 @@ async function main() {
1821
1850
  console.log(`🔇 tts_say fired but text is empty — skipping`);
1822
1851
  return;
1823
1852
  }
1853
+ // Suppress while the user is mid-utterance. Without this, agent text generated
1854
+ // in parallel by the Claude SDK plays right over the user — same problem as
1855
+ // pre-interrupt overlap, but at the *output* side. The suppressed text gets
1856
+ // folded into lastInterruption so the next chat() to Claude carries it as
1857
+ // "you wrote this but the user did not hear it — re-articulate if relevant."
1858
+ if (userState === 'speaking') {
1859
+ appendSuppressedText(data.text);
1860
+ return;
1861
+ }
1824
1862
  const sayId = Date.now(); // simple ID to correlate start/end logs
1825
1863
  console.log(`🗣️ [${sayId}] session.say START (${data.text.length} chars): "${data.text}"`);
1826
1864
  // Forward spoken text + audio to meeting output page when bot is in a meeting.
@@ -14,6 +14,13 @@ import { EventEmitter } from 'events';
14
14
  export interface InterruptionContext {
15
15
  spokenText: string; // what the user heard before the cutoff; empty when there was no mid-TTS interrupt
16
16
  recentMessages: string; // the agent's recent messages, quoted back to it in the [INTERRUPTED] prompt; may be empty
17
+ /**
18
+ * Text the agent generated while the user was still speaking, which we
19
+ * suppressed at the tts_say gate (before session.say() is called) to avoid talking over the user. The agent
20
+ * receives this so it knows what it tried to say but the user did not hear,
21
+ * and can re-articulate the relevant bits in its next response. Empty string when nothing was suppressed.
22
+ */
23
+ suppressedText: string;
17
24
  }
18
25
  export interface PipelineDirectOptions extends ClaudeLLMOptions {
19
26
  onFastBrainResult?: (result: FastBrainPanelResult) => void;
@@ -83,31 +83,89 @@ export class PipelineDirectLLM extends llm.LLM {
83
83
  }
84
84
  }
85
85
  console.log(`📥 [pipeline] chat() call #${callN} (${userText.length} chars): "${userText}"`);
86
- // Check for pending interruption context — enrich user message if interrupted
86
+ // Always check the pending playback context — it can carry two independent
87
+ // signals: (a) an actual interruption (spokenText + recentMessages) when the
88
+ // user cut Osborn off mid-TTS, OR (b) suppressed text generated by the SDK
89
+ // while the user was speaking, regardless of whether they were actually
90
+ // interrupting active TTS. We need to forward BOTH cases so the agent knows
91
+ // what it produced that the user didn't hear, and so the buffer is cleared
92
+ // every turn even when there was no interrupt.
87
93
  const interruptCtx = this.#opts.getAndConsumeInterruptionContext?.();
88
94
  if (interruptCtx && userText.trim()) {
89
- console.log(`🔇 [pipeline] Enriching user message with interruption context`);
90
- // Interrupt Claude's current work before sending enriched message
91
- this.#claudeLLM.interruptQuery().catch(() => { });
92
- // Replace user message in chatCtx with context-enriched version
93
- const enrichedMessage = [
94
- `[INTERRUPTED] The user interrupted your response mid-speech.`,
95
- ``,
96
- `What the user heard before cutoff:`,
97
- `"${interruptCtx.spokenText}"`,
98
- ``,
99
- `Your recent messages (full untruncated — you wrote these):`,
100
- interruptCtx.recentMessages || '(no recent messages found)',
101
- ``,
102
- `User's message: "${userText}"`,
103
- ``,
104
- `RESPOND with speech first, then act:`,
105
- `- ALWAYS reply with at least one spoken sentence before doing any tool calls`,
106
- `- If it's a quick side question, answer it then continue where you left off`,
107
- `- If they want to change direction, acknowledge and follow their lead`,
108
- `- Clarify when asked to or the question requires going over what you just said`,
109
- `- If relevant details were cut off — whether they answer the current question or an earlier one — weave them back in naturally so the user stays in context without having to ask again.`,
110
- ].join('\n');
95
+ const hasInterrupt = !!interruptCtx.spokenText;
96
+ const hasSuppressed = !!interruptCtx.suppressedText;
97
+ const suppressedBlock = hasSuppressed
98
+ ? [
99
+ ``,
100
+ `Text you generated while the user was speaking NOT played (we suppressed it so we wouldn't talk over them):`,
101
+ `"${interruptCtx.suppressedText}"`,
102
+ `If any of that is still relevant to the user's current message, re-articulate the key points naturally. If it's no longer relevant, drop it.`,
103
+ ].join('\n')
104
+ : '';
105
+ let enrichedMessage;
106
+ if (hasInterrupt) {
107
+ // Actual mid-TTS interruption — keep the full [INTERRUPTED] template
108
+ console.log(`🔇 [pipeline] Enriching: interrupt (spoken=${interruptCtx.spokenText.length} chars, suppressed=${interruptCtx.suppressedText.length} chars)`);
109
+ this.#claudeLLM.interruptQuery().catch(() => { });
110
+ enrichedMessage = [
111
+ `[INTERRUPTED] The user interrupted your response mid-speech.`,
112
+ ``,
113
+ `What the user heard before cutoff:`,
114
+ `"${interruptCtx.spokenText}"`,
115
+ ``,
116
+ `WHAT THE USER DID NOT HEAR (you wrote this but it was cut off):`,
117
+ `Anything in "Your recent messages" below that appears AFTER the quoted heard text is content the user did not hear. The user has no memory of it.`,
118
+ ``,
119
+ `Your recent messages (full untruncated — you wrote these):`,
120
+ interruptCtx.recentMessages || '(no recent messages found)',
121
+ suppressedBlock,
122
+ ``,
123
+ `User's message: "${userText}"`,
124
+ ``,
125
+ `CONTEXT PRESERVATION (READ THIS):`,
126
+ `The user has NO memory of unheard content. If any of it bears on their current message — answers a question they just asked, sets up a follow-up they're now asking about, or shows a knowledge gap and fills in a detail they're reacting to — you MUST surface it. Briefly is fine if their message is off-topic or explores a direction they haven't asked about yet. But never assume they remember what they never heard.`,
127
+ ``,
128
+ `RESPOND with speech first, then act:`,
129
+ `- ALWAYS reply with at least one spoken sentence before doing any tool calls`,
130
+ `- If it's a quick side question, answer it then continue where you left off`,
131
+ `- If they want to change direction, acknowledge and follow their lead`,
132
+ `- Clarify when asked to or the question requires going over what you just said`,
133
+ `- If relevant details were cut off — whether they answer the current question or an earlier one — weave them back in naturally so the user stays in context without having to ask again.`,
134
+ ].join('\n');
135
+ }
136
+ else if (hasSuppressed) {
137
+ // No real interrupt — user was speaking while we had text queued. They
138
+ // weren't cutting Osborn off, just talking over a gap. Don't claim an
139
+ // interrupt happened; symmetric structure to [INTERRUPTED] so Claude
140
+ // treats both signals consistently.
141
+ console.log(`🤐 [pipeline] Enriching: suppressed-only (${interruptCtx.suppressedText.length} chars, no interrupt)`);
142
+ enrichedMessage = [
143
+ `[CONTEXT] You generated speech while the user was already talking. None of it played.`,
144
+ ``,
145
+ `What the user is saying now:`,
146
+ `"${userText}"`,
147
+ ``,
148
+ `Text you produced that the user did NOT hear:`,
149
+ `"${interruptCtx.suppressedText}"`,
150
+ ``,
151
+ `CONTEXT PRESERVATION (READ THIS):`,
152
+ `The user has NO memory of the unheard text. If any of it bears on their current message — answers a question they just asked, sets up a follow-up they're now asking about, or shows a knowledge gap and fills in a detail they're reacting to — you MUST surface it. Briefly is fine if their message is off-topic or explores a direction they haven't asked about yet. But never assume they remember what they never heard.`,
153
+ ``,
154
+ `RESPOND with speech first, then act:`,
155
+ `- ALWAYS reply with at least one spoken sentence before doing any tool calls`,
156
+ `- Three likely cases — figure out which applies:`,
157
+ ` (a) the user didn't realize you were responding → forward the key points of the unheard text`,
158
+ ` (b) the user changed direction → drop the unheard text, follow their lead`,
159
+ ` (c) the user's message builds on the unheard text → integrate it as if they'd heard it`,
160
+ `- Keep it tight — this is a voice response.`,
161
+ ].join('\n');
162
+ }
163
+ else {
164
+ // Context object existed but both fields empty — defensive no-op,
165
+ // shouldn't happen because appendSuppressedText only creates entries
166
+ // when text is non-empty.
167
+ enrichedMessage = userText;
168
+ }
111
169
  // Modify the last user message in chatCtx
112
170
  for (let i = chatCtx.items.length - 1; i >= 0; i--) {
113
171
  const item = chatCtx.items[i];
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "osborn",
3
- "version": "0.9.36",
3
+ "version": "0.9.38",
4
4
  "description": "Voice AI coding assistant - local agent that connects to Osborn frontend",
5
5
  "type": "module",
6
6
  "bin": {