bloby-bot 0.47.6 → 0.47.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bloby-bot",
3
- "version": "0.47.6",
3
+ "version": "0.47.8",
4
4
  "releaseNotes": [
5
5
  "1. # voice note (PTT bubble)",
6
6
  "2. # audio file + caption",
@@ -9,6 +9,12 @@
9
9
  export interface AsyncQueue<T> extends AsyncIterable<T> {
10
10
  push(item: T): void;
11
11
  end(): void;
12
+ /**
13
+ * Non-blocking drain: return every item currently buffered without waiting
14
+ * for a new one. Used by the session to fold mid-turn user messages into
15
+ * the model's history without breaking the outer `for await` consumer.
16
+ */
17
+ drainPending(): T[];
12
18
  }
13
19
 
14
20
  export function createAsyncQueue<T>(): AsyncQueue<T> {
@@ -30,6 +36,10 @@ export function createAsyncQueue<T>(): AsyncQueue<T> {
30
36
  done = true;
31
37
  if (resolve) resolve({ value: undefined as any, done: true });
32
38
  },
39
+ drainPending() {
40
+ if (pending.length === 0) return [];
41
+ return pending.splice(0, pending.length);
42
+ },
33
43
  [Symbol.asyncIterator]() {
34
44
  return {
35
45
  next(): Promise<IteratorResult<T>> {
@@ -82,6 +82,26 @@ function formatConversationHistory(messages: RecentMessage[]): string {
82
82
  return messages.map((m) => `${m.role}: ${m.content}`).join('\n\n');
83
83
  }
84
84
 
85
+ /**
86
+ * Live-conversation pacing hint. Anthropic trains Claude models to do
87
+ * this natively; non-Anthropic models (Gemini especially) tend to go silent
88
+ * during tool loops and never report progress. This nudge makes the
89
+ * conversation feel alive — quick acknowledgement before long tasks, short
90
+ * status notes between tool calls, and inline answers if the user types
91
+ * something while the agent is mid-task.
92
+ */
93
+ const LIVE_CONVERSATION_HINT = `
94
+
95
+ ---
96
+ # Live-conversation pacing
97
+
98
+ You are running in a streaming chat where the user can keep typing while you work. Make the conversation feel alive:
99
+
100
+ - Before kicking off a multi-step task, say one short line acknowledging it ("On it, looking at the widget now.").
101
+ - Between tool calls on long tasks, drop a brief progress note ("Found the file, checking the layout next.") so the user knows you're still working.
102
+ - If a new user message arrives while you're mid-task, you'll see it as a fresh user-role message in the conversation history. Answer it briefly inline, mention you're still working on the main task, then continue.
103
+ - Final answers should be concise and concrete.`;
104
+
85
105
  async function buildSystemPrompt(
86
106
  names?: { botName: string; humanName: string },
87
107
  recentMessages?: RecentMessage[],
@@ -89,6 +109,7 @@ async function buildSystemPrompt(
89
109
  const memoryFiles = readMemoryFiles();
90
110
  const basePrompt = await assembleSystemPrompt(names?.botName, names?.humanName);
91
111
  let systemPrompt = basePrompt;
112
+ systemPrompt += LIVE_CONVERSATION_HINT;
92
113
  systemPrompt += `\n\n---\n# Your Memory Files\n\n## MYSELF.md\n${memoryFiles.myself}\n\n## MYHUMAN.md\n${memoryFiles.myhuman}\n\n## MEMORY.md\n${memoryFiles.memory}\n\n---\n# Your Config Files\n\n## PULSE.json\n${memoryFiles.pulse}\n\n## CRONS.json\n${memoryFiles.crons}`;
93
114
 
94
115
  try {
@@ -100,8 +100,12 @@ function toGeminiParts(content: PiContentBlock[]): any[] {
100
100
  } else if (b.type === 'image') {
101
101
  parts.push({ inlineData: { mimeType: b.mediaType, data: b.data } });
102
102
  } else if (b.type === 'tool_use') {
103
- // Assistant turn: the model asked to invoke a tool.
104
- parts.push({ functionCall: { name: b.name, args: b.input || {} } });
103
+ // Assistant turn: the model asked to invoke a tool. Thinking-capable
104
+ // Gemini 3.x rejects (HTTP 400) any echoed functionCall whose
105
+ // thoughtSignature is missing, so we forward it verbatim when present.
106
+ const part: any = { functionCall: { name: b.name, args: b.input || {} } };
107
+ if (b.thoughtSignature) part.thoughtSignature = b.thoughtSignature;
108
+ parts.push(part);
105
109
  } else if (b.type === 'tool_result') {
106
110
  // Function responses can be strings, objects, or even error markers.
107
111
  // Wrap text in `{ output: ... }` (Gemini's docs use a flexible
@@ -262,6 +266,9 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
262
266
  id,
263
267
  name: part.functionCall.name,
264
268
  input: part.functionCall.args || {},
269
+ // Thinking-capable models attach a signature that we must echo
270
+ // back unchanged on the next turn. Optional on non-thinking models.
271
+ thoughtSignature: typeof part.thoughtSignature === 'string' ? part.thoughtSignature : undefined,
265
272
  };
266
273
  continue;
267
274
  }
@@ -17,7 +17,10 @@ export type PiRole = 'user' | 'assistant' | 'tool';
17
17
  export type PiContentBlock =
18
18
  | { type: 'text'; text: string }
19
19
  | { type: 'image'; mediaType: string; data: string } // base64
20
- | { type: 'tool_use'; id: string; name: string; input: any }
20
+ // `thoughtSignature` is a Gemini 3.x thinking-model field. Pi-flavored
21
+ // providers that emit reasoning attach it to function-call parts; the API
22
+ // rejects the next turn with HTTP 400 if we don't echo it back verbatim.
23
+ | { type: 'tool_use'; id: string; name: string; input: any; thoughtSignature?: string }
21
24
  | { type: 'tool_result'; toolUseId: string; content: string; isError?: boolean };
22
25
 
23
26
  export interface PiMessage {
@@ -50,7 +53,7 @@ export type PiStopReason = 'end_turn' | 'tool_use' | 'max_tokens' | 'error' | 'a
50
53
  export type PiStreamEvent =
51
54
  | { type: 'text_delta'; delta: string }
52
55
  | { type: 'text_end'; text: string }
53
- | { type: 'tool_use'; id: string; name: string; input: any }
56
+ | { type: 'tool_use'; id: string; name: string; input: any; thoughtSignature?: string }
54
57
  | { type: 'done'; stopReason: PiStopReason; usage?: PiUsage }
55
58
  | { type: 'error'; error: string };
56
59
 
@@ -68,7 +68,7 @@ export function createPiSession(init: PiSessionInit): PiSession {
68
68
  /** One stream round — collect the assistant blocks the model emits this pass. */
69
69
  interface RoundResult {
70
70
  text: string;
71
- toolUses: { id: string; name: string; input: any }[];
71
+ toolUses: { id: string; name: string; input: any; thoughtSignature?: string }[];
72
72
  errored: boolean;
73
73
  }
74
74
 
@@ -100,7 +100,12 @@ export function createPiSession(init: PiSessionInit): PiSession {
100
100
  result.text = evt.text;
101
101
  break;
102
102
  case 'tool_use':
103
- result.toolUses.push({ id: evt.id, name: evt.name, input: evt.input });
103
+ result.toolUses.push({
104
+ id: evt.id,
105
+ name: evt.name,
106
+ input: evt.input,
107
+ thoughtSignature: evt.thoughtSignature,
108
+ });
104
109
  init.onEvent({ type: 'tool_use', id: evt.id, name: evt.name, input: evt.input });
105
110
  break;
106
111
  case 'error':
@@ -136,14 +141,17 @@ export function createPiSession(init: PiSessionInit): PiSession {
136
141
  }
137
142
  }
138
143
 
139
- async function runOneTurn(userMsg: PiMessage): Promise<void> {
144
+ async function runOneTurn(input: AsyncQueue<PiMessage>, firstUserMsg: PiMessage): Promise<void> {
140
145
  if (init.abortController.signal.aborted) return;
141
- messages.push(userMsg);
146
+ // Fold any messages already queued behind the first one into this turn.
147
+ messages.push(firstUserMsg);
148
+ for (const extra of input.drainPending()) messages.push(extra);
142
149
  init.onEvent({ type: 'turn_started' });
143
150
 
144
151
  let accumulatedText = '';
145
152
  const usedTools = new Set<string>();
146
153
  let turnErrored = false;
154
+ let pendingInterleave = false;
147
155
 
148
156
  for (let round = 0; round < MAX_TOOL_ROUNDS; round++) {
149
157
  if (init.abortController.signal.aborted) break;
@@ -157,14 +165,21 @@ export function createPiSession(init: PiSessionInit): PiSession {
157
165
  assistantContent.push({ type: 'text', text });
158
166
  }
159
167
  for (const tu of toolUses) {
160
- assistantContent.push({ type: 'tool_use', id: tu.id, name: tu.name, input: tu.input });
168
+ assistantContent.push({
169
+ type: 'tool_use',
170
+ id: tu.id,
171
+ name: tu.name,
172
+ input: tu.input,
173
+ // Forward Gemini's thoughtSignature unchanged so the next turn's
174
+ // request echoes it back; without it the API rejects with 400.
175
+ thoughtSignature: tu.thoughtSignature,
176
+ });
161
177
  }
162
178
  if (assistantContent.length > 0) {
163
179
  messages.push({ role: 'assistant', content: assistantContent });
164
180
  }
165
181
 
166
182
  if (errored) { turnErrored = true; break; }
167
- if (toolUses.length === 0) break; // model finished — exit loop
168
183
 
169
184
  // Run every tool the model asked for this round, then feed the results
170
185
  // back as a single user message Gemini accepts as a batch.
@@ -185,7 +200,24 @@ export function createPiSession(init: PiSessionInit): PiSession {
185
200
  if (toolResultBlocks.length > 0) {
186
201
  messages.push({ role: 'user', content: toolResultBlocks });
187
202
  }
188
- // Loop continues — re-stream with the new tool results in context.
203
+
204
+ // Fold any user messages that arrived during this round into history so
205
+ // the next stream pass sees them. This is what makes the conversation
206
+ // feel alive: while the agent is grinding on a long task, a question
207
+ // typed mid-stream lands in the very next request as a user-role part,
208
+ // and the model can answer it inline before continuing.
209
+ const interleaved = input.drainPending();
210
+ if (interleaved.length > 0) {
211
+ log.info(`[pi/session] interleaved ${interleaved.length} mid-turn user message(s) into history`);
212
+ for (const m of interleaved) messages.push(m);
213
+ pendingInterleave = true;
214
+ } else {
215
+ pendingInterleave = false;
216
+ }
217
+
218
+ // Exit only when the model has nothing more to do AND no new user messages
219
+ // arrived mid-round; pending tool calls or fresh input each keep the loop alive.
220
+ if (toolUses.length === 0 && !pendingInterleave) break;
189
221
  }
190
222
 
191
223
  if (!turnErrored) {
@@ -202,7 +234,7 @@ export function createPiSession(init: PiSessionInit): PiSession {
202
234
  for await (const userMsg of input) {
203
235
  if (init.abortController.signal.aborted) break;
204
236
  try {
205
- await runOneTurn(userMsg);
237
+ await runOneTurn(input, userMsg);
206
238
  } catch (err: any) {
207
239
  log.warn(`[pi/session] Turn failed: ${err?.message || err}`);
208
240
  init.onEvent({ type: 'error', error: err?.message || String(err) });