bloby-bot 0.47.7 → 0.47.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist-bloby/assets/{bloby-E-QLmQDW.js → bloby-BOSem5eq.js} +4 -4
- package/dist-bloby/assets/globals-CZdsyZot.css +2 -0
- package/dist-bloby/assets/globals-CgIIDNVG.js +18 -0
- package/dist-bloby/assets/{highlighted-body-OFNGDK62-CTiboTVa.js → highlighted-body-OFNGDK62-DpMYfn6Z.js} +1 -1
- package/dist-bloby/assets/mermaid-GHXKKRXX-B5_2sDIG.js +1 -0
- package/dist-bloby/assets/{onboard-C1uMxuk2.js → onboard-8AOYU7vY.js} +1 -1
- package/dist-bloby/bloby.html +3 -3
- package/dist-bloby/onboard.html +3 -3
- package/package.json +4 -4
- package/supervisor/chat/OnboardWizard.tsx +121 -178
- package/supervisor/harnesses/pi/async-queue.ts +10 -0
- package/supervisor/harnesses/pi/index.ts +21 -0
- package/supervisor/harnesses/pi/providers/stream-anthropic.ts +318 -0
- package/supervisor/harnesses/pi/providers/stream-openai-completions.ts +328 -0
- package/supervisor/harnesses/pi/providers/stream.ts +4 -2
- package/supervisor/harnesses/pi/session.ts +24 -5
- package/workspace/client/public/icons/pi.svg +20 -0
- package/dist-bloby/assets/globals-Ci0CEj1X.js +0 -18
- package/dist-bloby/assets/globals-DriF_8Q_.css +0 -2
- package/dist-bloby/assets/mermaid-GHXKKRXX-CgVqYCFU.js +0 -1
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Anthropic Messages-API streaming provider.
|
|
3
|
+
*
|
|
4
|
+
* Different from the existing Claude harness: that one uses the Claude Agent
|
|
5
|
+
* SDK + subscription OAuth, this one talks directly to `api.anthropic.com/v1/messages`
|
|
6
|
+
* with a pay-per-token API key. Lets users bring their own Anthropic credentials
|
|
7
|
+
* through the pi flow instead of (or alongside) the subscription path.
|
|
8
|
+
*
|
|
9
|
+
* Wire shape: SSE with typed events — `message_start`, `content_block_start`,
|
|
10
|
+
* `content_block_delta`, `content_block_stop`, `message_delta`, `message_stop`,
|
|
11
|
+
* `ping`, `error`. Each event's `data:` JSON carries a `type` field matching
|
|
12
|
+
* the event name, so we ignore the SSE `event:` line and route off the JSON.
|
|
13
|
+
*/
|
|
14
|
+
import { log } from '../../../../shared/logger.js';
|
|
15
|
+
import type {
|
|
16
|
+
PiStreamRequest,
|
|
17
|
+
PiStreamEvent,
|
|
18
|
+
PiMessage,
|
|
19
|
+
PiContentBlock,
|
|
20
|
+
PiStopReason,
|
|
21
|
+
} from './types.js';
|
|
22
|
+
|
|
23
|
+
/* ── SSE parser (shares the LF/CRLF-tolerant pattern from the other providers) ── */
|
|
24
|
+
|
|
25
|
+
async function* parseSse(res: Response): AsyncIterable<any> {
|
|
26
|
+
if (!res.body) return;
|
|
27
|
+
const reader = res.body.getReader();
|
|
28
|
+
const decoder = new TextDecoder();
|
|
29
|
+
let buffer = '';
|
|
30
|
+
try {
|
|
31
|
+
while (true) {
|
|
32
|
+
const { value, done } = await reader.read();
|
|
33
|
+
if (done) break;
|
|
34
|
+
buffer += decoder.decode(value, { stream: true });
|
|
35
|
+
let idx;
|
|
36
|
+
while (
|
|
37
|
+
(idx = (() => {
|
|
38
|
+
const a = buffer.indexOf('\n\n');
|
|
39
|
+
const b = buffer.indexOf('\r\n\r\n');
|
|
40
|
+
if (a < 0) return b;
|
|
41
|
+
if (b < 0) return a;
|
|
42
|
+
return Math.min(a, b);
|
|
43
|
+
})()) !== -1
|
|
44
|
+
) {
|
|
45
|
+
const isCrlf = buffer.slice(idx, idx + 4) === '\r\n\r\n';
|
|
46
|
+
const raw = buffer.slice(0, idx);
|
|
47
|
+
buffer = buffer.slice(idx + (isCrlf ? 4 : 2));
|
|
48
|
+
const parsed = parseEvent(raw);
|
|
49
|
+
if (parsed !== undefined) yield parsed;
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
buffer += decoder.decode();
|
|
53
|
+
if (buffer.trim()) {
|
|
54
|
+
const parsed = parseEvent(buffer);
|
|
55
|
+
if (parsed !== undefined) yield parsed;
|
|
56
|
+
}
|
|
57
|
+
} finally {
|
|
58
|
+
try { reader.releaseLock(); } catch {}
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
function parseEvent(raw: string): any | undefined {
|
|
63
|
+
const lines = raw.split(/\r?\n/);
|
|
64
|
+
const dataLines = lines
|
|
65
|
+
.filter((l) => l.startsWith('data:'))
|
|
66
|
+
.map((l) => l.slice(5).trimStart());
|
|
67
|
+
if (!dataLines.length) return undefined;
|
|
68
|
+
const data = dataLines.join('\n');
|
|
69
|
+
if (!data) return undefined;
|
|
70
|
+
try { return JSON.parse(data); } catch { return undefined; }
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
/* ── Message conversion (pi → Anthropic) ── */
|
|
74
|
+
|
|
75
|
+
function toAnthropicContent(blocks: PiContentBlock[]): any[] {
|
|
76
|
+
const out: any[] = [];
|
|
77
|
+
for (const b of blocks) {
|
|
78
|
+
if (b.type === 'text') {
|
|
79
|
+
out.push({ type: 'text', text: b.text });
|
|
80
|
+
} else if (b.type === 'image') {
|
|
81
|
+
out.push({
|
|
82
|
+
type: 'image',
|
|
83
|
+
source: { type: 'base64', media_type: b.mediaType, data: b.data },
|
|
84
|
+
});
|
|
85
|
+
} else if (b.type === 'tool_use') {
|
|
86
|
+
out.push({
|
|
87
|
+
type: 'tool_use',
|
|
88
|
+
id: b.id,
|
|
89
|
+
name: b.name,
|
|
90
|
+
input: b.input || {},
|
|
91
|
+
});
|
|
92
|
+
} else if (b.type === 'tool_result') {
|
|
93
|
+
out.push({
|
|
94
|
+
type: 'tool_result',
|
|
95
|
+
tool_use_id: b.toolUseId,
|
|
96
|
+
content: b.content,
|
|
97
|
+
is_error: b.isError || false,
|
|
98
|
+
});
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
return out;
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
function toAnthropicMessages(pi: PiMessage[]): any[] {
|
|
105
|
+
return pi
|
|
106
|
+
.filter((m) => m.content.length > 0)
|
|
107
|
+
.map((m) => ({
|
|
108
|
+
role: m.role === 'assistant' ? 'assistant' : 'user',
|
|
109
|
+
content: toAnthropicContent(m.content),
|
|
110
|
+
}))
|
|
111
|
+
.filter((m) => m.content.length > 0);
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
function toAnthropicTools(tools: { name: string; description: string; inputSchema: Record<string, any> }[]) {
|
|
115
|
+
return tools.map((t) => ({
|
|
116
|
+
name: t.name,
|
|
117
|
+
description: t.description,
|
|
118
|
+
input_schema: t.inputSchema,
|
|
119
|
+
}));
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
function mapStopReason(reason?: string): PiStopReason {
|
|
123
|
+
switch (reason) {
|
|
124
|
+
case 'end_turn': return 'end_turn';
|
|
125
|
+
case 'stop_sequence': return 'end_turn';
|
|
126
|
+
case 'max_tokens': return 'max_tokens';
|
|
127
|
+
case 'tool_use': return 'tool_use';
|
|
128
|
+
case 'pause_turn': return 'end_turn';
|
|
129
|
+
case 'refusal': return 'error';
|
|
130
|
+
default: return 'end_turn';
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
/* ── Streaming entry point ── */
|
|
135
|
+
|
|
136
|
+
/**
 * Accumulator for one in-flight Anthropic content block, keyed by the
 * stream's block index. `text` gathers `text_delta` pieces; `toolArgsBuf`
 * gathers raw `input_json_delta` fragments until the block closes and the
 * buffer can be JSON-parsed in one go.
 */
interface PartialBlock {
  kind: 'text' | 'tool_use' | 'other'; // 'other' covers block types we don't surface
  text?: string;
  toolUseId?: string;
  toolName?: string;
  toolArgsBuf?: string;
}
|
|
143
|
+
|
|
144
|
+
/**
 * Stream one request against the Anthropic Messages API and translate the
 * typed SSE events into unified PiStreamEvents.
 *
 * Event contract for callers: zero or more `text_delta`, one `tool_use`
 * per completed tool block, an optional `text_end` carrying the full
 * accumulated text, then exactly one terminal `done` (or an `error`
 * followed by `done`/return).
 */
export async function* streamAnthropic(req: PiStreamRequest): AsyncIterable<PiStreamEvent> {
  // Strip trailing slashes so we never build `//messages`.
  const url = `${req.baseUrl.replace(/\/+$/, '')}/messages`;

  const body: any = {
    model: req.modelId,
    messages: toAnthropicMessages(req.messages),
    // Output cap used when the caller doesn't specify one.
    max_tokens: req.maxOutputTokens ?? 8192,
    stream: true,
  };
  if (req.systemPrompt?.trim()) body.system = req.systemPrompt;
  if (req.tools && req.tools.length > 0) body.tools = toAnthropicTools(req.tools);

  let res: Response;
  try {
    res = await fetch(url, {
      method: 'POST',
      headers: {
        'content-type': 'application/json',
        'accept': 'text/event-stream',
        'x-api-key': req.apiKey,
        'anthropic-version': '2023-06-01',
      },
      body: JSON.stringify(body),
      signal: req.signal,
    });
  } catch (err: any) {
    // Network-level failure (DNS, TLS, abort before headers arrived).
    yield { type: 'error', error: err?.message || String(err) };
    return;
  }

  if (!res.ok) {
    let detail = '';
    try { detail = await res.text(); } catch {}
    yield { type: 'error', error: `Anthropic ${res.status} ${res.statusText}${detail ? `: ${detail.slice(0, 400)}` : ''}` };
    return;
  }

  // Anthropic streams content blocks by index. Track partial state per index
  // so deltas land on the right block.
  const blocks = new Map<number, PartialBlock>();
  let accumulated = '';
  let lastStop: string | undefined;
  let usage: { inputTokens?: number; outputTokens?: number } | undefined;
  let chunkCount = 0;
  let firstChunkSummary = ''; // kept only for diagnostics when the stream yields nothing

  try {
    for await (const evt of parseSse(res)) {
      chunkCount++;
      if (chunkCount === 1) {
        try { firstChunkSummary = JSON.stringify(evt).slice(0, 600); } catch {}
      }
      const type = evt?.type;
      switch (type) {
        case 'message_start': {
          const u = evt?.message?.usage;
          if (u) usage = { inputTokens: u.input_tokens, outputTokens: u.output_tokens };
          break;
        }
        case 'content_block_start': {
          const idx = evt?.index ?? 0;
          const block = evt?.content_block || {};
          if (block.type === 'text') {
            blocks.set(idx, { kind: 'text', text: '' });
          } else if (block.type === 'tool_use') {
            blocks.set(idx, {
              kind: 'tool_use',
              toolUseId: block.id,
              toolName: block.name,
              toolArgsBuf: '',
            });
          } else {
            // Unrecognised block types are tracked so later deltas on this
            // index are not misattributed, but never surfaced.
            blocks.set(idx, { kind: 'other' });
          }
          break;
        }
        case 'content_block_delta': {
          const idx = evt?.index ?? 0;
          const delta = evt?.delta || {};
          const slot = blocks.get(idx);
          if (!slot) break;
          if (delta.type === 'text_delta' && typeof delta.text === 'string') {
            slot.text = (slot.text || '') + delta.text;
            accumulated += delta.text;
            yield { type: 'text_delta', delta: delta.text };
          } else if (delta.type === 'input_json_delta' && typeof delta.partial_json === 'string') {
            // Tool input arrives as JSON fragments; buffer until block stop.
            slot.toolArgsBuf = (slot.toolArgsBuf || '') + delta.partial_json;
          }
          // `thinking_delta` (extended thinking) is ignored for now — the
          // pi harness doesn't surface reasoning to the user yet.
          break;
        }
        case 'content_block_stop': {
          const idx = evt?.index ?? 0;
          const slot = blocks.get(idx);
          if (!slot) break;
          if (slot.kind === 'tool_use' && slot.toolUseId && slot.toolName) {
            let input: any = {};
            if (slot.toolArgsBuf) {
              try { input = JSON.parse(slot.toolArgsBuf); }
              // Malformed JSON args are preserved under `_raw` rather than dropped.
              catch { input = { _raw: slot.toolArgsBuf }; }
            }
            yield {
              type: 'tool_use',
              id: slot.toolUseId,
              name: slot.toolName,
              input,
            };
          }
          break;
        }
        case 'message_delta': {
          if (evt?.delta?.stop_reason) lastStop = evt.delta.stop_reason;
          const u = evt?.usage;
          if (u && (u.output_tokens !== undefined || u.input_tokens !== undefined)) {
            // Later frames refine usage; fall back to earlier values per field.
            usage = {
              inputTokens: u.input_tokens ?? usage?.inputTokens,
              outputTokens: u.output_tokens ?? usage?.outputTokens,
            };
          }
          break;
        }
        case 'error': {
          const msg = evt?.error?.message || evt?.message || 'Unknown error';
          yield { type: 'error', error: `Anthropic stream error: ${msg}` };
          yield { type: 'done', stopReason: 'error', usage };
          return;
        }
        case 'message_stop':
        case 'ping':
        default:
          // ping is keep-alive; message_stop is bookkeeping.
          break;
      }
    }
  } catch (err: any) {
    if (err?.name === 'AbortError') {
      // Caller cancelled via req.signal — not an error.
      yield { type: 'done', stopReason: 'aborted' };
      return;
    }
    yield { type: 'error', error: err?.message || String(err) };
    return;
  }

  const hadToolUse = Array.from(blocks.values()).some((b) => b.kind === 'tool_use');

  log.info(
    `[pi/anthropic] stream done — chunks=${chunkCount} text=${accumulated.length} ` +
    `toolCalls=${Array.from(blocks.values()).filter((b) => b.kind === 'tool_use').length} ` +
    `stopReason=${lastStop || 'none'} ` +
    `promptTok=${usage?.inputTokens ?? '?'} outTok=${usage?.outputTokens ?? '?'}`,
  );
  if (chunkCount === 0) {
    log.warn(`[pi/anthropic] zero chunks parsed — content-type=${res.headers.get('content-type') || '?'}`);
  } else if (!accumulated && !hadToolUse) {
    log.info(`[pi/anthropic] first chunk (truncated): ${firstChunkSummary}`);
  }

  if (accumulated) yield { type: 'text_end', text: accumulated };

  if (!accumulated && !hadToolUse) {
    yield {
      type: 'error',
      error: `Anthropic returned no output (stopReason=${lastStop || 'unknown'}).`,
    };
    yield { type: 'done', stopReason: 'error', usage };
    return;
  }

  yield {
    type: 'done',
    stopReason: hadToolUse ? 'tool_use' : mapStopReason(lastStop),
    usage,
  };
}
|
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI-completions streaming provider.
|
|
3
|
+
*
|
|
4
|
+
* One wire protocol — `POST {baseUrl}/chat/completions` with `stream: true`
|
|
5
|
+
* and SSE — covers a long list of vendors:
|
|
6
|
+
* OpenAI, DeepSeek, Groq, xAI (Grok), Cerebras, OpenRouter, Mistral,
|
|
7
|
+
* Ollama, LM Studio, plus any custom `/v1`-compatible endpoint.
|
|
8
|
+
*
|
|
9
|
+
* Only base URL + auth key change between them, so this single
|
|
10
|
+
* implementation is the workhorse of the pi harness.
|
|
11
|
+
*/
|
|
12
|
+
import { log } from '../../../../shared/logger.js';
|
|
13
|
+
import type {
|
|
14
|
+
PiStreamRequest,
|
|
15
|
+
PiStreamEvent,
|
|
16
|
+
PiMessage,
|
|
17
|
+
PiContentBlock,
|
|
18
|
+
PiStopReason,
|
|
19
|
+
} from './types.js';
|
|
20
|
+
|
|
21
|
+
/* ── SSE parser (LF or CRLF tolerant, flushes the trailing event) ── */
|
|
22
|
+
|
|
23
|
+
async function* parseSse(res: Response): AsyncIterable<any> {
|
|
24
|
+
if (!res.body) return;
|
|
25
|
+
const reader = res.body.getReader();
|
|
26
|
+
const decoder = new TextDecoder();
|
|
27
|
+
let buffer = '';
|
|
28
|
+
try {
|
|
29
|
+
while (true) {
|
|
30
|
+
const { value, done } = await reader.read();
|
|
31
|
+
if (done) break;
|
|
32
|
+
buffer += decoder.decode(value, { stream: true });
|
|
33
|
+
let idx;
|
|
34
|
+
while (
|
|
35
|
+
(idx = (() => {
|
|
36
|
+
const a = buffer.indexOf('\n\n');
|
|
37
|
+
const b = buffer.indexOf('\r\n\r\n');
|
|
38
|
+
if (a < 0) return b;
|
|
39
|
+
if (b < 0) return a;
|
|
40
|
+
return Math.min(a, b);
|
|
41
|
+
})()) !== -1
|
|
42
|
+
) {
|
|
43
|
+
const isCrlf = buffer.slice(idx, idx + 4) === '\r\n\r\n';
|
|
44
|
+
const raw = buffer.slice(0, idx);
|
|
45
|
+
buffer = buffer.slice(idx + (isCrlf ? 4 : 2));
|
|
46
|
+
const parsed = parseEvent(raw);
|
|
47
|
+
if (parsed !== undefined) yield parsed;
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
buffer += decoder.decode();
|
|
51
|
+
if (buffer.trim()) {
|
|
52
|
+
const parsed = parseEvent(buffer);
|
|
53
|
+
if (parsed !== undefined) yield parsed;
|
|
54
|
+
}
|
|
55
|
+
} finally {
|
|
56
|
+
try { reader.releaseLock(); } catch {}
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
function parseEvent(raw: string): any | undefined {
|
|
61
|
+
const lines = raw.split(/\r?\n/);
|
|
62
|
+
const dataLines = lines
|
|
63
|
+
.filter((l) => l.startsWith('data:'))
|
|
64
|
+
.map((l) => l.slice(5).trimStart());
|
|
65
|
+
if (!dataLines.length) return undefined;
|
|
66
|
+
const data = dataLines.join('\n');
|
|
67
|
+
if (!data || data === '[DONE]') return undefined;
|
|
68
|
+
try { return JSON.parse(data); } catch { return undefined; }
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
/* ── Message conversion (pi → OpenAI) ── */
|
|
72
|
+
|
|
73
|
+
function isToolResultMessage(m: PiMessage): boolean {
|
|
74
|
+
return m.role === 'user' && m.content.length > 0 && m.content.every((b) => b.type === 'tool_result');
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
/**
|
|
78
|
+
* The OpenAI Chat Completions schema wants tool results as their own
|
|
79
|
+
* `role: "tool"` messages, one per result, immediately following the
|
|
80
|
+
* assistant message that emitted the tool_calls. Our unified PiMessage
|
|
81
|
+
* keeps tool_result blocks inside a user-role message instead — split them
|
|
82
|
+
* here so the wire payload is valid.
|
|
83
|
+
*/
|
|
84
|
+
function toOpenAIMessages(pi: PiMessage[]): any[] {
|
|
85
|
+
const out: any[] = [];
|
|
86
|
+
for (const m of pi) {
|
|
87
|
+
if (isToolResultMessage(m)) {
|
|
88
|
+
for (const block of m.content) {
|
|
89
|
+
if (block.type !== 'tool_result') continue;
|
|
90
|
+
out.push({
|
|
91
|
+
role: 'tool',
|
|
92
|
+
tool_call_id: block.toolUseId,
|
|
93
|
+
content: block.content,
|
|
94
|
+
});
|
|
95
|
+
}
|
|
96
|
+
continue;
|
|
97
|
+
}
|
|
98
|
+
if (m.role === 'assistant') {
|
|
99
|
+
const textParts: string[] = [];
|
|
100
|
+
const toolCalls: any[] = [];
|
|
101
|
+
for (const b of m.content) {
|
|
102
|
+
if (b.type === 'text') textParts.push(b.text);
|
|
103
|
+
else if (b.type === 'tool_use') {
|
|
104
|
+
toolCalls.push({
|
|
105
|
+
id: b.id,
|
|
106
|
+
type: 'function',
|
|
107
|
+
function: {
|
|
108
|
+
name: b.name,
|
|
109
|
+
arguments: JSON.stringify(b.input || {}),
|
|
110
|
+
},
|
|
111
|
+
});
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
const msg: any = { role: 'assistant', content: textParts.join('') || null };
|
|
115
|
+
if (toolCalls.length > 0) msg.tool_calls = toolCalls;
|
|
116
|
+
out.push(msg);
|
|
117
|
+
continue;
|
|
118
|
+
}
|
|
119
|
+
// role === 'user' with non-tool-result content (text + optional images)
|
|
120
|
+
const contentBlocks: any[] = [];
|
|
121
|
+
let plainText = '';
|
|
122
|
+
let hasImage = false;
|
|
123
|
+
for (const b of m.content) {
|
|
124
|
+
if (b.type === 'text') {
|
|
125
|
+
plainText += (plainText ? '\n' : '') + b.text;
|
|
126
|
+
} else if (b.type === 'image') {
|
|
127
|
+
hasImage = true;
|
|
128
|
+
contentBlocks.push({
|
|
129
|
+
type: 'image_url',
|
|
130
|
+
image_url: { url: `data:${b.mediaType};base64,${b.data}` },
|
|
131
|
+
});
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
if (hasImage) {
|
|
135
|
+
// Mixed image+text: prepend text part to the content array.
|
|
136
|
+
if (plainText) contentBlocks.unshift({ type: 'text', text: plainText });
|
|
137
|
+
out.push({ role: 'user', content: contentBlocks });
|
|
138
|
+
} else {
|
|
139
|
+
out.push({ role: 'user', content: plainText });
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
return out;
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
function toOpenAITools(tools: { name: string; description: string; inputSchema: Record<string, any> }[]) {
|
|
146
|
+
return tools.map((t) => ({
|
|
147
|
+
type: 'function',
|
|
148
|
+
function: {
|
|
149
|
+
name: t.name,
|
|
150
|
+
description: t.description,
|
|
151
|
+
parameters: t.inputSchema,
|
|
152
|
+
},
|
|
153
|
+
}));
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
function mapFinishReason(reason?: string): PiStopReason {
|
|
157
|
+
switch (reason) {
|
|
158
|
+
case 'stop': return 'end_turn';
|
|
159
|
+
case 'length': return 'max_tokens';
|
|
160
|
+
case 'tool_calls':
|
|
161
|
+
case 'function_call':
|
|
162
|
+
return 'tool_use';
|
|
163
|
+
case 'content_filter': return 'error';
|
|
164
|
+
default: return 'end_turn';
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
/* ── Streaming entry point ── */
|
|
169
|
+
|
|
170
|
+
/**
 * Accumulator for one streaming tool call, keyed by the wire `index`.
 * `argsBuf` collects the `function.arguments` JSON fragments until the
 * stream finishes and the whole buffer can be parsed in one go.
 */
interface PartialToolCall {
  id: string;      // tool_call id — may arrive in a different delta than the name
  name: string;    // function name
  argsBuf: string; // concatenated raw JSON text of the arguments
}
|
|
175
|
+
|
|
176
|
+
/**
 * Stream one Chat Completions request and translate the SSE chunks into
 * unified PiStreamEvents.
 *
 * Event contract for callers: zero or more `text_delta` while content
 * arrives, then after the stream ends an optional `text_end` with the full
 * text, one `tool_use` per accumulated tool call, and exactly one terminal
 * `done` (or an `error` followed by `done`/return).
 */
export async function* streamOpenAICompletions(req: PiStreamRequest): AsyncIterable<PiStreamEvent> {
  // Strip trailing slashes so we never build `//chat/completions`.
  const url = `${req.baseUrl.replace(/\/+$/, '')}/chat/completions`;

  const openaiMessages = toOpenAIMessages(req.messages);
  // Inline the system prompt as the first message — most providers honour it
  // there; the few that prefer `instructions` (Responses API) aren't us.
  if (req.systemPrompt?.trim()) {
    openaiMessages.unshift({ role: 'system', content: req.systemPrompt });
  }

  const body: any = {
    model: req.modelId,
    messages: openaiMessages,
    stream: true,
    // Output cap used when the caller doesn't specify one.
    max_tokens: req.maxOutputTokens ?? 8192,
  };
  if (req.tools && req.tools.length > 0) {
    body.tools = toOpenAITools(req.tools);
    body.tool_choice = 'auto';
  }

  let res: Response;
  try {
    const headers: Record<string, string> = {
      'content-type': 'application/json',
      'accept': 'text/event-stream',
    };
    // Key is optional — local/self-hosted endpoints may not require auth.
    if (req.apiKey) headers['authorization'] = `Bearer ${req.apiKey}`;
    res = await fetch(url, {
      method: 'POST',
      headers,
      body: JSON.stringify(body),
      signal: req.signal,
    });
  } catch (err: any) {
    // Network-level failure (DNS, TLS, abort before headers arrived).
    yield { type: 'error', error: err?.message || String(err) };
    return;
  }

  if (!res.ok) {
    let detail = '';
    try { detail = await res.text(); } catch {}
    yield { type: 'error', error: `OpenAI-compat ${res.status} ${res.statusText}${detail ? `: ${detail.slice(0, 400)}` : ''}` };
    return;
  }

  let accumulated = '';
  let lastFinish: string | undefined;
  let usage: { inputTokens?: number; outputTokens?: number } | undefined;
  const toolCallsByIndex = new Map<number, PartialToolCall>();
  let chunkCount = 0;
  let firstChunkSummary = ''; // kept only for diagnostics when the stream yields nothing

  try {
    for await (const chunk of parseSse(res)) {
      chunkCount++;
      if (chunkCount === 1) {
        try { firstChunkSummary = JSON.stringify(chunk).slice(0, 600); } catch {}
      }
      const choice = chunk?.choices?.[0];
      if (!choice) {
        // Frames without choices may still carry usage totals.
        if (chunk?.usage) {
          usage = {
            inputTokens: chunk.usage.prompt_tokens,
            outputTokens: chunk.usage.completion_tokens,
          };
        }
        continue;
      }
      const delta = choice.delta || {};

      if (typeof delta.content === 'string' && delta.content.length > 0) {
        accumulated += delta.content;
        yield { type: 'text_delta', delta: delta.content };
      }

      // Tool-call deltas: function name + arguments stream in pieces keyed by
      // `index`. Accumulate per index and only emit the full tool_use once we
      // see finish_reason: 'tool_calls' (or at stream end).
      const toolDeltas: any[] = Array.isArray(delta.tool_calls) ? delta.tool_calls : [];
      for (const td of toolDeltas) {
        const idx = typeof td.index === 'number' ? td.index : 0;
        let partial = toolCallsByIndex.get(idx);
        if (!partial) {
          partial = { id: td.id || '', name: '', argsBuf: '' };
          toolCallsByIndex.set(idx, partial);
        }
        if (td.id) partial.id = td.id;
        const fn = td.function || {};
        if (fn.name) partial.name = fn.name;
        if (typeof fn.arguments === 'string') partial.argsBuf += fn.arguments;
      }

      if (choice.finish_reason) lastFinish = choice.finish_reason;
      if (chunk?.usage) {
        usage = {
          inputTokens: chunk.usage.prompt_tokens,
          outputTokens: chunk.usage.completion_tokens,
        };
      }
    }
  } catch (err: any) {
    if (err?.name === 'AbortError') {
      // Caller cancelled via req.signal — not an error.
      yield { type: 'done', stopReason: 'aborted' };
      return;
    }
    yield { type: 'error', error: err?.message || String(err) };
    return;
  }

  log.info(
    `[pi/openai-compat] stream done — chunks=${chunkCount} text=${accumulated.length} ` +
    `toolCalls=${toolCallsByIndex.size} finishReason=${lastFinish || 'none'} ` +
    `promptTok=${usage?.inputTokens ?? '?'} outTok=${usage?.outputTokens ?? '?'}`,
  );
  if (chunkCount === 0) {
    log.warn(`[pi/openai-compat] zero chunks parsed — content-type=${res.headers.get('content-type') || '?'}`);
  } else if (!accumulated && toolCallsByIndex.size === 0) {
    log.info(`[pi/openai-compat] first chunk (truncated): ${firstChunkSummary}`);
  }

  if (accumulated) yield { type: 'text_end', text: accumulated };

  for (const partial of toolCallsByIndex.values()) {
    let input: any = {};
    if (partial.argsBuf) {
      try { input = JSON.parse(partial.argsBuf); }
      // Malformed JSON args are preserved under `_raw` rather than dropped.
      catch { input = { _raw: partial.argsBuf }; }
    }
    yield {
      type: 'tool_use',
      // Synthesise an id when the provider never sent one.
      id: partial.id || `call_${partial.name}_${Math.random().toString(36).slice(2, 10)}`,
      name: partial.name,
      input,
    };
  }

  if (!accumulated && toolCallsByIndex.size === 0) {
    yield {
      type: 'error',
      error: `Provider returned no output (finishReason=${lastFinish || 'unknown'}). ` +
        `This usually means the model rejected the request or hit a content filter.`,
    };
    yield { type: 'done', stopReason: 'error', usage };
    return;
  }

  yield {
    type: 'done',
    stopReason: toolCallsByIndex.size > 0 ? 'tool_use' : mapFinishReason(lastFinish),
    usage,
  };
}
|
|
@@ -8,14 +8,16 @@
|
|
|
8
8
|
import type { PiApiFlavor } from '../sub-providers.js';
|
|
9
9
|
import type { PiStreamRequest, PiStreamEvent } from './types.js';
|
|
10
10
|
import { streamGoogle } from './stream-google.js';
|
|
11
|
+
import { streamOpenAICompletions } from './stream-openai-completions.js';
|
|
12
|
+
import { streamAnthropic } from './stream-anthropic.js';
|
|
11
13
|
|
|
12
14
|
export function streamProvider(flavor: PiApiFlavor, req: PiStreamRequest): AsyncIterable<PiStreamEvent> {
|
|
13
15
|
switch (flavor) {
|
|
14
16
|
case 'google-gemini':
|
|
15
17
|
return streamGoogle(req);
|
|
16
18
|
case 'openai-completions':
|
|
17
|
-
|
|
19
|
+
return streamOpenAICompletions(req);
|
|
18
20
|
case 'anthropic-messages':
|
|
19
|
-
|
|
21
|
+
return streamAnthropic(req);
|
|
20
22
|
}
|
|
21
23
|
}
|