npm - @animalabs/membrane - Versions diffs - 0.5.54 → 0.5.63 - Mend

@animalabs/membrane 0.5.54 → 0.5.63

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

package/dist/formatters/native.d.ts.map +1 -1
package/dist/formatters/native.js +11 -0
package/dist/formatters/native.js.map +1 -1
package/dist/formatters/normalize-tool-pairs.d.ts +4 -2
package/dist/formatters/normalize-tool-pairs.d.ts.map +1 -1
package/dist/formatters/normalize-tool-pairs.js +95 -22
package/dist/formatters/normalize-tool-pairs.js.map +1 -1
package/dist/formatters/types.d.ts +26 -0
package/dist/formatters/types.d.ts.map +1 -1
package/dist/membrane.d.ts +10 -0
package/dist/membrane.d.ts.map +1 -1
package/dist/membrane.js +118 -13
package/dist/membrane.js.map +1 -1
package/dist/providers/anthropic.d.ts.map +1 -1
package/dist/providers/anthropic.js +83 -2
package/dist/providers/anthropic.js.map +1 -1
package/dist/providers/openai-compatible.d.ts.map +1 -1
package/dist/providers/openai-compatible.js +3 -0
package/dist/providers/openai-compatible.js.map +1 -1
package/dist/providers/openai-completions.d.ts.map +1 -1
package/dist/providers/openai-completions.js +57 -3
package/dist/providers/openai-completions.js.map +1 -1
package/dist/providers/openai.d.ts.map +1 -1
package/dist/providers/openai.js +3 -0
package/dist/providers/openai.js.map +1 -1
package/dist/types/provider.d.ts +9 -0
package/dist/types/provider.d.ts.map +1 -1
package/dist/types/request.d.ts +10 -0
package/dist/types/request.d.ts.map +1 -1
package/package.json +1 -1
package/src/formatters/native.ts +10 -0
package/src/formatters/normalize-tool-pairs.ts +100 -25
package/src/formatters/types.ts +28 -1
package/src/membrane.ts +129 -13
package/src/providers/anthropic.ts +87 -3
package/src/providers/openai-compatible.ts +4 -0
package/src/providers/openai-completions.ts +58 -2
package/src/providers/openai.ts +4 -0
package/src/types/provider.ts +10 -0
package/src/types/request.ts +12 -1

package/src/providers/anthropic.ts CHANGED Viewed

@@ -122,12 +122,20 @@ export class AnthropicAdapter implements ProviderAdapter {
       let cacheReadTokens: number | undefined;
       let stopReason: string = 'end_turn';
       let stopSequence: string | undefined;
+      let stopDetails: unknown;
       // Content block tracking — finalized on content_block_stop
       const contentBlocks: Record<string, unknown>[] = [];
       let currentBlockIndex = -1;
       let currentBlockContent = '';
       let currentBlockInputJson = '';
+      // When wrapThinkingTags is set (XML formatter path), native thinking
+      // deltas are wrapped in <thinking>...</thinking> on the chunk stream so
+      // the tag-based parser tracks them as thinking instead of visible text.
+      // Tag opened lazily on the first delta — display:'omitted' models emit
+      // thinking blocks with no thinking_delta at all (signature only).
+      const wrapThinkingTags = options?.wrapThinkingTags === true;
+      let thinkingTagOpen = false;
       for await (const event of stream) {
         resetIdleTimer();
@@ -152,7 +160,21 @@ export class AnthropicAdapter implements ProviderAdapter {
             callbacks.onChunk(chunk);
           } else if (event.delta.type === 'thinking_delta') {
             currentBlockContent += event.delta.thinking;
+            if (wrapThinkingTags && !thinkingTagOpen) {
+              callbacks.onChunk('<thinking>');
+              thinkingTagOpen = true;
+            }
             callbacks.onChunk(event.delta.thinking);
+          } else if ((event.delta as { type: string }).type === 'signature_delta') {
+            // Accumulate the cryptographic signature that authenticates this
+            // thinking block. Without this, signatures never land on the
+            // streaming path and the next request — which carries the block
+            // back in history — fails Anthropic's signature validation.
+            const sig = (event.delta as { signature?: string }).signature;
+            const block = contentBlocks[currentBlockIndex];
+            if (block && block.type === 'thinking' && sig) {
+              block.signature = ((block.signature as string | undefined) ?? '') + sig;
+            }
           } else if ((event.delta as { type: string }).type === 'input_json_delta') {
             currentBlockInputJson += (event.delta as { partial_json: string }).partial_json;
           }
@@ -166,6 +188,10 @@ export class AnthropicAdapter implements ProviderAdapter {
               block.text = currentBlockContent;
             } else if (block.type === 'thinking') {
               block.thinking = currentBlockContent;
+              if (thinkingTagOpen) {
+                callbacks.onChunk('</thinking>\n');
+                thinkingTagOpen = false;
+              }
             } else if (block.type === 'tool_use' && currentBlockInputJson) {
               try { block.input = JSON.parse(currentBlockInputJson); } catch { /* partial JSON */ }
             }
@@ -176,9 +202,15 @@ export class AnthropicAdapter implements ProviderAdapter {
           // All content blocks are finalized by the time message_delta arrives.
           // Capture final metadata and exit — message_stop and the SSE connection
           // teardown after it add only variable latency with no useful data.
-          const delta = event.delta as { stop_reason?: string; stop_sequence?: string };
+          const delta = event.delta as {
+            stop_reason?: string;
+            stop_sequence?: string;
+            stop_details?: unknown;
+          };
           stopReason = delta.stop_reason ?? 'end_turn';
           stopSequence = delta.stop_sequence ?? undefined;
+          // stop_details carries refusal metadata (e.g., category: 'reasoning_extraction')
+          stopDetails = delta.stop_details ?? undefined;
           const deltaUsage = event.usage as unknown as {
             output_tokens: number;
             cache_creation_input_tokens?: number | null;
@@ -219,6 +251,7 @@ export class AnthropicAdapter implements ProviderAdapter {
           content: contentBlocks,
           stop_reason: stopReason,
           stop_sequence: stopSequence ?? null,
+          stop_details: stopDetails ?? null,
           model,
           usage: {
             input_tokens: inputTokens,
@@ -249,7 +282,11 @@ export class AnthropicAdapter implements ProviderAdapter {
   private buildRequest(request: ProviderRequest): Anthropic.MessageCreateParams {
     // Strip provider-specific fields (e.g., sourceUrl for Gemini) from image blocks
-    // before sending to Anthropic, which rejects extra inputs
+    // before sending to Anthropic, which rejects extra inputs.
+    // Also normalize nested tool_result content blocks: Membrane uses camelCase
+    // `mediaType`, Anthropic expects snake_case `media_type`. Without this,
+    // an image returned by a tool reaches the API as `{source: {mediaType: ...}}`
+    // and is silently rejected (the model sees the text label only).
     const sanitizedMessages = (request.messages as any[]).map((msg: any) => {
       if (!Array.isArray(msg.content)) return msg;
       return {
@@ -259,6 +296,12 @@ export class AnthropicAdapter implements ProviderAdapter {
             const { sourceUrl, ...rest } = block;
             return rest;
           }
+          if (block.type === 'tool_result' && Array.isArray(block.content)) {
+            return {
+              ...block,
+              content: toAnthropicToolResultContent(block.content as ContentBlock[]),
+            };
+          }
           return block;
         }),
       };
@@ -396,6 +439,41 @@ export class AnthropicAdapter implements ProviderAdapter {
 // Content Conversion Utilities
 // ============================================================================
+/**
+ * Convert Membrane tool-result content blocks to Anthropic's tool_result.content
+ * mixed array (text + image). This is what carries an image returned by a tool
+ * (e.g. an MCP fetch_attachment result) all the way to the model. Other block
+ * types are not valid inside tool_result.content per the Anthropic API and are
+ * dropped.
+ *
+ * Conversion rules, in input order:
+ * - Text blocks are re-emitted with only their `text`; any other property on
+ *   the incoming text block is not copied.
+ * - Image blocks with a `base64` source are re-emitted with Membrane's
+ *   camelCase `mediaType` renamed to snake_case `media_type`. The value is
+ *   cast to the four MIME literals listed below without a runtime check, so
+ *   an unexpected media type is passed through as-is.
+ * - Image blocks with a `url` source are re-emitted with just the `url`.
+ * - An image block with any other source type, and any block that is neither
+ *   text nor image, is skipped without raising an error.
+ *
+ * Returns a new array and does not modify `blocks`; the result may be empty
+ * when no input block is convertible.
+ */
+function toAnthropicToolResultContent(
+  blocks: ContentBlock[],
+): Array<Anthropic.TextBlockParam | Anthropic.ImageBlockParam> {
+  const out: Array<Anthropic.TextBlockParam | Anthropic.ImageBlockParam> = [];
+  for (const block of blocks) {
+    if (block.type === 'text') {
+      out.push({ type: 'text', text: block.text });
+    } else if (block.type === 'image') {
+      if (block.source.type === 'base64') {
+        out.push({
+          type: 'image',
+          source: {
+            type: 'base64',
+            media_type: block.source.mediaType as 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp',
+            data: block.source.data,
+          },
+        });
+      } else if (block.source.type === 'url') {
+        out.push({
+          type: 'image',
+          source: { type: 'url', url: block.source.url },
+        });
+      }
+    }
+  }
+  return out;
+}
 /**
  * Convert normalized content blocks to Anthropic format
  * Preserves cache_control for prompt caching
@@ -425,6 +503,11 @@ export function toAnthropicContent(blocks: ContentBlock[]): Anthropic.ContentBlo
               data: block.source.data,
             },
           });
+        } else if (block.source.type === 'url') {
+          result.push({
+            type: 'image',
+            source: { type: 'url', url: block.source.url },
+          });
         }
         break;
@@ -454,7 +537,7 @@ export function toAnthropicContent(blocks: ContentBlock[]): Anthropic.ContentBlo
           tool_use_id: block.toolUseId,
           content: typeof block.content === 'string'
             ? block.content
-            : JSON.stringify(block.content),
+            : toAnthropicToolResultContent(block.content),
           is_error: block.isError,
         });
         break;
@@ -463,6 +546,7 @@ export function toAnthropicContent(blocks: ContentBlock[]): Anthropic.ContentBlo
         result.push({
           type: 'thinking',
           thinking: block.thinking,
+          ...(block.signature ? { signature: block.signature } : {}),
         } as any);
         break;
     }

package/src/providers/openai-compatible.ts CHANGED Viewed

@@ -301,6 +301,10 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
       params.frequency_penalty = request.frequencyPenalty;
     }
+    if (request.repetitionPenalty !== undefined) {
+      params.repetition_penalty = request.repetitionPenalty;
+    }
     // OpenAI-compatible APIs may limit stop sequences (OpenAI: 4) — truncate to be safe
     if (request.stopSequences && request.stopSequences.length > 0) {
       params.stop = request.stopSequences.slice(0, 4);

package/src/providers/openai-completions.ts CHANGED Viewed

@@ -41,6 +41,7 @@ interface CompletionsRequest {
   top_p?: number;
   presence_penalty?: number;
   frequency_penalty?: number;
+  repetition_penalty?: number;
   stop?: string[];
   stream?: boolean;
 }
@@ -194,6 +195,19 @@ export class OpenAICompletionsAdapter implements ProviderAdapter {
       let accumulated = '';
       let finishReason = 'stop';
+      // Post-facto truncation of the adapter's own eotToken.
+      // The adapter serializes the prompt with this.eotToken and sends it as an
+      // API stop string, but some backends leak the stop string into streamed
+      // output. Since the bot-level formatter may use a different (or empty)
+      // turn-end token, downstream post-facto checks can't be relied on to
+      // catch it — the layer that introduced the token must truncate it.
+      // emittedLen tracks how much of `accumulated` has been emitted; a tail of
+      // eot.length-1 chars is held back in case the token is split across chunks.
+      const eot = this.eotToken;
+      let emittedLen = 0;
+      let eotFound = false;
+      streamLoop:
       while (true) {
         const { done, value } = await reader.read();
         if (done) break;
@@ -210,7 +224,28 @@ export class OpenAICompletionsAdapter implements ProviderAdapter {
             if (text) {
               accumulated += text;
-              callbacks.onChunk(text);
+              if (eot) {
+                const idx = accumulated.indexOf(eot);
+                if (idx !== -1) {
+                  // Truncate at the token, flush the un-emitted prefix, stop
+                  accumulated = accumulated.slice(0, idx);
+                  if (accumulated.length > emittedLen) {
+                    callbacks.onChunk(accumulated.slice(emittedLen));
+                  }
+                  emittedLen = accumulated.length;
+                  eotFound = true;
+                  finishReason = 'stop';
+                  break streamLoop;
+                }
+                // Emit all but a held-back tail that could be a partial token
+                const safeLen = Math.max(emittedLen, accumulated.length - (eot.length - 1));
+                if (safeLen > emittedLen) {
+                  callbacks.onChunk(accumulated.slice(emittedLen, safeLen));
+                  emittedLen = safeLen;
+                }
+              } else {
+                callbacks.onChunk(text);
+              }
             }
             if (parsed.choices?.[0]?.finish_reason) {
@@ -222,6 +257,14 @@ export class OpenAICompletionsAdapter implements ProviderAdapter {
         }
       }
+      // Flush any held-back tail if the token never completed
+      if (eot && !eotFound && accumulated.length > emittedLen) {
+        callbacks.onChunk(accumulated.slice(emittedLen));
+      }
+      if (eotFound) {
+        try { await reader.cancel(); } catch { /* stream already closed */ }
+      }
       return this.buildStreamedResponse(accumulated, finishReason, request.model, completionsRequest);
     } catch (error) {
@@ -383,6 +426,10 @@ export class OpenAICompletionsAdapter implements ProviderAdapter {
       params.frequency_penalty = request.frequencyPenalty;
     }
+    if (request.repetitionPenalty !== undefined) {
+      params.repetition_penalty = request.repetitionPenalty;
+    }
     if (stopSequences.length > 0) {
       params.stop = stopSequences;
     }
@@ -419,7 +466,16 @@ export class OpenAICompletionsAdapter implements ProviderAdapter {
   private parseResponse(response: CompletionsResponse, requestedModel: string, rawRequest: unknown): ProviderResponse {
     const choice = response.choices[0];
-    const text = choice?.text ?? '';
+    let text = choice?.text ?? '';
+    // Post-facto truncation of the adapter's own eotToken — some backends
+    // leak the stop string into the output (see stream() for details)
+    if (this.eotToken) {
+      const idx = text.indexOf(this.eotToken);
+      if (idx !== -1) {
+        text = text.slice(0, idx);
+      }
+    }
     return {
       content: this.textToContent(text),

package/src/providers/openai.ts CHANGED Viewed

@@ -399,6 +399,10 @@ export class OpenAIAdapter implements ProviderAdapter {
       params.frequency_penalty = request.frequencyPenalty;
     }
+    if (request.repetitionPenalty !== undefined) {
+      params.repetition_penalty = request.repetitionPenalty;
+    }
     // Reasoning models (o1, o3, o4) don't support stop sequences
     // OpenAI limits stop sequences to 4 — truncate to fit
     if (request.stopSequences && request.stopSequences.length > 0 && !noStopSupport(model)) {

package/src/types/provider.ts CHANGED Viewed

@@ -215,6 +215,9 @@ export interface ProviderRequest {
   /** Frequency penalty */
   frequencyPenalty?: number;
+  /** Repetition penalty (multiplicative, vLLM/HuggingFace style) */
+  repetitionPenalty?: number;
   /** Stop sequences */
   stopSequences?: string[];
@@ -232,6 +235,13 @@ export interface ProviderRequestOptions {
   idleTimeoutMs?: number;
   /** Called with the raw API request body right before fetch */
   onRequest?: (rawRequest: unknown) => void;
+  /**
+   * Wrap native thinking deltas in <thinking>...</thinking> tags on the
+   * onChunk stream. Used by the XML formatter path so its tag-based parser
+   * tracks thinking blocks; without this, native thinking content streams
+   * indistinguishably from visible text.
+   */
+  wrapThinkingTags?: boolean;
 }
 export interface ProviderResponse {

package/src/types/request.ts CHANGED Viewed

@@ -30,11 +30,22 @@ export interface GenerationConfig {
   /** Frequency penalty (provider-specific) */
   frequencyPenalty?: number;
+  /** Repetition penalty — multiplicative (vLLM/HuggingFace style, typically 1.0-1.2) */
+  repetitionPenalty?: number;
   /** Enable thinking/reasoning mode */
   thinking?: {
     enabled: boolean;
     budgetTokens?: number;
+    /** Thinking type for the API: 'enabled' (default, explicit budget) or 'adaptive' (model-managed) */
+    type?: 'enabled' | 'adaptive';
+    /**
+     * Controls how thinking content is returned: 'summarized' (readable summary)
+     * or 'omitted' (empty thinking field, signature only). Models like Fable 5 /
+     * Opus 4.7+ default to 'omitted' — set 'summarized' to receive thinking text.
+     */
+    display?: 'summarized' | 'omitted';
   };
   /** Image generation config (Gemini) */