@animalabs/membrane 0.1.0

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.
Files changed (131)
  1. package/dist/context/index.d.ts +10 -0
  2. package/dist/context/index.d.ts.map +1 -0
  3. package/dist/context/index.js +9 -0
  4. package/dist/context/index.js.map +1 -0
  5. package/dist/context/process.d.ts +22 -0
  6. package/dist/context/process.d.ts.map +1 -0
  7. package/dist/context/process.js +369 -0
  8. package/dist/context/process.js.map +1 -0
  9. package/dist/context/types.d.ts +118 -0
  10. package/dist/context/types.d.ts.map +1 -0
  11. package/dist/context/types.js +60 -0
  12. package/dist/context/types.js.map +1 -0
  13. package/dist/index.d.ts +12 -0
  14. package/dist/index.d.ts.map +1 -0
  15. package/dist/index.js +18 -0
  16. package/dist/index.js.map +1 -0
  17. package/dist/membrane.d.ts +96 -0
  18. package/dist/membrane.d.ts.map +1 -0
  19. package/dist/membrane.js +893 -0
  20. package/dist/membrane.js.map +1 -0
  21. package/dist/providers/anthropic.d.ts +36 -0
  22. package/dist/providers/anthropic.d.ts.map +1 -0
  23. package/dist/providers/anthropic.js +265 -0
  24. package/dist/providers/anthropic.js.map +1 -0
  25. package/dist/providers/index.d.ts +8 -0
  26. package/dist/providers/index.d.ts.map +1 -0
  27. package/dist/providers/index.js +8 -0
  28. package/dist/providers/index.js.map +1 -0
  29. package/dist/providers/openai-compatible.d.ts +74 -0
  30. package/dist/providers/openai-compatible.d.ts.map +1 -0
  31. package/dist/providers/openai-compatible.js +412 -0
  32. package/dist/providers/openai-compatible.js.map +1 -0
  33. package/dist/providers/openai.d.ts +69 -0
  34. package/dist/providers/openai.d.ts.map +1 -0
  35. package/dist/providers/openai.js +455 -0
  36. package/dist/providers/openai.js.map +1 -0
  37. package/dist/providers/openrouter.d.ts +76 -0
  38. package/dist/providers/openrouter.d.ts.map +1 -0
  39. package/dist/providers/openrouter.js +492 -0
  40. package/dist/providers/openrouter.js.map +1 -0
  41. package/dist/transforms/chat.d.ts +52 -0
  42. package/dist/transforms/chat.d.ts.map +1 -0
  43. package/dist/transforms/chat.js +136 -0
  44. package/dist/transforms/chat.js.map +1 -0
  45. package/dist/transforms/index.d.ts +6 -0
  46. package/dist/transforms/index.d.ts.map +1 -0
  47. package/dist/transforms/index.js +6 -0
  48. package/dist/transforms/index.js.map +1 -0
  49. package/dist/transforms/prefill.d.ts +89 -0
  50. package/dist/transforms/prefill.d.ts.map +1 -0
  51. package/dist/transforms/prefill.js +401 -0
  52. package/dist/transforms/prefill.js.map +1 -0
  53. package/dist/types/config.d.ts +103 -0
  54. package/dist/types/config.d.ts.map +1 -0
  55. package/dist/types/config.js +21 -0
  56. package/dist/types/config.js.map +1 -0
  57. package/dist/types/content.d.ts +81 -0
  58. package/dist/types/content.d.ts.map +1 -0
  59. package/dist/types/content.js +40 -0
  60. package/dist/types/content.js.map +1 -0
  61. package/dist/types/errors.d.ts +42 -0
  62. package/dist/types/errors.d.ts.map +1 -0
  63. package/dist/types/errors.js +208 -0
  64. package/dist/types/errors.js.map +1 -0
  65. package/dist/types/index.d.ts +18 -0
  66. package/dist/types/index.d.ts.map +1 -0
  67. package/dist/types/index.js +9 -0
  68. package/dist/types/index.js.map +1 -0
  69. package/dist/types/message.d.ts +46 -0
  70. package/dist/types/message.d.ts.map +1 -0
  71. package/dist/types/message.js +38 -0
  72. package/dist/types/message.js.map +1 -0
  73. package/dist/types/provider.d.ts +155 -0
  74. package/dist/types/provider.d.ts.map +1 -0
  75. package/dist/types/provider.js +5 -0
  76. package/dist/types/provider.js.map +1 -0
  77. package/dist/types/request.d.ts +78 -0
  78. package/dist/types/request.d.ts.map +1 -0
  79. package/dist/types/request.js +5 -0
  80. package/dist/types/request.js.map +1 -0
  81. package/dist/types/response.d.ts +131 -0
  82. package/dist/types/response.d.ts.map +1 -0
  83. package/dist/types/response.js +7 -0
  84. package/dist/types/response.js.map +1 -0
  85. package/dist/types/streaming.d.ts +164 -0
  86. package/dist/types/streaming.d.ts.map +1 -0
  87. package/dist/types/streaming.js +5 -0
  88. package/dist/types/streaming.js.map +1 -0
  89. package/dist/types/tools.d.ts +71 -0
  90. package/dist/types/tools.d.ts.map +1 -0
  91. package/dist/types/tools.js +5 -0
  92. package/dist/types/tools.js.map +1 -0
  93. package/dist/utils/index.d.ts +5 -0
  94. package/dist/utils/index.d.ts.map +1 -0
  95. package/dist/utils/index.js +5 -0
  96. package/dist/utils/index.js.map +1 -0
  97. package/dist/utils/stream-parser.d.ts +53 -0
  98. package/dist/utils/stream-parser.d.ts.map +1 -0
  99. package/dist/utils/stream-parser.js +359 -0
  100. package/dist/utils/stream-parser.js.map +1 -0
  101. package/dist/utils/tool-parser.d.ts +130 -0
  102. package/dist/utils/tool-parser.d.ts.map +1 -0
  103. package/dist/utils/tool-parser.js +571 -0
  104. package/dist/utils/tool-parser.js.map +1 -0
  105. package/package.json +37 -0
  106. package/src/context/index.ts +24 -0
  107. package/src/context/process.ts +520 -0
  108. package/src/context/types.ts +231 -0
  109. package/src/index.ts +23 -0
  110. package/src/membrane.ts +1174 -0
  111. package/src/providers/anthropic.ts +340 -0
  112. package/src/providers/index.ts +31 -0
  113. package/src/providers/openai-compatible.ts +570 -0
  114. package/src/providers/openai.ts +625 -0
  115. package/src/providers/openrouter.ts +662 -0
  116. package/src/transforms/chat.ts +212 -0
  117. package/src/transforms/index.ts +22 -0
  118. package/src/transforms/prefill.ts +585 -0
  119. package/src/types/config.ts +172 -0
  120. package/src/types/content.ts +181 -0
  121. package/src/types/errors.ts +277 -0
  122. package/src/types/index.ts +154 -0
  123. package/src/types/message.ts +89 -0
  124. package/src/types/provider.ts +249 -0
  125. package/src/types/request.ts +131 -0
  126. package/src/types/response.ts +223 -0
  127. package/src/types/streaming.ts +231 -0
  128. package/src/types/tools.ts +92 -0
  129. package/src/utils/index.ts +15 -0
  130. package/src/utils/stream-parser.ts +440 -0
  131. package/src/utils/tool-parser.ts +715 -0
package/dist/membrane.js
@@ -0,0 +1,893 @@
1
+ /**
2
+ * Membrane - LLM middleware core class
3
+ *
4
+ * A selective boundary that transforms what passes through.
5
+ */
6
+ import { DEFAULT_RETRY_CONFIG, MembraneError, classifyError, } from './types/index.js';
7
+ import { transformToPrefill, } from './transforms/index.js';
8
+ import { parseToolCalls, formatToolResults, parseAccumulatedIntoBlocks, hasImageInToolResults, formatToolResultsForSplitTurn, } from './utils/tool-parser.js';
9
+ import { IncrementalXmlParser } from './utils/stream-parser.js';
10
+ // ============================================================================
11
+ // Membrane Class
12
+ // ============================================================================
13
+ export class Membrane {
14
+ adapter;
15
+ registry;
16
+ retryConfig;
17
+ config;
18
+ constructor(adapter, config = {}) {
19
+ this.adapter = adapter;
20
+ this.registry = config.registry;
21
+ this.retryConfig = { ...DEFAULT_RETRY_CONFIG, ...config.retry };
22
+ this.config = config;
23
+ }
24
+ // ==========================================================================
25
+ // Main API
26
+ // ==========================================================================
27
+ /**
28
+ * Complete a request (non-streaming)
29
+ */
30
+ async complete(request, options = {}) {
31
+ const startTime = Date.now();
32
+ let attempts = 0;
33
+ while (true) {
34
+ attempts++;
35
+ try {
36
+ const { providerRequest, prefillResult } = this.transformRequest(request);
37
+ // Call beforeRequest hook
38
+ let finalRequest = providerRequest;
39
+ if (this.config.hooks?.beforeRequest) {
40
+ finalRequest = await this.config.hooks.beforeRequest(request, providerRequest) ?? providerRequest;
41
+ }
42
+ const providerResponse = await this.adapter.complete(finalRequest, {
43
+ signal: options.signal,
44
+ timeoutMs: options.timeoutMs,
45
+ });
46
+ const response = this.transformResponse(providerResponse, request, prefillResult, startTime, attempts, finalRequest);
47
+ // Call afterResponse hook
48
+ if (this.config.hooks?.afterResponse) {
49
+ return await this.config.hooks.afterResponse(response, providerResponse.raw);
50
+ }
51
+ return response;
52
+ }
53
+ catch (error) {
54
+ const errorInfo = classifyError(error);
55
+ if (errorInfo.retryable && attempts < this.retryConfig.maxRetries) {
56
+ // Check hook for retry decision
57
+ if (this.config.hooks?.onError) {
58
+ const decision = await this.config.hooks.onError(errorInfo, attempts);
59
+ if (decision === 'abort') {
60
+ throw error instanceof MembraneError ? error : new MembraneError(errorInfo);
61
+ }
62
+ }
63
+ // Wait before retry
64
+ const delay = this.calculateRetryDelay(attempts);
65
+ await this.sleep(delay);
66
+ continue;
67
+ }
68
+ throw error instanceof MembraneError ? error : new MembraneError(errorInfo);
69
+ }
70
+ }
71
+ }
72
+ /**
73
+ * Stream a request with inline tool execution.
74
+ *
75
+ * Returns either a complete NormalizedResponse or an AbortedResponse
76
+ * if the request was cancelled via the abort signal. Use `isAbortedResponse()`
77
+ * to check which type was returned.
78
+ *
79
+ * @example
80
+ * ```typescript
81
+ * const result = await membrane.stream(request, { signal: controller.signal });
82
+ * if (isAbortedResponse(result)) {
83
+ * console.log('Aborted:', result.rawAssistantText);
84
+ * // Use rawAssistantText as prefill to continue, or toolCalls/toolResults to rebuild state
85
+ * } else {
86
+ * console.log('Complete:', result.content);
87
+ * }
88
+ * ```
89
+ */
90
+ async stream(request, options = {}) {
91
+ // Determine tool mode
92
+ const toolMode = this.resolveToolMode(request);
93
+ if (toolMode === 'native' && request.tools && request.tools.length > 0) {
94
+ return this.streamWithNativeTools(request, options);
95
+ }
96
+ else {
97
+ return this.streamWithXmlTools(request, options);
98
+ }
99
+ }
100
+ /**
101
+ * Determine the effective tool mode
102
+ */
103
+ resolveToolMode(request) {
104
+ // Explicit mode takes precedence
105
+ if (request.toolMode && request.toolMode !== 'auto') {
106
+ return request.toolMode;
107
+ }
108
+ // Auto mode: choose based on provider
109
+ // OpenRouter and OpenAI-compatible APIs use native tools
110
+ // Anthropic direct with prefill mode uses XML tools
111
+ if (this.adapter.name === 'openrouter') {
112
+ return 'native';
113
+ }
114
+ // Default to XML for prefill compatibility
115
+ return 'xml';
116
+ }
117
+ /**
118
+ * Stream with XML-based tool execution (prefill mode)
119
+ *
120
+ * Uses IncrementalXmlParser to track XML nesting depth for:
121
+ * - False-positive stop sequence detection (e.g., "\nUser:" inside tool results)
122
+ * - Structured block events for UI
123
+ */
124
+ async streamWithXmlTools(request, options) {
125
+ const startTime = Date.now();
126
+ const { onChunk, onContentBlockUpdate, onToolCalls, onPreToolContent, onUsage, onBlock, maxToolDepth = 10, signal, } = options;
127
+ // Initialize incremental parser for XML tracking
128
+ const parser = new IncrementalXmlParser();
129
+ let toolDepth = 0;
130
+ let totalUsage = { inputTokens: 0, outputTokens: 0 };
131
+ const contentBlocks = [];
132
+ let lastStopReason = 'end_turn';
133
+ let rawRequest;
134
+ let rawResponse;
135
+ // Track executed tool calls and results
136
+ const executedToolCalls = [];
137
+ const executedToolResults = [];
138
+ // Transform initial request (XML tools are injected into system prompt)
139
+ let { providerRequest, prefillResult } = this.transformRequest(request);
140
+ try {
141
+ // Tool execution loop
142
+ while (toolDepth <= maxToolDepth) {
143
+ rawRequest = providerRequest;
144
+ // Track if we manually detected a stop sequence (API doesn't always stop)
145
+ let detectedStopSequence = null;
146
+ let truncatedAccumulated = null;
147
+ // Track where to start checking for stop sequences (skip already-processed content)
148
+ const checkFromIndex = parser.getAccumulated().length;
149
+ // Stream from provider
150
+ const streamResult = await this.streamOnce(providerRequest, {
151
+ onChunk: (chunk) => {
152
+ // If we already detected a stop sequence, ignore remaining chunks
153
+ if (detectedStopSequence) {
154
+ return;
155
+ }
156
+ // Process chunk with enriched streaming API
157
+ const { content, blockEvents } = parser.processChunk(chunk);
158
+ // Check for stop sequences only in NEW content (not already-processed)
159
+ const accumulated = parser.getAccumulated();
160
+ const newContent = accumulated.slice(checkFromIndex);
161
+ for (const stopSeq of prefillResult.stopSequences) {
162
+ const idx = newContent.indexOf(stopSeq);
163
+ if (idx !== -1) {
164
+ // Found stop sequence - mark it and truncate
165
+ const absoluteIdx = checkFromIndex + idx;
166
+ detectedStopSequence = stopSeq;
167
+ truncatedAccumulated = accumulated.slice(0, absoluteIdx);
168
+ // Emit only the portion up to stop sequence with metadata
169
+ const alreadyEmitted = accumulated.length - chunk.length;
170
+ if (absoluteIdx > alreadyEmitted) {
171
+ const truncatedChunk = accumulated.slice(alreadyEmitted, absoluteIdx);
172
+ const meta = {
173
+ type: parser.getCurrentBlockType(),
174
+ visible: parser.getCurrentBlockType() === 'text',
175
+ blockIndex: 0, // Approximate
176
+ };
177
+ onChunk?.(truncatedChunk, meta);
178
+ }
179
+ return;
180
+ }
181
+ }
182
+ // Emit block events first
183
+ if (onBlock) {
184
+ for (const event of blockEvents) {
185
+ onBlock(event);
186
+ }
187
+ }
188
+ // Emit content chunks with metadata
189
+ for (const { text, meta } of content) {
190
+ onChunk?.(text, meta);
191
+ }
192
+ },
193
+ onContentBlock: onContentBlockUpdate
194
+ ? (index, block) => onContentBlockUpdate(index, block)
195
+ : undefined,
196
+ }, { signal });
197
+ // If we detected stop sequence manually, fix up the parser and result
198
+ if (detectedStopSequence && truncatedAccumulated !== null) {
199
+ parser.reset();
200
+ parser.push(truncatedAccumulated);
201
+ streamResult.stopReason = 'stop_sequence';
202
+ streamResult.stopSequence = detectedStopSequence;
203
+ }
204
+ rawResponse = streamResult.raw;
205
+ lastStopReason = this.mapStopReason(streamResult.stopReason);
206
+ // Accumulate usage
207
+ totalUsage.inputTokens += streamResult.usage.inputTokens;
208
+ totalUsage.outputTokens += streamResult.usage.outputTokens;
209
+ onUsage?.(totalUsage);
210
+ // Get accumulated text from parser
211
+ const accumulated = parser.getAccumulated();
212
+ // Check for tool calls (if handler provided)
213
+ if (onToolCalls && streamResult.stopSequence === '</function_calls>') {
214
+ // Append the closing tag (we truncated before it, or API stopped before it)
215
+ const closeTag = '</function_calls>';
216
+ parser.push(closeTag);
217
+ // Note: closing tag is structural XML, not emitted via onChunk (invisible)
218
+ const parsed = parseToolCalls(parser.getAccumulated());
219
+ if (parsed && parsed.calls.length > 0) {
220
+ // Notify about pre-tool content
221
+ if (onPreToolContent && parsed.beforeText.trim()) {
222
+ await onPreToolContent(parsed.beforeText);
223
+ }
224
+ // Track the tool calls
225
+ executedToolCalls.push(...parsed.calls);
226
+ // Execute tools
227
+ const context = {
228
+ rawText: parsed.fullMatch,
229
+ preamble: parsed.beforeText,
230
+ depth: toolDepth,
231
+ previousResults: executedToolResults,
232
+ accumulated: parser.getAccumulated(),
233
+ };
234
+ const results = await onToolCalls(parsed.calls, context);
235
+ // Track the tool results
236
+ executedToolResults.push(...results);
237
+ // Check if results contain images (requires split-turn injection)
238
+ if (hasImageInToolResults(results)) {
239
+ // Use split-turn injection for images
240
+ const splitContent = formatToolResultsForSplitTurn(results);
241
+ // Append the text portion to accumulated (before image)
242
+ parser.push(splitContent.beforeImageXml);
243
+ const toolResultMeta = {
244
+ type: 'tool_result',
245
+ visible: false,
246
+ blockIndex: 0,
247
+ };
248
+ onChunk?.(splitContent.beforeImageXml, toolResultMeta);
249
+ // Build continuation with image injection
250
+ providerRequest = this.buildContinuationRequestWithImages(request, prefillResult, parser.getAccumulated(), splitContent.images, splitContent.afterImageXml);
251
+ // Also add afterImageXml to accumulated for complete rawAssistantText
252
+ // This is prefilled but represents assistant's logical output
253
+ parser.push(splitContent.afterImageXml);
254
+ onChunk?.(splitContent.afterImageXml, toolResultMeta);
255
+ prefillResult.assistantPrefill = parser.getAccumulated();
256
+ }
257
+ else {
258
+ // Standard path: no images, use simple XML injection
259
+ const resultsXml = formatToolResults(results);
260
+ parser.push(resultsXml);
261
+ const toolResultMeta = {
262
+ type: 'tool_result',
263
+ visible: false,
264
+ blockIndex: 0,
265
+ };
266
+ onChunk?.(resultsXml, toolResultMeta);
267
+ // Update prefill and continue
268
+ prefillResult.assistantPrefill = parser.getAccumulated();
269
+ providerRequest = this.buildContinuationRequest(request, prefillResult, parser.getAccumulated());
270
+ }
271
+ toolDepth++;
272
+ continue;
273
+ }
274
+ }
275
+ // Check for false-positive stop (unclosed block)
276
+ // Only resume if we stopped on a stop_sequence (not end_turn or max_tokens)
277
+ // Use parser's nesting detection instead of regex-based hasUnclosedToolBlock
278
+ if (lastStopReason === 'stop_sequence' && parser.isInsideBlock()) {
279
+ // False positive! The stop sequence (e.g., "\nUser:") appeared inside XML content
280
+ // Re-add the consumed stop sequence and resume streaming
281
+ if (streamResult.stopSequence) {
282
+ parser.push(streamResult.stopSequence);
283
+ const meta = {
284
+ type: parser.getCurrentBlockType(),
285
+ visible: parser.getCurrentBlockType() === 'text',
286
+ blockIndex: 0,
287
+ };
288
+ onChunk?.(streamResult.stopSequence, meta);
289
+ }
290
+ // Resume streaming - but limit resumptions to prevent infinite loops
291
+ toolDepth++; // Count this as a "depth" to limit iterations
292
+ if (toolDepth > maxToolDepth) {
293
+ break;
294
+ }
295
+ prefillResult.assistantPrefill = parser.getAccumulated();
296
+ providerRequest = this.buildContinuationRequest(request, prefillResult, parser.getAccumulated());
297
+ continue;
298
+ }
299
+ // No more tools or tool handling disabled, we're done
300
+ break;
301
+ }
302
+ // Build final response
303
+ return this.buildFinalResponse(parser.getAccumulated(), contentBlocks, lastStopReason, totalUsage, request, prefillResult, startTime, 1, // attempts
304
+ rawRequest, rawResponse, executedToolCalls, executedToolResults);
305
+ }
306
+ catch (error) {
307
+ // Check if this is an abort error
308
+ if (this.isAbortError(error)) {
309
+ return this.buildAbortedResponse(parser.getAccumulated(), totalUsage, executedToolCalls, executedToolResults, 'user');
310
+ }
311
+ // Re-throw non-abort errors
312
+ throw error;
313
+ }
314
+ }
315
+ /**
316
+ * Stream with native API tool execution
317
+ */
318
+ async streamWithNativeTools(request, options) {
319
+ const startTime = Date.now();
320
+ const { onChunk, onContentBlockUpdate, onToolCalls, onPreToolContent, onUsage, maxToolDepth = 10, signal, } = options;
321
+ let toolDepth = 0;
322
+ let totalUsage = { inputTokens: 0, outputTokens: 0 };
323
+ let lastStopReason = 'end_turn';
324
+ let rawRequest;
325
+ let rawResponse;
326
+ // Track all text for rawAssistantText
327
+ let allTextAccumulated = '';
328
+ // Track executed tool calls and results
329
+ const executedToolCalls = [];
330
+ const executedToolResults = [];
331
+ // Build messages array that we'll update with tool results
332
+ let messages = [...request.messages];
333
+ let allContentBlocks = [];
334
+ try {
335
+ // Tool execution loop
336
+ while (toolDepth <= maxToolDepth) {
337
+ // Build provider request with native tools
338
+ const providerRequest = this.buildNativeToolRequest(request, messages);
339
+ rawRequest = providerRequest;
340
+ // Stream from provider
341
+ let textAccumulated = '';
342
+ let blockIndex = 0;
343
+ const streamResult = await this.streamOnce(providerRequest, {
344
+ onChunk: (chunk) => {
345
+ textAccumulated += chunk;
346
+ allTextAccumulated += chunk;
347
+ // For native mode, emit text chunks with basic metadata
348
+ // TODO: Use native API content_block events for richer metadata
349
+ const meta = {
350
+ type: 'text',
351
+ visible: true,
352
+ blockIndex,
353
+ };
354
+ onChunk?.(chunk, meta);
355
+ },
356
+ onContentBlock: onContentBlockUpdate
357
+ ? (index, block) => onContentBlockUpdate(index, block)
358
+ : undefined,
359
+ }, { signal });
360
+ rawResponse = streamResult.raw;
361
+ lastStopReason = this.mapStopReason(streamResult.stopReason);
362
+ // Accumulate usage
363
+ totalUsage.inputTokens += streamResult.usage.inputTokens;
364
+ totalUsage.outputTokens += streamResult.usage.outputTokens;
365
+ onUsage?.(totalUsage);
366
+ // Parse content blocks from response
367
+ const responseBlocks = this.parseProviderContent(streamResult.content);
368
+ allContentBlocks.push(...responseBlocks);
369
+ // Check for tool_use blocks
370
+ const toolUseBlocks = responseBlocks.filter((b) => b.type === 'tool_use');
371
+ if (onToolCalls && toolUseBlocks.length > 0 && lastStopReason === 'tool_use') {
372
+ // Notify about pre-tool content
373
+ const textBlocks = responseBlocks.filter(b => b.type === 'text');
374
+ if (onPreToolContent && textBlocks.length > 0) {
375
+ const preToolText = textBlocks.map(b => b.text).join('');
376
+ if (preToolText.trim()) {
377
+ await onPreToolContent(preToolText);
378
+ }
379
+ }
380
+ // Convert to normalized ToolCall[]
381
+ const toolCalls = toolUseBlocks.map(block => ({
382
+ id: block.id,
383
+ name: block.name,
384
+ input: block.input,
385
+ }));
386
+ // Track tool calls
387
+ executedToolCalls.push(...toolCalls);
388
+ // Execute tools
389
+ const context = {
390
+ rawText: JSON.stringify(toolUseBlocks),
391
+ preamble: textAccumulated,
392
+ depth: toolDepth,
393
+ previousResults: executedToolResults,
394
+ accumulated: allTextAccumulated,
395
+ };
396
+ const results = await onToolCalls(toolCalls, context);
397
+ // Track tool results
398
+ executedToolResults.push(...results);
399
+ // Add tool results to content blocks
400
+ for (const result of results) {
401
+ allContentBlocks.push({
402
+ type: 'tool_result',
403
+ toolUseId: result.toolUseId,
404
+ content: result.content,
405
+ isError: result.isError,
406
+ });
407
+ }
408
+ // Add assistant message with tool use and user message with tool results
409
+ messages.push({
410
+ participant: 'Claude',
411
+ content: responseBlocks,
412
+ });
413
+ messages.push({
414
+ participant: 'User',
415
+ content: results.map(r => ({
416
+ type: 'tool_result',
417
+ toolUseId: r.toolUseId,
418
+ content: r.content,
419
+ isError: r.isError,
420
+ })),
421
+ });
422
+ toolDepth++;
423
+ continue;
424
+ }
425
+ // No more tools, we're done
426
+ break;
427
+ }
428
+ const durationMs = Date.now() - startTime;
429
+ return {
430
+ content: allContentBlocks,
431
+ rawAssistantText: allTextAccumulated,
432
+ toolCalls: executedToolCalls,
433
+ toolResults: executedToolResults,
434
+ stopReason: lastStopReason,
435
+ usage: totalUsage,
436
+ details: {
437
+ stop: {
438
+ reason: lastStopReason,
439
+ wasTruncated: lastStopReason === 'max_tokens',
440
+ },
441
+ usage: { ...totalUsage },
442
+ timing: {
443
+ totalDurationMs: durationMs,
444
+ attempts: 1,
445
+ },
446
+ model: {
447
+ requested: request.config.model,
448
+ actual: request.config.model,
449
+ provider: this.adapter.name,
450
+ },
451
+ cache: {
452
+ markersInRequest: 0,
453
+ tokensCreated: 0,
454
+ tokensRead: 0,
455
+ hitRatio: 0,
456
+ },
457
+ },
458
+ raw: {
459
+ request: rawRequest,
460
+ response: rawResponse,
461
+ },
462
+ };
463
+ }
464
+ catch (error) {
465
+ // Check if this is an abort error
466
+ if (this.isAbortError(error)) {
467
+ return this.buildAbortedResponse(allTextAccumulated, totalUsage, executedToolCalls, executedToolResults, 'user');
468
+ }
469
+ // Re-throw non-abort errors
470
+ throw error;
471
+ }
472
+ }
473
+ /**
474
+ * Build a provider request with native tool support
475
+ */
476
+ buildNativeToolRequest(request, messages) {
477
+ // Convert messages to provider format
478
+ const providerMessages = [];
479
+ for (const msg of messages) {
480
+ const isAssistant = msg.participant === 'Claude';
481
+ const role = isAssistant ? 'assistant' : 'user';
482
+ // Convert content blocks
483
+ const content = [];
484
+ for (const block of msg.content) {
485
+ if (block.type === 'text') {
486
+ content.push({ type: 'text', text: block.text });
487
+ }
488
+ else if (block.type === 'tool_use') {
489
+ content.push({
490
+ type: 'tool_use',
491
+ id: block.id,
492
+ name: block.name,
493
+ input: block.input,
494
+ });
495
+ }
496
+ else if (block.type === 'tool_result') {
497
+ content.push({
498
+ type: 'tool_result',
499
+ tool_use_id: block.toolUseId,
500
+ content: block.content,
501
+ is_error: block.isError,
502
+ });
503
+ }
504
+ }
505
+ providerMessages.push({ role, content });
506
+ }
507
+ // Convert tools to provider format
508
+ const tools = request.tools?.map(tool => ({
509
+ name: tool.name,
510
+ description: tool.description,
511
+ input_schema: tool.inputSchema,
512
+ }));
513
+ return {
514
+ model: request.config.model,
515
+ maxTokens: request.config.maxTokens,
516
+ temperature: request.config.temperature,
517
+ messages: providerMessages,
518
+ system: request.system,
519
+ tools,
520
+ extra: request.providerParams,
521
+ };
522
+ }
523
+ /**
524
+ * Parse provider response content into normalized blocks
525
+ */
526
+ parseProviderContent(content) {
527
+ if (!content)
528
+ return [];
529
+ if (Array.isArray(content)) {
530
+ const blocks = [];
531
+ for (const item of content) {
532
+ if (item.type === 'text') {
533
+ blocks.push({ type: 'text', text: item.text });
534
+ }
535
+ else if (item.type === 'tool_use') {
536
+ blocks.push({
537
+ type: 'tool_use',
538
+ id: item.id,
539
+ name: item.name,
540
+ input: item.input,
541
+ });
542
+ }
543
+ else if (item.type === 'thinking') {
544
+ blocks.push({
545
+ type: 'thinking',
546
+ thinking: item.thinking,
547
+ signature: item.signature,
548
+ });
549
+ }
550
+ }
551
+ return blocks;
552
+ }
553
+ if (typeof content === 'string') {
554
+ return [{ type: 'text', text: content }];
555
+ }
556
+ return [];
557
+ }
558
+ // ==========================================================================
559
+ // Internal Methods
560
+ // ==========================================================================
561
+ transformRequest(request) {
562
+ // For now, use prefill transform
563
+ // In full implementation, would check capabilities and choose transform
564
+ // Extract user-provided stop sequences to pass to prefill transform
565
+ const additionalStopSequences = Array.isArray(request.stopSequences)
566
+ ? request.stopSequences
567
+ : request.stopSequences?.sequences ?? [];
568
+ // Request-level maxParticipantsForStop takes precedence over instance config
569
+ const maxParticipantsForStop = request.maxParticipantsForStop
570
+ ?? this.config.maxParticipantsForStop
571
+ ?? 10;
572
+ const prefillResult = transformToPrefill(request, {
573
+ assistantName: this.config.assistantParticipant ?? 'Claude',
574
+ promptCaching: true, // Enable cache control by default
575
+ additionalStopSequences,
576
+ maxParticipantsForStop,
577
+ });
578
+ // Use the pre-built messages from prefill transform
579
+ // These include cache_control markers on appropriate content blocks
580
+ const providerRequest = {
581
+ model: request.config.model,
582
+ maxTokens: request.config.maxTokens,
583
+ temperature: request.config.temperature,
584
+ messages: prefillResult.messages,
585
+ // System is now part of messages with cache_control
586
+ // But we still pass it for providers that need it separately
587
+ system: prefillResult.systemContent.length > 0
588
+ ? prefillResult.systemContent
589
+ : undefined,
590
+ stopSequences: prefillResult.stopSequences,
591
+ extra: request.providerParams,
592
+ };
593
+ return { providerRequest, prefillResult };
594
+ }
595
+ async streamOnce(request, callbacks, options) {
596
+ return await this.adapter.stream(request, callbacks, options);
597
+ }
598
+ buildContinuationRequest(originalRequest, prefillResult, accumulated) {
599
+ // Anthropic quirk: assistant content cannot end with trailing whitespace
600
+ const trimmedAccumulated = accumulated.trimEnd();
601
+ // Build continuation messages: keep all messages up to last assistant,
602
+ // then replace/add the accumulated content
603
+ const messages = [...prefillResult.messages];
604
+ // Find and update the last assistant message, or add one
605
+ let foundAssistant = false;
606
+ for (let i = messages.length - 1; i >= 0; i--) {
607
+ if (messages[i]?.role === 'assistant') {
608
+ messages[i] = { role: 'assistant', content: trimmedAccumulated };
609
+ foundAssistant = true;
610
+ break;
611
+ }
612
+ }
613
+ if (!foundAssistant) {
614
+ messages.push({ role: 'assistant', content: trimmedAccumulated });
615
+ }
616
+ return {
617
+ model: originalRequest.config.model,
618
+ maxTokens: originalRequest.config.maxTokens,
619
+ temperature: originalRequest.config.temperature,
620
+ messages,
621
+ system: prefillResult.systemContent.length > 0
622
+ ? prefillResult.systemContent
623
+ : undefined,
624
+ stopSequences: prefillResult.stopSequences,
625
+ extra: originalRequest.providerParams,
626
+ };
627
+ }
628
+ /**
629
+ * Build continuation request with split-turn image injection.
630
+ *
631
+ * When tool results contain images in prefill mode, we must:
632
+ * 1. End assistant turn mid-XML (after text content, inside <function_results>)
633
+ * 2. Insert user turn with only image content
634
+ * 3. Continue with assistant prefill containing closing XML tags
635
+ *
636
+ * This is required because Anthropic API only allows images in user turns.
637
+ *
638
+ * Structure:
639
+ * ```
640
+ * Assistant: "...response..." + <function_results><result>text content
641
+ * User: [image blocks]
642
+ * Assistant (prefill): </result></function_results>
643
+ * ```
644
+ */
645
+ buildContinuationRequestWithImages(originalRequest, prefillResult, accumulated, images, afterImageXml) {
646
+ // Anthropic quirk: assistant content cannot end with trailing whitespace
647
+ const trimmedAccumulated = accumulated.trimEnd();
648
+ // Build messages: copy existing, then modify/add for split-turn
649
+ const messages = [];
650
+ // Copy all messages except the last assistant message
651
+ for (const msg of prefillResult.messages) {
652
+ if (msg.role === 'assistant') {
653
+ // Skip - we'll add our own assistant messages
654
+ continue;
655
+ }
656
+ messages.push({ ...msg });
657
+ }
658
+ // Add assistant message with accumulated content (ends mid-XML)
659
+ messages.push({
660
+ role: 'assistant',
661
+ content: trimmedAccumulated,
662
+ });
663
+ // Add user message with just the images
664
+ messages.push({
665
+ role: 'user',
666
+ content: images,
667
+ });
668
+ // Add assistant prefill with closing XML tags
669
+ // Anthropic quirk: assistant content cannot end with trailing whitespace
670
+ const trimmedAfterXml = afterImageXml.trimEnd();
671
+ messages.push({
672
+ role: 'assistant',
673
+ content: trimmedAfterXml,
674
+ });
675
+ return {
676
+ model: originalRequest.config.model,
677
+ maxTokens: originalRequest.config.maxTokens,
678
+ temperature: originalRequest.config.temperature,
679
+ messages,
680
+ system: prefillResult.systemContent.length > 0
681
+ ? prefillResult.systemContent
682
+ : undefined,
683
+ stopSequences: prefillResult.stopSequences,
684
+ extra: originalRequest.providerParams,
685
+ };
686
+ }
687
+ transformResponse(providerResponse, request, prefillResult, startTime, attempts, rawRequest) {
688
+ // Extract text from response
689
+ const content = [];
690
+ const toolCalls = [];
691
+ // Build raw text for rawAssistantText
692
+ let rawAssistantText = '';
693
+ if (Array.isArray(providerResponse.content)) {
694
+ for (const block of providerResponse.content) {
695
+ if (block.type === 'text') {
696
+ content.push({ type: 'text', text: block.text });
697
+ rawAssistantText += block.text;
698
+ }
699
+ else if (block.type === 'tool_use') {
700
+ content.push({
701
+ type: 'tool_use',
702
+ id: block.id,
703
+ name: block.name,
704
+ input: block.input,
705
+ });
706
+ toolCalls.push({
707
+ id: block.id,
708
+ name: block.name,
709
+ input: block.input,
710
+ });
711
+ }
712
+ else if (block.type === 'thinking') {
713
+ content.push({
714
+ type: 'thinking',
715
+ thinking: block.thinking,
716
+ signature: block.signature,
717
+ });
718
+ }
719
+ }
720
+ }
721
+ else if (typeof providerResponse.content === 'string') {
722
+ content.push({ type: 'text', text: providerResponse.content });
723
+ rawAssistantText = providerResponse.content;
724
+ }
725
+ const stopReason = this.mapStopReason(providerResponse.stopReason);
726
+ const durationMs = Date.now() - startTime;
727
+ const usage = {
728
+ inputTokens: providerResponse.usage.inputTokens,
729
+ outputTokens: providerResponse.usage.outputTokens,
730
+ };
731
+ return {
732
+ content,
733
+ rawAssistantText,
734
+ toolCalls,
735
+ toolResults: [], // complete() doesn't execute tools
736
+ stopReason,
737
+ usage,
738
+ details: {
739
+ stop: {
740
+ reason: stopReason,
741
+ triggeredSequence: providerResponse.stopSequence,
742
+ wasTruncated: stopReason === 'max_tokens',
743
+ },
744
+ usage: {
745
+ inputTokens: providerResponse.usage.inputTokens,
746
+ outputTokens: providerResponse.usage.outputTokens,
747
+ cacheCreationTokens: providerResponse.usage.cacheCreationTokens,
748
+ cacheReadTokens: providerResponse.usage.cacheReadTokens,
749
+ },
750
+ timing: {
751
+ totalDurationMs: durationMs,
752
+ attempts,
753
+ },
754
+ model: {
755
+ requested: request.config.model,
756
+ actual: providerResponse.model,
757
+ provider: this.adapter.name,
758
+ },
759
+ cache: {
760
+ markersInRequest: prefillResult.cacheMarkersApplied,
761
+ tokensCreated: providerResponse.usage.cacheCreationTokens ?? 0,
762
+ tokensRead: providerResponse.usage.cacheReadTokens ?? 0,
763
+ hitRatio: this.calculateCacheHitRatio(providerResponse.usage),
764
+ },
765
+ },
766
+ raw: {
767
+ request: rawRequest ?? null,
768
+ response: providerResponse.raw,
769
+ },
770
+ };
771
+ }
772
+ buildFinalResponse(accumulated, contentBlocks, stopReason, usage, request, prefillResult, startTime, attempts, rawRequest, rawResponse, executedToolCalls = [], executedToolResults = []) {
773
+ // Parse accumulated text into structured content blocks
774
+ // This extracts thinking, tool_use, tool_result, and text blocks
775
+ let finalContent;
776
+ let toolCalls;
777
+ let toolResults;
778
+ if (contentBlocks.length > 0) {
779
+ // Native mode - content blocks already structured
780
+ finalContent = contentBlocks;
781
+ toolCalls = executedToolCalls;
782
+ toolResults = executedToolResults;
783
+ }
784
+ else {
785
+ // XML mode - parse accumulated text into blocks
786
+ const parsed = parseAccumulatedIntoBlocks(accumulated);
787
+ finalContent = parsed.blocks;
788
+ toolCalls = parsed.toolCalls.length > 0 ? parsed.toolCalls : executedToolCalls;
789
+ toolResults = parsed.toolResults.length > 0 ? parsed.toolResults : executedToolResults;
790
+ }
791
+ const durationMs = Date.now() - startTime;
792
+ return {
793
+ content: finalContent,
794
+ rawAssistantText: accumulated,
795
+ toolCalls,
796
+ toolResults,
797
+ stopReason,
798
+ usage,
799
+ details: {
800
+ stop: {
801
+ reason: stopReason,
802
+ wasTruncated: stopReason === 'max_tokens',
803
+ },
804
+ usage: {
805
+ ...usage,
806
+ },
807
+ timing: {
808
+ totalDurationMs: durationMs,
809
+ attempts,
810
+ },
811
+ model: {
812
+ requested: request.config.model,
813
+ actual: request.config.model, // TODO: get from response
814
+ provider: this.adapter.name,
815
+ },
816
+ cache: {
817
+ markersInRequest: 0,
818
+ tokensCreated: 0,
819
+ tokensRead: 0,
820
+ hitRatio: 0,
821
+ },
822
+ },
823
+ raw: {
824
+ request: rawRequest,
825
+ response: rawResponse,
826
+ },
827
+ };
828
+ }
829
+ mapStopReason(providerReason) {
830
+ switch (providerReason) {
831
+ case 'end_turn':
832
+ return 'end_turn';
833
+ case 'max_tokens':
834
+ return 'max_tokens';
835
+ case 'stop_sequence':
836
+ return 'stop_sequence';
837
+ case 'tool_use':
838
+ return 'tool_use';
839
+ default:
840
+ return 'end_turn';
841
+ }
842
+ }
843
+ calculateCacheHitRatio(usage) {
844
+ const cacheRead = usage.cacheReadTokens ?? 0;
845
+ const total = usage.inputTokens ?? 0;
846
+ if (total === 0)
847
+ return 0;
848
+ return cacheRead / total;
849
+ }
850
+ calculateRetryDelay(attempt) {
851
+ const { retryDelayMs, backoffMultiplier, maxRetryDelayMs } = this.retryConfig;
852
+ const delay = retryDelayMs * Math.pow(backoffMultiplier, attempt - 1);
853
+ return Math.min(delay, maxRetryDelayMs);
854
+ }
855
+ sleep(ms) {
856
+ return new Promise((resolve) => setTimeout(resolve, ms));
857
+ }
858
+ /**
859
+ * Check if an error is an abort error
860
+ */
861
+ isAbortError(error) {
862
+ if (error instanceof Error) {
863
+ // Standard AbortError
864
+ if (error.name === 'AbortError')
865
+ return true;
866
+ // Anthropic SDK abort
867
+ if (error.message.includes('aborted') || error.message.includes('abort'))
868
+ return true;
869
+ }
870
+ // DOMException for browser environments
871
+ if (typeof DOMException !== 'undefined' && error instanceof DOMException) {
872
+ return error.name === 'AbortError';
873
+ }
874
+ return false;
875
+ }
876
+ /**
877
+ * Build an AbortedResponse from current execution state
878
+ */
879
+ buildAbortedResponse(accumulated, usage, toolCalls, toolResults, reason) {
880
+ // Parse accumulated text into content blocks for partial content
881
+ const { blocks } = parseAccumulatedIntoBlocks(accumulated);
882
+ return {
883
+ aborted: true,
884
+ partialContent: blocks.length > 0 ? blocks : undefined,
885
+ partialUsage: usage,
886
+ reason,
887
+ rawAssistantText: accumulated || undefined,
888
+ toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
889
+ toolResults: toolResults.length > 0 ? toolResults : undefined,
890
+ };
891
+ }
892
+ }
893
+ //# sourceMappingURL=membrane.js.map
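
For orientation, the compiled `membrane.js` above centers on the `Membrane` class: it wraps a provider adapter, applies the prefill transform, retries retryable errors, and drives either an XML (prefill) or native tool-execution loop while streaming. The sketch below shows how a caller might wire it up, inferred only from the shapes visible in this diff; the import specifier, the adapter value, the exact request/option field names, and `isAbortedResponse` being a package export are assumptions, not confirmed API.

```typescript
// Hypothetical usage sketch based on the compiled code in this diff.
// Field names and exports may not match the published typings exactly.
import { Membrane, isAbortedResponse } from '@animalabs/membrane';

// Assumed: an adapter implementing the provider interface the class calls into
// (`name`, `complete(request, opts)`, `stream(request, callbacks, opts)`),
// e.g. one of the adapters under dist/providers/.
declare const adapter: any;

async function main(): Promise<void> {
  const membrane = new Membrane(adapter, {
    retry: { maxRetries: 3 }, // merged over DEFAULT_RETRY_CONFIG
    hooks: {
      // beforeRequest may return a replacement provider request (or nothing).
      beforeRequest: async (_request: any, providerRequest: any) => providerRequest,
      // Returning 'abort' from onError stops further retries.
      onError: async (_errorInfo: any, attempt: number) =>
        attempt >= 2 ? 'abort' : undefined,
    },
  });

  const controller = new AbortController();

  const result = await membrane.stream(
    {
      config: { model: 'placeholder-model-id', maxTokens: 1024, temperature: 0.7 },
      messages: [
        { participant: 'User', content: [{ type: 'text', text: 'List the repo files.' }] },
      ],
      tools: [
        {
          name: 'list_files',
          description: 'List files in the workspace',
          inputSchema: { type: 'object', properties: {} },
        },
      ],
      toolMode: 'auto', // 'native' for OpenRouter, otherwise XML/prefill per resolveToolMode()
    },
    {
      signal: controller.signal,
      maxToolDepth: 5,
      // meta.visible is false for structural XML such as injected tool results.
      onChunk: (text: string, meta: { visible: boolean }) => {
        if (meta.visible) process.stdout.write(text);
      },
      // Execute tool calls and return results; image results trigger split-turn injection.
      onToolCalls: async (calls: Array<{ id: string; name: string; input: unknown }>) =>
        calls.map((call) => ({
          toolUseId: call.id,
          content: 'file-a.ts\nfile-b.ts',
          isError: false,
        })),
    },
  );

  if (isAbortedResponse(result)) {
    console.log('Aborted with partial text:', result.rawAssistantText);
  } else {
    console.log('Stop reason:', result.stopReason, 'output tokens:', result.usage.outputTokens);
  }
}

main().catch(console.error);
```

In XML/prefill mode the tool-result XML is appended to the assistant prefill and the request is continued; in native mode tool_use and tool_result messages are appended instead. Both paths count iterations against `maxToolDepth`.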