@animalabs/membrane 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. package/dist/context/index.d.ts +10 -0
  2. package/dist/context/index.d.ts.map +1 -0
  3. package/dist/context/index.js +9 -0
  4. package/dist/context/index.js.map +1 -0
  5. package/dist/context/process.d.ts +22 -0
  6. package/dist/context/process.d.ts.map +1 -0
  7. package/dist/context/process.js +369 -0
  8. package/dist/context/process.js.map +1 -0
  9. package/dist/context/types.d.ts +118 -0
  10. package/dist/context/types.d.ts.map +1 -0
  11. package/dist/context/types.js +60 -0
  12. package/dist/context/types.js.map +1 -0
  13. package/dist/index.d.ts +12 -0
  14. package/dist/index.d.ts.map +1 -0
  15. package/dist/index.js +18 -0
  16. package/dist/index.js.map +1 -0
  17. package/dist/membrane.d.ts +96 -0
  18. package/dist/membrane.d.ts.map +1 -0
  19. package/dist/membrane.js +893 -0
  20. package/dist/membrane.js.map +1 -0
  21. package/dist/providers/anthropic.d.ts +36 -0
  22. package/dist/providers/anthropic.d.ts.map +1 -0
  23. package/dist/providers/anthropic.js +265 -0
  24. package/dist/providers/anthropic.js.map +1 -0
  25. package/dist/providers/index.d.ts +8 -0
  26. package/dist/providers/index.d.ts.map +1 -0
  27. package/dist/providers/index.js +8 -0
  28. package/dist/providers/index.js.map +1 -0
  29. package/dist/providers/openai-compatible.d.ts +74 -0
  30. package/dist/providers/openai-compatible.d.ts.map +1 -0
  31. package/dist/providers/openai-compatible.js +412 -0
  32. package/dist/providers/openai-compatible.js.map +1 -0
  33. package/dist/providers/openai.d.ts +69 -0
  34. package/dist/providers/openai.d.ts.map +1 -0
  35. package/dist/providers/openai.js +455 -0
  36. package/dist/providers/openai.js.map +1 -0
  37. package/dist/providers/openrouter.d.ts +76 -0
  38. package/dist/providers/openrouter.d.ts.map +1 -0
  39. package/dist/providers/openrouter.js +492 -0
  40. package/dist/providers/openrouter.js.map +1 -0
  41. package/dist/transforms/chat.d.ts +52 -0
  42. package/dist/transforms/chat.d.ts.map +1 -0
  43. package/dist/transforms/chat.js +136 -0
  44. package/dist/transforms/chat.js.map +1 -0
  45. package/dist/transforms/index.d.ts +6 -0
  46. package/dist/transforms/index.d.ts.map +1 -0
  47. package/dist/transforms/index.js +6 -0
  48. package/dist/transforms/index.js.map +1 -0
  49. package/dist/transforms/prefill.d.ts +89 -0
  50. package/dist/transforms/prefill.d.ts.map +1 -0
  51. package/dist/transforms/prefill.js +401 -0
  52. package/dist/transforms/prefill.js.map +1 -0
  53. package/dist/types/config.d.ts +103 -0
  54. package/dist/types/config.d.ts.map +1 -0
  55. package/dist/types/config.js +21 -0
  56. package/dist/types/config.js.map +1 -0
  57. package/dist/types/content.d.ts +81 -0
  58. package/dist/types/content.d.ts.map +1 -0
  59. package/dist/types/content.js +40 -0
  60. package/dist/types/content.js.map +1 -0
  61. package/dist/types/errors.d.ts +42 -0
  62. package/dist/types/errors.d.ts.map +1 -0
  63. package/dist/types/errors.js +208 -0
  64. package/dist/types/errors.js.map +1 -0
  65. package/dist/types/index.d.ts +18 -0
  66. package/dist/types/index.d.ts.map +1 -0
  67. package/dist/types/index.js +9 -0
  68. package/dist/types/index.js.map +1 -0
  69. package/dist/types/message.d.ts +46 -0
  70. package/dist/types/message.d.ts.map +1 -0
  71. package/dist/types/message.js +38 -0
  72. package/dist/types/message.js.map +1 -0
  73. package/dist/types/provider.d.ts +155 -0
  74. package/dist/types/provider.d.ts.map +1 -0
  75. package/dist/types/provider.js +5 -0
  76. package/dist/types/provider.js.map +1 -0
  77. package/dist/types/request.d.ts +78 -0
  78. package/dist/types/request.d.ts.map +1 -0
  79. package/dist/types/request.js +5 -0
  80. package/dist/types/request.js.map +1 -0
  81. package/dist/types/response.d.ts +131 -0
  82. package/dist/types/response.d.ts.map +1 -0
  83. package/dist/types/response.js +7 -0
  84. package/dist/types/response.js.map +1 -0
  85. package/dist/types/streaming.d.ts +164 -0
  86. package/dist/types/streaming.d.ts.map +1 -0
  87. package/dist/types/streaming.js +5 -0
  88. package/dist/types/streaming.js.map +1 -0
  89. package/dist/types/tools.d.ts +71 -0
  90. package/dist/types/tools.d.ts.map +1 -0
  91. package/dist/types/tools.js +5 -0
  92. package/dist/types/tools.js.map +1 -0
  93. package/dist/utils/index.d.ts +5 -0
  94. package/dist/utils/index.d.ts.map +1 -0
  95. package/dist/utils/index.js +5 -0
  96. package/dist/utils/index.js.map +1 -0
  97. package/dist/utils/stream-parser.d.ts +53 -0
  98. package/dist/utils/stream-parser.d.ts.map +1 -0
  99. package/dist/utils/stream-parser.js +359 -0
  100. package/dist/utils/stream-parser.js.map +1 -0
  101. package/dist/utils/tool-parser.d.ts +130 -0
  102. package/dist/utils/tool-parser.d.ts.map +1 -0
  103. package/dist/utils/tool-parser.js +571 -0
  104. package/dist/utils/tool-parser.js.map +1 -0
  105. package/package.json +37 -0
  106. package/src/context/index.ts +24 -0
  107. package/src/context/process.ts +520 -0
  108. package/src/context/types.ts +231 -0
  109. package/src/index.ts +23 -0
  110. package/src/membrane.ts +1174 -0
  111. package/src/providers/anthropic.ts +340 -0
  112. package/src/providers/index.ts +31 -0
  113. package/src/providers/openai-compatible.ts +570 -0
  114. package/src/providers/openai.ts +625 -0
  115. package/src/providers/openrouter.ts +662 -0
  116. package/src/transforms/chat.ts +212 -0
  117. package/src/transforms/index.ts +22 -0
  118. package/src/transforms/prefill.ts +585 -0
  119. package/src/types/config.ts +172 -0
  120. package/src/types/content.ts +181 -0
  121. package/src/types/errors.ts +277 -0
  122. package/src/types/index.ts +154 -0
  123. package/src/types/message.ts +89 -0
  124. package/src/types/provider.ts +249 -0
  125. package/src/types/request.ts +131 -0
  126. package/src/types/response.ts +223 -0
  127. package/src/types/streaming.ts +231 -0
  128. package/src/types/tools.ts +92 -0
  129. package/src/utils/index.ts +15 -0
  130. package/src/utils/stream-parser.ts +440 -0
  131. package/src/utils/tool-parser.ts +715 -0
@@ -0,0 +1,1174 @@
1
+ /**
2
+ * Membrane - LLM middleware core class
3
+ *
4
+ * A selective boundary that transforms what passes through.
5
+ */
6
+
7
+ import type {
8
+ NormalizedRequest,
9
+ NormalizedResponse,
10
+ AbortedResponse,
11
+ ContentBlock,
12
+ ProviderAdapter,
13
+ ModelRegistry,
14
+ MembraneConfig,
15
+ StreamOptions,
16
+ CompleteOptions,
17
+ BasicUsage,
18
+ DetailedUsage,
19
+ StopReason,
20
+ TimingInfo,
21
+ CacheInfo,
22
+ ToolCall,
23
+ ToolResult,
24
+ ToolContext,
25
+ RetryConfig,
26
+ ToolMode,
27
+ ToolDefinition,
28
+ } from './types/index.js';
29
+ import {
30
+ DEFAULT_RETRY_CONFIG,
31
+ MembraneError,
32
+ classifyError,
33
+ isTextContent,
34
+ isAbortedResponse,
35
+ } from './types/index.js';
36
+ import {
37
+ transformToPrefill,
38
+ type PrefillTransformResult,
39
+ } from './transforms/index.js';
40
+ import {
41
+ parseToolCalls,
42
+ formatToolResults,
43
+ parseAccumulatedIntoBlocks,
44
+ hasImageInToolResults,
45
+ formatToolResultsForSplitTurn,
46
+ type ProviderImageBlock,
47
+ } from './utils/tool-parser.js';
48
+ import { IncrementalXmlParser, type ProcessChunkResult } from './utils/stream-parser.js';
49
+ import type { ChunkMeta, BlockEvent } from './types/streaming.js';
50
+
51
+ // ============================================================================
52
+ // Membrane Class
53
+ // ============================================================================
54
+
55
+ export class Membrane {
56
+ private adapter: ProviderAdapter;
57
+ private registry?: ModelRegistry;
58
+ private retryConfig: RetryConfig;
59
+ private config: MembraneConfig;
60
+
61
/**
 * Create a Membrane bound to a single provider adapter.
 *
 * @param adapter - Provider adapter used for every completion/stream call.
 * @param config - Optional settings. Entries of `config.retry` override the
 *   matching entries of `DEFAULT_RETRY_CONFIG`.
 */
constructor(adapter: ProviderAdapter, config: MembraneConfig = {}) {
  const { registry, retry } = config;
  const mergedRetry: RetryConfig = { ...DEFAULT_RETRY_CONFIG, ...retry };

  this.config = config;
  this.adapter = adapter;
  this.registry = registry;
  this.retryConfig = mergedRetry;
}
70
+
71
+ // ==========================================================================
72
+ // Main API
73
+ // ==========================================================================
74
+
75
/**
 * Complete a request (non-streaming), retrying retryable failures.
 *
 * Every attempt re-runs the request transform, the optional `beforeRequest`
 * hook, the adapter call and the optional `afterResponse` hook. An error
 * thrown anywhere in that sequence is classified with `classifyError`; it is
 * retried after a delay while it is retryable and the attempt count is below
 * `retryConfig.maxRetries`, unless the `onError` hook answers `'abort'`.
 *
 * @param request - Normalized request to send.
 * @param options - Optional `signal` and `timeoutMs`, forwarded to the adapter.
 * @returns The transformed response, or the value returned by `afterResponse`.
 * @throws MembraneError when the error is not retryable, the retry budget is
 *   used up, or the `onError` hook aborts.
 */
async complete(
  request: NormalizedRequest,
  options: CompleteOptions = {}
): Promise<NormalizedResponse> {
  const startTime = Date.now();

  // `attempts` is 1 on the first pass and grows by one per retry.
  for (let attempts = 1; ; attempts++) {
    try {
      const { providerRequest, prefillResult } = this.transformRequest(request);

      // The beforeRequest hook may substitute the provider request; a
      // null/undefined result keeps the transformed one.
      const requestHooks = this.config.hooks;
      const finalRequest = requestHooks?.beforeRequest
        ? (await requestHooks.beforeRequest(request, providerRequest)) ?? providerRequest
        : providerRequest;

      const { signal, timeoutMs } = options;
      const providerResponse = await this.adapter.complete(finalRequest, { signal, timeoutMs });

      const response = this.transformResponse(
        providerResponse,
        request,
        prefillResult,
        startTime,
        attempts,
        finalRequest
      );

      // The afterResponse hook runs inside the try, so its failures go
      // through the same classify/retry path as adapter failures.
      const responseHooks = this.config.hooks;
      return responseHooks?.afterResponse
        ? await responseHooks.afterResponse(response, providerResponse.raw)
        : response;
    } catch (error) {
      const errorInfo = classifyError(error);
      // Built lazily so a MembraneError is only constructed when thrown.
      const asMembraneError = () =>
        error instanceof MembraneError ? error : new MembraneError(errorInfo);

      const mayRetry = errorInfo.retryable && attempts < this.retryConfig.maxRetries;
      if (!mayRetry) {
        throw asMembraneError();
      }

      // The onError hook is only consulted for errors that would be retried.
      const errorHooks = this.config.hooks;
      if (errorHooks?.onError) {
        const decision = await errorHooks.onError(errorInfo, attempts);
        if (decision === 'abort') {
          throw asMembraneError();
        }
      }

      // Back off, then fall through to the next loop iteration.
      await this.sleep(this.calculateRetryDelay(attempts));
    }
  }
}
140
+
141
/**
 * Stream a request with inline tool execution.
 *
 * Resolves to a complete NormalizedResponse, or to an AbortedResponse when
 * the request was cancelled via the abort signal. Use `isAbortedResponse()`
 * to tell the two apart.
 *
 * The native-tool path is taken only when the resolved tool mode is
 * `'native'` AND the request carries at least one tool; every other case
 * goes through the XML (prefill) path.
 *
 * @example
 * ```typescript
 * const result = await membrane.stream(request, { signal: controller.signal });
 * if (isAbortedResponse(result)) {
 *   console.log('Aborted:', result.rawAssistantText);
 *   // Use rawAssistantText as prefill to continue, or toolCalls/toolResults to rebuild state
 * } else {
 *   console.log('Complete:', result.content);
 * }
 * ```
 */
async stream(
  request: NormalizedRequest,
  options: StreamOptions = {}
): Promise<NormalizedResponse | AbortedResponse> {
  const wantsNative = this.resolveToolMode(request) === 'native';
  const hasTools = wantsNative && !!request.tools && request.tools.length > 0;

  return hasTools
    ? this.streamWithNativeTools(request, options)
    : this.streamWithXmlTools(request, options);
}
172
+
173
/**
 * Determine the effective tool mode for a request.
 *
 * An explicit `request.toolMode` other than `'auto'` is returned as is.
 * Otherwise the mode follows the adapter: `'native'` for the adapter named
 * `'openrouter'`, and `'xml'` (prefill-compatible) for everything else.
 */
private resolveToolMode(request: NormalizedRequest): ToolMode {
  const { toolMode } = request;
  if (toolMode && toolMode !== 'auto') {
    return toolMode;
  }

  // Auto mode: only the OpenRouter adapter is switched to native tools.
  return this.adapter.name === 'openrouter' ? 'native' : 'xml';
}
192
+
193
/**
 * Stream with XML-based tool execution (prefill mode).
 *
 * Runs a loop of provider streams. After each stream the method either:
 * - executes tool calls (the stream stopped on `</function_calls>` and an
 *   `onToolCalls` handler was supplied), appends the formatted results to the
 *   accumulated text and continues with that text as assistant prefill;
 * - resumes after a false-positive stop (stop reason is `stop_sequence` while
 *   the parser reports it is still inside a block); or
 * - stops and builds the final response.
 *
 * Tool rounds and false-positive resumptions share one counter, bounded by
 * `maxToolDepth` (default 10).
 *
 * Uses IncrementalXmlParser to track XML nesting depth for:
 * - False-positive stop sequence detection (e.g., "\nUser:" inside tool results)
 * - Structured block events for UI
 *
 * @param request - Normalized request; transformed once via `transformRequest`.
 * @param options - Streaming callbacks, `maxToolDepth` and abort `signal`.
 * @returns The final response, or an aborted response when `isAbortError`
 *   recognises the thrown error.
 * @throws Any non-abort error raised while streaming or running tool handlers.
 */
private async streamWithXmlTools(
  request: NormalizedRequest,
  options: StreamOptions
): Promise<NormalizedResponse | AbortedResponse> {
  const startTime = Date.now();
  const {
    onChunk,
    onContentBlockUpdate,
    onToolCalls,
    onPreToolContent,
    onUsage,
    onBlock,
    maxToolDepth = 10,
    signal,
  } = options;

  // Initialize incremental parser for XML tracking
  const parser = new IncrementalXmlParser();
  let toolDepth = 0;
  const totalUsage: BasicUsage = { inputTokens: 0, outputTokens: 0 };
  const contentBlocks: ContentBlock[] = [];
  let lastStopReason: StopReason = 'end_turn';
  let rawRequest: unknown;
  let rawResponse: unknown;

  // Track executed tool calls and results
  const executedToolCalls: ToolCall[] = [];
  const executedToolResults: ToolResult[] = [];

  // Transform initial request (XML tools are injected into system prompt)
  let { providerRequest, prefillResult } = this.transformRequest(request);

  try {
    // Tool execution loop
    while (toolDepth <= maxToolDepth) {
      rawRequest = providerRequest;

      // Track if we manually detected a stop sequence (API doesn't always stop)
      let detectedStopSequence: string | null = null;
      let truncatedAccumulated: string | null = null;

      // Track where to start checking for stop sequences (skip already-processed content)
      const checkFromIndex = parser.getAccumulated().length;

      // Stream from provider
      const streamResult = await this.streamOnce(
        providerRequest,
        {
          onChunk: (chunk) => {
            // If we already detected a stop sequence, ignore remaining chunks
            if (detectedStopSequence) {
              return;
            }

            // Process chunk with enriched streaming API
            const { content, blockEvents } = parser.processChunk(chunk);

            // Check for stop sequences only in NEW content (not already-processed)
            const accumulated = parser.getAccumulated();
            const newContent = accumulated.slice(checkFromIndex);

            // Choose the stop sequence that occurs EARLIEST in the new text.
            // Taking the first match in list order could truncate at a later
            // sequence when one chunk contains more than one of them. On a
            // tie the earlier list entry wins (strict `<`).
            let earliestIdx = -1;
            let earliestSeq: string | null = null;
            for (const stopSeq of prefillResult.stopSequences) {
              const idx = newContent.indexOf(stopSeq);
              if (idx !== -1 && (earliestIdx === -1 || idx < earliestIdx)) {
                earliestIdx = idx;
                earliestSeq = stopSeq;
              }
            }

            if (earliestSeq !== null) {
              // Found stop sequence - mark it and truncate
              const absoluteIdx = checkFromIndex + earliestIdx;
              detectedStopSequence = earliestSeq;
              truncatedAccumulated = accumulated.slice(0, absoluteIdx);

              // Emit only the portion up to stop sequence with metadata
              const alreadyEmitted = accumulated.length - chunk.length;
              if (absoluteIdx > alreadyEmitted) {
                const truncatedChunk = accumulated.slice(alreadyEmitted, absoluteIdx);
                const meta: ChunkMeta = {
                  type: parser.getCurrentBlockType(),
                  visible: parser.getCurrentBlockType() === 'text',
                  blockIndex: 0, // Approximate
                };
                onChunk?.(truncatedChunk, meta);
              }
              return;
            }

            // Emit block events first
            if (onBlock) {
              for (const event of blockEvents) {
                onBlock(event);
              }
            }

            // Emit content chunks with metadata
            for (const { text, meta } of content) {
              onChunk?.(text, meta);
            }
          },
          onContentBlock: onContentBlockUpdate
            ? (index: number, block: unknown) => onContentBlockUpdate(index, block as ContentBlock)
            : undefined,
        },
        { signal }
      );

      // If we detected stop sequence manually, fix up the parser and result
      if (detectedStopSequence && truncatedAccumulated !== null) {
        parser.reset();
        parser.push(truncatedAccumulated);
        streamResult.stopReason = 'stop_sequence';
        streamResult.stopSequence = detectedStopSequence;
      }

      rawResponse = streamResult.raw;
      lastStopReason = this.mapStopReason(streamResult.stopReason);

      // Accumulate usage
      totalUsage.inputTokens += streamResult.usage.inputTokens;
      totalUsage.outputTokens += streamResult.usage.outputTokens;
      onUsage?.(totalUsage);

      // Check for tool calls (if handler provided)
      if (onToolCalls && streamResult.stopSequence === '</function_calls>') {
        // Append the closing tag (we truncated before it, or API stopped before it)
        const closeTag = '</function_calls>';
        parser.push(closeTag);
        // Note: closing tag is structural XML, not emitted via onChunk (invisible)

        const parsed = parseToolCalls(parser.getAccumulated());

        if (parsed && parsed.calls.length > 0) {
          // Notify about pre-tool content
          if (onPreToolContent && parsed.beforeText.trim()) {
            await onPreToolContent(parsed.beforeText);
          }

          // Track the tool calls
          executedToolCalls.push(...parsed.calls);

          // Execute tools
          const context: ToolContext = {
            rawText: parsed.fullMatch,
            preamble: parsed.beforeText,
            depth: toolDepth,
            previousResults: executedToolResults,
            accumulated: parser.getAccumulated(),
          };

          const results = await onToolCalls(parsed.calls, context);

          // Track the tool results
          executedToolResults.push(...results);

          // Tool result text is forwarded to onChunk flagged as not visible.
          const toolResultMeta: ChunkMeta = {
            type: 'tool_result',
            visible: false,
            blockIndex: 0,
          };

          // Check if results contain images (requires split-turn injection)
          if (hasImageInToolResults(results)) {
            // Use split-turn injection for images
            const splitContent = formatToolResultsForSplitTurn(results);

            // Append the text portion to accumulated (before image)
            parser.push(splitContent.beforeImageXml);
            onChunk?.(splitContent.beforeImageXml, toolResultMeta);

            // Build continuation with image injection
            providerRequest = this.buildContinuationRequestWithImages(
              request,
              prefillResult,
              parser.getAccumulated(),
              splitContent.images,
              splitContent.afterImageXml
            );

            // Also add afterImageXml to accumulated for complete rawAssistantText
            // This is prefilled but represents assistant's logical output
            parser.push(splitContent.afterImageXml);
            onChunk?.(splitContent.afterImageXml, toolResultMeta);
            prefillResult.assistantPrefill = parser.getAccumulated();
          } else {
            // Standard path: no images, use simple XML injection
            const resultsXml = formatToolResults(results);
            parser.push(resultsXml);
            onChunk?.(resultsXml, toolResultMeta);

            // Update prefill and continue
            prefillResult.assistantPrefill = parser.getAccumulated();
            providerRequest = this.buildContinuationRequest(
              request,
              prefillResult,
              parser.getAccumulated()
            );
          }

          toolDepth++;
          continue;
        }
      }

      // Check for false-positive stop (unclosed block)
      // Only resume if we stopped on a stop_sequence (not end_turn or max_tokens)
      // Use parser's nesting detection instead of regex-based hasUnclosedToolBlock
      if (lastStopReason === 'stop_sequence' && parser.isInsideBlock()) {
        // False positive! The stop sequence (e.g., "\nUser:") appeared inside XML content
        // Re-add the consumed stop sequence and resume streaming
        if (streamResult.stopSequence) {
          parser.push(streamResult.stopSequence);
          const meta: ChunkMeta = {
            type: parser.getCurrentBlockType(),
            visible: parser.getCurrentBlockType() === 'text',
            blockIndex: 0,
          };
          onChunk?.(streamResult.stopSequence, meta);
        }

        // Resume streaming - but limit resumptions to prevent infinite loops
        toolDepth++; // Count this as a "depth" to limit iterations
        if (toolDepth > maxToolDepth) {
          break;
        }
        prefillResult.assistantPrefill = parser.getAccumulated();
        providerRequest = this.buildContinuationRequest(
          request,
          prefillResult,
          parser.getAccumulated()
        );
        continue;
      }

      // No more tools or tool handling disabled, we're done
      break;
    }

    // Build final response
    return this.buildFinalResponse(
      parser.getAccumulated(),
      contentBlocks,
      lastStopReason,
      totalUsage,
      request,
      prefillResult,
      startTime,
      1, // attempts
      rawRequest,
      rawResponse,
      executedToolCalls,
      executedToolResults
    );
  } catch (error) {
    // Abort errors become an AbortedResponse carrying the partial state.
    if (this.isAbortError(error)) {
      return this.buildAbortedResponse(
        parser.getAccumulated(),
        totalUsage,
        executedToolCalls,
        executedToolResults,
        'user'
      );
    }
    // Re-throw non-abort errors
    throw error;
  }
}
471
+
472
/**
 * Stream with native API tool execution.
 *
 * Repeatedly streams from the provider. When a stream ends with stop reason
 * `tool_use`, contains at least one `tool_use` block and an `onToolCalls`
 * handler was supplied, the handler is run, the assistant turn and a user
 * turn holding the tool results are appended to the working message list,
 * and the loop continues. The number of tool rounds is bounded by
 * `maxToolDepth` (default 10).
 *
 * @param request - Normalized request; `request.messages` is copied, not mutated.
 * @param options - Streaming callbacks, `maxToolDepth` and abort `signal`.
 * @returns The aggregated response, or an aborted response when
 *   `isAbortError` recognises the thrown error.
 * @throws Any non-abort error raised while streaming or running tool handlers.
 */
private async streamWithNativeTools(
  request: NormalizedRequest,
  options: StreamOptions
): Promise<NormalizedResponse | AbortedResponse> {
  const startTime = Date.now();
  const {
    onChunk,
    onContentBlockUpdate,
    onToolCalls,
    onPreToolContent,
    onUsage,
    maxToolDepth = 10,
    signal,
  } = options;

  let toolDepth = 0;
  const totalUsage: BasicUsage = { inputTokens: 0, outputTokens: 0 };
  let lastStopReason: StopReason = 'end_turn';
  let rawRequest: unknown;
  let rawResponse: unknown;

  // Track all text for rawAssistantText
  let allTextAccumulated = '';

  // Track executed tool calls and results
  const executedToolCalls: ToolCall[] = [];
  const executedToolResults: ToolResult[] = [];

  // Working copy of the conversation; tool rounds append to it.
  const messages = [...request.messages];
  const allContentBlocks: ContentBlock[] = [];

  try {
    // Tool execution loop
    while (toolDepth <= maxToolDepth) {
      // Build provider request with native tools
      const providerRequest = this.buildNativeToolRequest(request, messages);
      rawRequest = providerRequest;

      // Stream from provider
      let textAccumulated = '';
      // Every text chunk is reported against block index 0.
      const blockIndex = 0;
      const streamResult = await this.streamOnce(
        providerRequest,
        {
          onChunk: (chunk) => {
            textAccumulated += chunk;
            allTextAccumulated += chunk;
            // For native mode, emit text chunks with basic metadata
            // TODO: Use native API content_block events for richer metadata
            const meta: ChunkMeta = {
              type: 'text',
              visible: true,
              blockIndex,
            };
            onChunk?.(chunk, meta);
          },
          onContentBlock: onContentBlockUpdate
            ? (index: number, block: unknown) => onContentBlockUpdate(index, block as ContentBlock)
            : undefined,
        },
        { signal }
      );

      rawResponse = streamResult.raw;
      lastStopReason = this.mapStopReason(streamResult.stopReason);

      // Accumulate usage
      totalUsage.inputTokens += streamResult.usage.inputTokens;
      totalUsage.outputTokens += streamResult.usage.outputTokens;
      onUsage?.(totalUsage);

      // Parse content blocks from response
      const responseBlocks = this.parseProviderContent(streamResult.content);
      allContentBlocks.push(...responseBlocks);

      // Check for tool_use blocks
      const toolUseBlocks = responseBlocks.filter(
        (b): b is ContentBlock & { type: 'tool_use' } => b.type === 'tool_use'
      );

      if (onToolCalls && toolUseBlocks.length > 0 && lastStopReason === 'tool_use') {
        // Notify about pre-tool content
        if (onPreToolContent) {
          // Type predicate narrows to text blocks, so no `any` cast is needed.
          const preToolText = responseBlocks
            .filter((b): b is ContentBlock & { type: 'text' } => b.type === 'text')
            .map(b => b.text)
            .join('');
          if (preToolText.trim()) {
            await onPreToolContent(preToolText);
          }
        }

        // Convert to normalized ToolCall[]
        const toolCalls: ToolCall[] = toolUseBlocks.map(block => ({
          id: block.id,
          name: block.name,
          input: block.input as Record<string, unknown>,
        }));

        // Track tool calls
        executedToolCalls.push(...toolCalls);

        // Execute tools
        const context: ToolContext = {
          rawText: JSON.stringify(toolUseBlocks),
          preamble: textAccumulated,
          depth: toolDepth,
          previousResults: executedToolResults,
          accumulated: allTextAccumulated,
        };

        const results = await onToolCalls(toolCalls, context);

        // Track tool results
        executedToolResults.push(...results);

        // Add tool results to content blocks
        for (const result of results) {
          allContentBlocks.push({
            type: 'tool_result',
            toolUseId: result.toolUseId,
            content: result.content,
            isError: result.isError,
          });
        }

        // Add assistant message with tool use and user message with tool results
        messages.push({
          participant: 'Claude',
          content: responseBlocks,
        });

        messages.push({
          participant: 'User',
          content: results.map(r => ({
            type: 'tool_result' as const,
            toolUseId: r.toolUseId,
            content: r.content,
            isError: r.isError,
          })),
        });

        toolDepth++;
        continue;
      }

      // No more tools, we're done
      break;
    }

    const durationMs = Date.now() - startTime;

    return {
      content: allContentBlocks,
      rawAssistantText: allTextAccumulated,
      toolCalls: executedToolCalls,
      toolResults: executedToolResults,
      stopReason: lastStopReason,
      usage: totalUsage,
      details: {
        stop: {
          reason: lastStopReason,
          wasTruncated: lastStopReason === 'max_tokens',
        },
        usage: { ...totalUsage },
        timing: {
          totalDurationMs: durationMs,
          attempts: 1,
        },
        model: {
          requested: request.config.model,
          actual: request.config.model,
          provider: this.adapter.name,
        },
        // Cache statistics are reported as zeros on this path.
        cache: {
          markersInRequest: 0,
          tokensCreated: 0,
          tokensRead: 0,
          hitRatio: 0,
        },
      },
      raw: {
        request: rawRequest,
        response: rawResponse,
      },
    };
  } catch (error) {
    // Abort errors become an AbortedResponse carrying the partial state.
    if (this.isAbortError(error)) {
      return this.buildAbortedResponse(
        allTextAccumulated,
        totalUsage,
        executedToolCalls,
        executedToolResults,
        'user'
      );
    }
    // Re-throw non-abort errors
    throw error;
  }
}
675
+
676
/**
 * Build a provider request with native tool support.
 *
 * Each message becomes `{ role, content }`. The role is `assistant` when the
 * participant is the configured assistant name
 * (`config.assistantParticipant`, default `'Claude'`) or the literal
 * `'Claude'`; every other participant maps to `user`. The literal is still
 * accepted because `streamWithNativeTools` labels the assistant turns it
 * synthesizes with `'Claude'`.
 *
 * Only `text`, `tool_use` and `tool_result` blocks are converted; blocks of
 * any other type are skipped.
 *
 * @param request - Original request; supplies model settings, system prompt,
 *   tools and provider params.
 * @param messages - Conversation to send (may include appended tool rounds).
 * @returns Provider request object with `tools` in `input_schema` form.
 */
private buildNativeToolRequest(
  request: NormalizedRequest,
  messages: typeof request.messages
): any {
  // Same default as the prefill transform, so both tool modes agree on
  // which participant is the assistant.
  const assistantName = this.config.assistantParticipant ?? 'Claude';

  // Convert messages to provider format
  const providerMessages: any[] = [];

  for (const msg of messages) {
    const isAssistant = msg.participant === assistantName || msg.participant === 'Claude';
    const role = isAssistant ? 'assistant' : 'user';

    // Convert content blocks
    const content: any[] = [];
    for (const block of msg.content) {
      if (block.type === 'text') {
        content.push({ type: 'text', text: block.text });
      } else if (block.type === 'tool_use') {
        content.push({
          type: 'tool_use',
          id: block.id,
          name: block.name,
          input: block.input,
        });
      } else if (block.type === 'tool_result') {
        content.push({
          type: 'tool_result',
          tool_use_id: block.toolUseId,
          content: block.content,
          is_error: block.isError,
        });
      }
    }

    providerMessages.push({ role, content });
  }

  // Convert tools to provider format
  const tools = request.tools?.map(tool => ({
    name: tool.name,
    description: tool.description,
    input_schema: tool.inputSchema,
  }));

  return {
    model: request.config.model,
    maxTokens: request.config.maxTokens,
    temperature: request.config.temperature,
    messages: providerMessages,
    system: request.system,
    tools,
    extra: request.providerParams,
  };
}
732
+
733
/**
 * Parse provider response content into normalized blocks.
 *
 * - Falsy content (including the empty string) yields `[]`.
 * - A string yields a single text block.
 * - An array is mapped item by item: `text`, `tool_use` and `thinking` items
 *   are converted; items of any other type are skipped, as are entries that
 *   are not objects (`null`, primitives).
 * - Any other value yields `[]`.
 *
 * @param content - Raw `content` value taken from a provider stream result.
 * @returns Normalized content blocks, in input order.
 */
private parseProviderContent(content: unknown): ContentBlock[] {
  if (!content) return [];

  if (typeof content === 'string') {
    return [{ type: 'text', text: content }];
  }

  if (!Array.isArray(content)) {
    return [];
  }

  const blocks: ContentBlock[] = [];
  for (const item of content) {
    // Guard against null/primitive entries; reading `.type` on null would throw.
    if (item === null || typeof item !== 'object') {
      continue;
    }

    if (item.type === 'text') {
      blocks.push({ type: 'text', text: item.text });
    } else if (item.type === 'tool_use') {
      blocks.push({
        type: 'tool_use',
        id: item.id,
        name: item.name,
        input: item.input,
      });
    } else if (item.type === 'thinking') {
      blocks.push({
        type: 'thinking',
        thinking: item.thinking,
        signature: item.signature,
      });
    }
  }
  return blocks;
}
768
+
769
+ // ==========================================================================
770
+ // Internal Methods
771
+ // ==========================================================================
772
+
773
/**
 * Turn a normalized request into a provider request via the prefill transform.
 *
 * Caller-supplied stop sequences (either a plain array or an object with a
 * `sequences` field) are handed to the transform as additional stop
 * sequences. `maxParticipantsForStop` is taken from the request, then the
 * instance config, then defaults to 10.
 *
 * @returns The provider request together with the prefill transform result.
 */
private transformRequest(request: NormalizedRequest): {
  providerRequest: any;
  prefillResult: PrefillTransformResult;
} {
  // For now the prefill transform is always used.

  const userStops = request.stopSequences;
  const additionalStopSequences = Array.isArray(userStops)
    ? userStops
    : (userStops?.sequences ?? []);

  // Request-level setting wins over the instance config.
  const maxParticipantsForStop =
    request.maxParticipantsForStop ?? this.config.maxParticipantsForStop ?? 10;

  const assistantName = this.config.assistantParticipant ?? 'Claude';
  const prefillResult = transformToPrefill(request, {
    assistantName,
    promptCaching: true, // Enable cache control by default
    additionalStopSequences,
    maxParticipantsForStop,
  });

  // The system content is passed separately only when it is non-empty.
  const hasSystem = prefillResult.systemContent.length > 0;
  const { model, maxTokens, temperature } = request.config;

  // Messages come pre-built from the prefill transform.
  const providerRequest = {
    model,
    maxTokens,
    temperature,
    messages: prefillResult.messages,
    system: hasSystem ? prefillResult.systemContent : undefined,
    stopSequences: prefillResult.stopSequences,
    extra: request.providerParams,
  };

  return { providerRequest, prefillResult };
}
815
+
816
/**
 * Run a single streaming call against the adapter.
 *
 * @param request - Provider-format request.
 * @param callbacks - Text-chunk callback and optional content-block callback.
 * @param options - Optional abort `signal`.
 * @returns Whatever the adapter's `stream` call resolves to.
 */
private async streamOnce(
  request: any,
  callbacks: { onChunk: (chunk: string) => void; onContentBlock?: (index: number, block: unknown) => void },
  options: { signal?: AbortSignal }
) {
  const streamed = await this.adapter.stream(request, callbacks, options);
  return streamed;
}
823
+
824
/**
 * Build the follow-up provider request used to continue generation from the
 * text accumulated so far.
 *
 * The last assistant message of the prefill result is replaced by the
 * accumulated text; if there is no assistant message, one is appended.
 *
 * @param originalRequest - Source of model, maxTokens, temperature and
 *                          provider params.
 * @param prefillResult - Source of messages, system content and stop sequences.
 * @param accumulated - Assistant text produced so far.
 * @returns A provider request with the same shape as the initial one.
 */
private buildContinuationRequest(
  originalRequest: NormalizedRequest,
  prefillResult: PrefillTransformResult,
  accumulated: string
): any {
  // Anthropic quirk: assistant content cannot end with trailing whitespace
  const trimmedAccumulated = accumulated.trimEnd();

  // Work on a shallow copy so the prefill result's array is left untouched.
  const messages = [...prefillResult.messages];

  // Position of the last assistant turn, or -1 when there is none.
  const lastAssistantAt = messages.map((entry) => entry?.role).lastIndexOf('assistant');

  if (lastAssistantAt === -1) {
    messages.push({ role: 'assistant', content: trimmedAccumulated });
  } else {
    messages[lastAssistantAt] = { role: 'assistant', content: trimmedAccumulated };
  }

  const { systemContent } = prefillResult;

  return {
    model: originalRequest.config.model,
    maxTokens: originalRequest.config.maxTokens,
    temperature: originalRequest.config.temperature,
    messages,
    system: systemContent.length > 0 ? systemContent : undefined,
    stopSequences: prefillResult.stopSequences,
    extra: originalRequest.providerParams,
  };
}
862
+
863
/**
 * Build continuation request with split-turn image injection.
 *
 * When tool results contain images in prefill mode, we must:
 * 1. End assistant turn mid-XML (after text content, inside <function_results>)
 * 2. Insert user turn with only image content
 * 3. Continue with assistant prefill containing closing XML tags
 *
 * This is required because Anthropic API only allows images in user turns.
 *
 * Structure:
 * ```
 * Assistant: "...response..." + <function_results><result>text content
 * User: [image blocks]
 * Assistant (prefill): </result></function_results>
 * ```
 *
 * Only the LAST assistant message of the prefill result is dropped; it is
 * replaced by the accumulated text, mirroring what `buildContinuationRequest`
 * does. Any earlier assistant messages are kept so their content is not lost.
 *
 * @param originalRequest - Source of model, maxTokens, temperature and
 *                          provider params.
 * @param prefillResult - Source of messages, system content and stop sequences.
 * @param accumulated - Assistant text produced so far (ends mid-XML).
 * @param images - Image blocks to place in the injected user turn.
 * @param afterImageXml - Closing XML used as the final assistant prefill.
 * @returns A provider request with the same shape as the initial one.
 */
private buildContinuationRequestWithImages(
  originalRequest: NormalizedRequest,
  prefillResult: PrefillTransformResult,
  accumulated: string,
  images: ProviderImageBlock[],
  afterImageXml: string
): any {
  // Anthropic quirk: assistant content cannot end with trailing whitespace
  const trimmedAccumulated = accumulated.trimEnd();

  // Locate the last assistant message; it is the only one superseded by the
  // accumulated text. -1 means the prefill result has no assistant message.
  let lastAssistantIndex = -1;
  for (let i = prefillResult.messages.length - 1; i >= 0; i--) {
    if (prefillResult.messages[i]?.role === 'assistant') {
      lastAssistantIndex = i;
      break;
    }
  }

  // Copy every message except that last assistant message. Earlier assistant
  // turns must survive, otherwise their content vanishes from the request.
  const messages: any[] = [];
  prefillResult.messages.forEach((msg, index) => {
    if (index !== lastAssistantIndex) {
      messages.push({ ...msg });
    }
  });

  // Add assistant message with accumulated content (ends mid-XML)
  messages.push({
    role: 'assistant',
    content: trimmedAccumulated,
  });

  // Add user message with just the images
  messages.push({
    role: 'user',
    content: images,
  });

  // Add assistant prefill with closing XML tags
  // Anthropic quirk: assistant content cannot end with trailing whitespace
  const trimmedAfterXml = afterImageXml.trimEnd();
  messages.push({
    role: 'assistant',
    content: trimmedAfterXml,
  });

  return {
    model: originalRequest.config.model,
    maxTokens: originalRequest.config.maxTokens,
    temperature: originalRequest.config.temperature,
    messages,
    system: prefillResult.systemContent.length > 0
      ? prefillResult.systemContent
      : undefined,
    stopSequences: prefillResult.stopSequences,
    extra: originalRequest.providerParams,
  };
}
934
+
935
/**
 * Convert a provider response into a `NormalizedResponse`.
 *
 * Text, tool_use and thinking blocks are copied into the normalized content;
 * other block types are ignored. `rawAssistantText` is the concatenation of
 * text blocks only. No tools are executed here, so `toolResults` is empty.
 *
 * @param providerResponse - Response object as returned by the adapter.
 * @param request - The originating request (for the requested model name).
 * @param prefillResult - Supplies the number of cache markers applied.
 * @param startTime - Epoch milliseconds at which the call started.
 * @param attempts - Number of attempts made.
 * @param rawRequest - Raw provider request, if available; stored as `null`
 *                     when omitted.
 * @returns The normalized response.
 */
private transformResponse(
  providerResponse: any,
  request: NormalizedRequest,
  prefillResult: PrefillTransformResult,
  startTime: number,
  attempts: number,
  rawRequest?: unknown
): NormalizedResponse {
  const content: ContentBlock[] = [];
  const toolCalls: ToolCall[] = [];

  // Plain-text view of the assistant output (text blocks only).
  let rawAssistantText = '';

  const providerContent = providerResponse.content;
  if (Array.isArray(providerContent)) {
    for (const block of providerContent) {
      switch (block.type) {
        case 'text':
          content.push({ type: 'text', text: block.text });
          rawAssistantText += block.text;
          break;
        case 'tool_use': {
          const { id, name, input } = block;
          content.push({ type: 'tool_use', id, name, input });
          toolCalls.push({ id, name, input });
          break;
        }
        case 'thinking':
          content.push({
            type: 'thinking',
            thinking: block.thinking,
            signature: block.signature,
          });
          break;
        default:
          // Unrecognized block types are dropped.
          break;
      }
    }
  } else if (typeof providerContent === 'string') {
    // Some responses carry the whole body as a single string.
    content.push({ type: 'text', text: providerContent });
    rawAssistantText = providerContent;
  }

  const stopReason = this.mapStopReason(providerResponse.stopReason);
  const durationMs = Date.now() - startTime;

  const reported = providerResponse.usage;
  const usage = {
    inputTokens: reported.inputTokens,
    outputTokens: reported.outputTokens,
  };

  return {
    content,
    rawAssistantText,
    toolCalls,
    toolResults: [], // complete() doesn't execute tools
    stopReason,
    usage,
    details: {
      stop: {
        reason: stopReason,
        triggeredSequence: providerResponse.stopSequence,
        wasTruncated: stopReason === 'max_tokens',
      },
      usage: {
        inputTokens: reported.inputTokens,
        outputTokens: reported.outputTokens,
        cacheCreationTokens: reported.cacheCreationTokens,
        cacheReadTokens: reported.cacheReadTokens,
      },
      timing: {
        totalDurationMs: durationMs,
        attempts,
      },
      model: {
        requested: request.config.model,
        actual: providerResponse.model,
        provider: this.adapter.name,
      },
      cache: {
        markersInRequest: prefillResult.cacheMarkersApplied,
        tokensCreated: reported.cacheCreationTokens ?? 0,
        tokensRead: reported.cacheReadTokens ?? 0,
        hitRatio: this.calculateCacheHitRatio(reported),
      },
    },
    raw: {
      request: rawRequest ?? null,
      response: providerResponse.raw,
    },
  };
}
1028
+
1029
/**
 * Assemble the final `NormalizedResponse` from accumulated streaming state.
 *
 * When `contentBlocks` is non-empty it is used as-is together with the
 * executed tool calls/results. Otherwise `accumulated` is parsed into blocks,
 * and tool calls/results found by the parser take precedence over the
 * executed ones whenever the parser found any.
 *
 * NOTE(review): the `cache` details are fixed at zero and `prefillResult` is
 * not read here; `model.actual` echoes the requested model (see TODO below).
 *
 * @param accumulated - Full assistant text; returned as `rawAssistantText`.
 * @param contentBlocks - Already-structured blocks, or empty to parse the text.
 * @param stopReason - Normalized stop reason.
 * @param usage - Token usage to report.
 * @param request - Originating request (for the model name).
 * @param prefillResult - Prefill transform result (currently unused).
 * @param startTime - Epoch milliseconds at which the call started.
 * @param attempts - Number of attempts made.
 * @param rawRequest - Raw provider request, stored under `raw.request`.
 * @param rawResponse - Raw provider response, stored under `raw.response`.
 * @param executedToolCalls - Tool calls executed during the run.
 * @param executedToolResults - Results of those tool calls.
 * @returns The normalized response.
 */
private buildFinalResponse(
  accumulated: string,
  contentBlocks: ContentBlock[],
  stopReason: StopReason,
  usage: BasicUsage,
  request: NormalizedRequest,
  prefillResult: PrefillTransformResult,
  startTime: number,
  attempts: number,
  rawRequest: unknown,
  rawResponse: unknown,
  executedToolCalls: ToolCall[] = [],
  executedToolResults: ToolResult[] = []
): NormalizedResponse {
  // Start from the native-mode answer: blocks are already structured.
  let finalContent = contentBlocks;
  let toolCalls = executedToolCalls;
  let toolResults = executedToolResults;

  if (contentBlocks.length === 0) {
    // XML mode: derive blocks (and possibly tool calls/results) from the text.
    const parsed = parseAccumulatedIntoBlocks(accumulated);
    finalContent = parsed.blocks;
    if (parsed.toolCalls.length > 0) {
      toolCalls = parsed.toolCalls;
    }
    if (parsed.toolResults.length > 0) {
      toolResults = parsed.toolResults;
    }
  }

  const durationMs = Date.now() - startTime;
  const requestedModel = request.config.model;

  return {
    content: finalContent,
    rawAssistantText: accumulated,
    toolCalls,
    toolResults,
    stopReason,
    usage,
    details: {
      stop: {
        reason: stopReason,
        wasTruncated: stopReason === 'max_tokens',
      },
      usage: {
        ...usage,
      },
      timing: {
        totalDurationMs: durationMs,
        attempts,
      },
      model: {
        requested: requestedModel,
        actual: requestedModel, // TODO: get from response
        provider: this.adapter.name,
      },
      cache: {
        markersInRequest: 0,
        tokensCreated: 0,
        tokensRead: 0,
        hitRatio: 0,
      },
    },
    raw: {
      request: rawRequest,
      response: rawResponse,
    },
  };
}
1101
+
1102
/**
 * Map a provider stop reason onto the normalized `StopReason`.
 *
 * `max_tokens`, `stop_sequence` and `tool_use` pass through unchanged; every
 * other value (including `end_turn` itself) maps to `end_turn`.
 *
 * @param providerReason - Stop reason string reported by the provider.
 * @returns The normalized stop reason.
 */
private mapStopReason(providerReason: string): StopReason {
  if (
    providerReason === 'max_tokens' ||
    providerReason === 'stop_sequence' ||
    providerReason === 'tool_use'
  ) {
    return providerReason;
  }
  return 'end_turn';
}
1116
+
1117
/**
 * Ratio of cache-read tokens to input tokens.
 *
 * Missing counts are treated as 0, and a zero input count yields 0 rather
 * than dividing by zero.
 *
 * NOTE(review): if the adapter's `inputTokens` excludes cache-read tokens,
 * this ratio can exceed 1 — confirm how the adapter reports usage.
 *
 * @param usage - Usage object exposing `cacheReadTokens` and `inputTokens`.
 * @returns `cacheReadTokens / inputTokens`, or 0 when `inputTokens` is 0.
 */
private calculateCacheHitRatio(usage: any): number {
  const hits = usage.cacheReadTokens ?? 0;
  const denominator = usage.inputTokens ?? 0;
  return denominator === 0 ? 0 : hits / denominator;
}
1123
+
1124
/**
 * Exponential backoff delay for a given attempt, capped at the configured
 * maximum.
 *
 * @param attempt - Attempt number; attempt 1 yields the base delay.
 * @returns `retryDelayMs * backoffMultiplier^(attempt - 1)`, limited to
 *          `maxRetryDelayMs`.
 */
private calculateRetryDelay(attempt: number): number {
  const policy = this.retryConfig;
  const exponent = attempt - 1;
  const backedOff = policy.retryDelayMs * Math.pow(policy.backoffMultiplier, exponent);
  return Math.min(backedOff, policy.maxRetryDelayMs);
}
1129
+
1130
/**
 * Resolve after the given delay.
 *
 * @param ms - Delay in milliseconds, passed to `setTimeout`.
 * @returns A promise that resolves once the timer fires.
 */
private sleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
1133
+
1134
/**
 * Check if an error is an abort error.
 *
 * An `Error` counts as an abort when its name is `AbortError` or its message
 * contains one of the abort substrings (case-sensitive). A `DOMException`
 * (where that global exists) counts only when its name is `AbortError`.
 *
 * @param error - The caught value to inspect.
 * @returns `true` when the value looks like an abort, otherwise `false`.
 */
private isAbortError(error: unknown): boolean {
  if (error instanceof Error) {
    // Standard AbortError
    const namedAbort = error.name === 'AbortError';
    // Anthropic SDK abort: recognised by message text
    const mentionsAbort =
      namedAbort || ['aborted', 'abort'].some((needle) => error.message.includes(needle));
    if (mentionsAbort) {
      return true;
    }
  }

  // DOMException for browser environments
  return (
    typeof DOMException !== 'undefined' &&
    error instanceof DOMException &&
    error.name === 'AbortError'
  );
}
1150
+
1151
/**
 * Build an AbortedResponse from current execution state.
 *
 * The accumulated text is parsed into content blocks for the partial content.
 * Empty collections and an empty accumulated string are reported as
 * `undefined` rather than as empty values.
 *
 * @param accumulated - Assistant text produced before the abort.
 * @param usage - Usage gathered so far; returned as `partialUsage`.
 * @param toolCalls - Tool calls made before the abort.
 * @param toolResults - Tool results gathered before the abort.
 * @param reason - Why the run was aborted.
 * @returns The aborted response.
 */
private buildAbortedResponse(
  accumulated: string,
  usage: BasicUsage,
  toolCalls: ToolCall[],
  toolResults: ToolResult[],
  reason: 'user' | 'timeout' | 'error'
): AbortedResponse {
  // Collapse empty lists to undefined so absent data is not reported as [].
  const orUndefined = <T>(items: T[]): T[] | undefined =>
    items.length > 0 ? items : undefined;

  const parsed = parseAccumulatedIntoBlocks(accumulated);

  return {
    aborted: true,
    partialContent: orUndefined(parsed.blocks),
    partialUsage: usage,
    reason,
    rawAssistantText: accumulated || undefined,
    toolCalls: orUndefined(toolCalls),
    toolResults: orUndefined(toolResults),
  };
}
1174
+ }