@just-every/ensemble 0.1.13 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/README.md +490 -100
  2. package/package.json +1 -1
package/README.md CHANGED
@@ -5,6 +5,14 @@

  A unified interface for interacting with multiple LLM providers including OpenAI, Anthropic Claude, Google Gemini, Deepseek, Grok, and OpenRouter.

+ ## Why Use an Ensemble Approach?
+
+ The ensemble pattern - rotating between multiple LLM providers dynamically - offers compelling advantages over relying on a single model. Research on self-consistency sampling has shown that generating multiple reasoning chains and taking the consensus answer can improve accuracy by double-digit margins on complex reasoning tasks. By automating this at runtime rather than at prompt-engineering time, ensemble delivers more reliable and robust AI interactions.
+
+ Beyond accuracy improvements, ensemble requests provide practical benefits for production systems. Different models carry unique training biases and stylistic patterns - rotating between them dilutes individual quirks and prevents conversations from getting "stuck" in one voice. The approach also ensures resilience: when one provider experiences an outage, quota limit, or latency spike, requests seamlessly route to alternatives. You can optimize costs by routing simple tasks to cheaper models while reserving premium models for complex reasoning. Need regex help? Route to a code-specialized model. Need emotional calibration? Use a dialogue expert. Ensemble gives you this granularity without complex conditional logic.
+
+ Perhaps most importantly, the ensemble approach future-proofs your application. Model quality and pricing change weekly in the fast-moving LLM landscape. With ensemble, you can trial newcomers on a small percentage of traffic, compare real metrics, then scale up or roll back within minutes - all without changing your code.
+
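The trial-traffic idea above can be sketched directly against the `request()` API documented later in this README. The `pickModel` helper, the model names, and the 5% share below are illustrative assumptions, not part of the package (and the sketch assumes `ResponseInput` is exported alongside `request`):

```typescript
import { request, type ResponseInput } from '@just-every/ensemble';

// Hypothetical router: send ~5% of traffic to a trial model, the rest to the incumbent.
function pickModel(trialShare = 0.05): string {
  return Math.random() < trialShare ? 'gemini-2.0-flash' : 'gpt-4o-mini';
}

async function answer(messages: ResponseInput): Promise<string> {
  let text = '';
  for await (const event of request(pickModel(), messages)) {
    if (event.type === 'text_delta') text += event.delta; // accumulate streamed text
  }
  return text;
}
```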
  ## Features

  - **Multi-provider support**: Claude, OpenAI, Gemini, Deepseek, Grok, OpenRouter
@@ -96,187 +104,569 @@ for await (const event of earlyStream) {

  ## API Reference

- ### `request(model, messages, options?)`
+ ### Core Functions

- Main function for making LLM requests using the AsyncGenerator API.
+ #### `request(model, messages, options?)`
+
+ Main function for making LLM requests with streaming responses and automatic tool execution.

  **Parameters:**
- - `model` (string): Model identifier
- - `messages` (ResponseInput): Array of message objects
+ - `model` (string): Model identifier (e.g., 'gpt-4o', 'claude-3.5-sonnet', 'gemini-2.0-flash')
+ - `messages` (ResponseInput): Array of message objects in the conversation
  - `options` (RequestOptions): Optional configuration object

  **Returns:** `AsyncGenerator<EnsembleStreamEvent>` - An async generator that yields streaming events

  ```typescript
  interface RequestOptions {
-   agentId?: string;
-   tools?: ToolFunction[];
-   modelSettings?: ModelSettings;
-   modelClass?: ModelClassID;
+   agentId?: string; // Identifier for logging/tracking
+   tools?: ToolFunction[]; // Array of tool definitions
+   toolChoice?: ToolChoice; // Control tool selection behavior
+   maxToolCalls?: number; // Max rounds of tool execution (default: 10, 0 = disabled)
+   processToolCall?: (toolCalls: ToolCall[]) => Promise<any>; // Custom tool handler
+   modelSettings?: ModelSettings; // Temperature, maxTokens, etc.
+   modelClass?: ModelClassID; // 'standard' | 'code' | 'reasoning' | 'monologue'
+   responseFormat?: ResponseFormat; // JSON mode or structured output
+   maxImageDimension?: number; // Auto-resize images (default: provider-specific)
+   fallbackModels?: string[]; // Models to try if primary fails
  }

- // Usage with try/catch for error handling
- try {
-   for await (const event of request(model, messages, options)) {
-     // Process events
-   }
- } catch (error) {
-   // Handle errors
- }
+ // Stream event types
+ type EnsembleStreamEvent =
+   | { type: 'text_delta', delta: string }
+   | { type: 'text', text: string }
+   | { type: 'message_delta', content: string }
+   | { type: 'message_complete', content: string }
+   | { type: 'tool_start', tool_calls: ToolCall[] }
+   | { type: 'cost_update', usage: TokenUsage }
+   | { type: 'stream_end', timestamp: string }
+   | { type: 'error', error: Error };
  ```
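A minimal way to consume this generator, mirroring the try/catch usage example removed above, would look like the following sketch; the model name and prompt are placeholders:

```typescript
import { request } from '@just-every/ensemble';

try {
  for await (const event of request('gpt-4o-mini', [
    { type: 'message', role: 'user', content: 'Say hello' }
  ])) {
    if (event.type === 'text_delta') process.stdout.write(event.delta); // stream text as it arrives
    if (event.type === 'cost_update') console.log('usage:', event.usage); // token/cost reporting
  }
} catch (error) {
  console.error('request failed:', error); // provider or network errors surface here
}
```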

- ### Model Provider Interface
+ ### Working with Models

- Each provider implements the `ModelProvider` interface:
+ #### Model Selection

  ```typescript
- interface ModelProvider {
-   createResponseStream(
-     model: string,
-     messages: ResponseInput,
-     agent: EnsembleAgent
-   ): AsyncGenerator<EnsembleStreamEvent>;
+ import { getModelFromClass, findModel, MODEL_REGISTRY } from '@just-every/ensemble';
+
+ // Get best model for a specific task type
+ const codeModel = getModelFromClass('code'); // Returns best available code model
+ const reasoningModel = getModelFromClass('reasoning'); // For complex reasoning tasks
+
+ // Check if a model exists
+ const modelInfo = findModel('gpt-4o');
+ if (modelInfo) {
+   console.log(`Provider: ${modelInfo.provider}`);
+   console.log(`Input cost: $${modelInfo.inputCost}/million tokens`);
+ }
+
+ // List all available models
+ for (const [modelName, info] of Object.entries(MODEL_REGISTRY)) {
+   console.log(`${modelName}: ${info.provider}`);
  }
  ```

- ### Utilities
+ #### Model Classes
+
+ - **standard**: General-purpose models for everyday tasks
+ - **code**: Optimized for programming and technical tasks
+ - **reasoning**: Advanced models for complex logical reasoning
+ - **monologue**: Models supporting extended thinking/reasoning traces
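Each class name above is a valid argument to `getModelFromClass()` (see Model Selection) and a valid value for the `modelClass` request option. A minimal sketch, with an arbitrary prompt and the `monologue` class chosen purely for illustration:

```typescript
import { getModelFromClass, request } from '@just-every/ensemble';

// Resolve a concrete model from a class, then call request() as usual.
const model = getModelFromClass('monologue'); // or 'standard' | 'code' | 'reasoning'

for await (const event of request(model, [
  { type: 'message', role: 'user', content: 'Think through the trade-offs of a rewrite vs. a refactor' }
])) {
  if (event.type === 'message_complete') console.log(event.content);
}
```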

- - **Cost Tracking**: Monitor token usage and costs with cost_tracker
- - **Quota Management**: Track API quotas and rate limits with quota_tracker
- - **Image Processing**: Convert images to text, resize, and optimize
- - **Logging System**: Pluggable request/response logging with configurable backends
- - **Communication**: Logging and debugging utilities
- - **Delta Buffer**: Handle streaming response deltas
- - **AsyncQueue**: Generic async queue for bridging callbacks to async iteration (used internally)
+ ### Message Types

- ### Automatic Tool Execution
+ ```typescript
+ // User/Assistant messages
+ interface TextMessage {
+   type: 'message';
+   role: 'user' | 'assistant' | 'developer';
+   content: string | MessageContent[];
+   status?: 'completed' | 'in_progress';
+ }

- The `request` function provides automatic tool execution:
+ // Multi-modal content
+ type MessageContent =
+   | { type: 'input_text', text: string }
+   | { type: 'input_image', image_url: string, detail?: 'auto' | 'low' | 'high' }
+   | { type: 'tool_use', id: string, name: string, arguments: any };
+
+ // Tool-related messages
+ interface FunctionCall {
+   type: 'function_call';
+   id: string;
+   name: string;
+   arguments: string;
+ }
+
+ interface FunctionCallOutput {
+   type: 'function_call_output';
+   id: string;
+   output: string;
+ }
+ ```
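These tool-related types can also appear directly in the conversation history passed to `request()`. A minimal sketch, assuming `ResponseInput` is the exported array type and that a call is paired with its output by `id`; the ids and the weather payload are made-up examples:

```typescript
import { request, type ResponseInput } from '@just-every/ensemble';

// History that already contains one completed tool round-trip.
const history: ResponseInput = [
  { type: 'message', role: 'user', content: 'What should I wear in Tokyo today?' },
  { type: 'function_call', id: 'call_1', name: 'get_weather', arguments: '{"city":"Tokyo"}' },
  { type: 'function_call_output', id: 'call_1', output: '22°C, light rain' }
];

for await (const event of request('gpt-4o-mini', history)) {
  if (event.type === 'text_delta') process.stdout.write(event.delta);
}
```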
+
+ ## Common Use Cases
+
+ ### 1. Basic Conversations

  ```typescript
  import { request } from '@just-every/ensemble';

- // Define tools
- const tools = [{
-   function: async ({ city }: { city: string }) => {
-     return `Weather in ${city}: Sunny, 72°F`;
+ // Simple Q&A
+ for await (const event of request('gpt-4o-mini', [
+   { type: 'message', role: 'user', content: 'Explain quantum computing in simple terms' }
+ ])) {
+   if (event.type === 'text_delta') {
+     process.stdout.write(event.delta);
+   }
+ }
+
+ // Multi-turn conversation
+ const messages = [
+   { type: 'message', role: 'developer', content: 'You are a helpful coding assistant' },
+   { type: 'message', role: 'user', content: 'How do I center a div in CSS?' },
+   { type: 'message', role: 'assistant', content: 'Here are several ways...' },
+   { type: 'message', role: 'user', content: 'What about using flexbox?' }
+ ];
+
+ for await (const event of request('claude-3.5-sonnet', messages)) {
+   // Handle streaming response
+ }
+ ```
+
+ ### 2. Tool Calling & Function Execution
+
+ ```typescript
+ // Define tools with TypeScript types
+ interface WeatherParams {
+   city: string;
+   unit?: 'celsius' | 'fahrenheit';
+ }
+
+ const weatherTool: ToolFunction = {
+   function: async ({ city, unit = 'celsius' }: WeatherParams) => {
+     // Real implementation would call weather API
+     const temp = unit === 'celsius' ? 22 : 72;
+     return `${temp}°${unit[0].toUpperCase()} in ${city}`;
    },
    definition: {
      type: 'function',
      function: {
        name: 'get_weather',
-       description: 'Get weather for a city',
+       description: 'Get current weather for a city',
        parameters: {
          type: 'object',
          properties: {
-           city: { type: 'string', description: 'City name' }
+           city: { type: 'string', description: 'City name' },
+           unit: {
+             type: 'string',
+             enum: ['celsius', 'fahrenheit'],
+             description: 'Temperature unit'
+           }
          },
          required: ['city']
        }
      }
    }
- }];
-
- // Make a request with automatic tool execution
- const response = await request('claude-3-5-sonnet-20241022', [
-   { type: 'message', role: 'user', content: 'What\'s the weather in Paris?' }
- ], {
-   tools,
-   maxToolCalls: 10 // Maximum rounds of tool execution (default: 10)
+ };
+
+ // Use with automatic execution
+ for await (const event of request('gpt-4o', [
+   { type: 'message', role: 'user', content: 'What\'s the weather in Tokyo and New York?' }
+ ], { tools: [weatherTool] })) {
+   if (event.type === 'tool_start') {
+     console.log('Calling tool:', event.tool_calls[0].function.name);
+   } else if (event.type === 'text_delta') {
+     process.stdout.write(event.delta);
+   }
+ }
+ ```
+
+ ### 3. Model Selection Strategies
+
+ ```typescript
+ import { getModelFromClass, request } from '@just-every/ensemble';
+
+ // Route based on task type
+ async function intelligentRequest(task: string, messages: ResponseInput) {
+   let model: string;
+
+   if (task.includes('code') || task.includes('debug')) {
+     model = getModelFromClass('code'); // Best code model
+   } else if (task.includes('analyze') || task.includes('reasoning')) {
+     model = getModelFromClass('reasoning'); // Best reasoning model
+   } else {
+     model = getModelFromClass('standard'); // Cost-effective general model
+   }
+
+   console.log(`Using ${model} for ${task}`);
+
+   return request(model, messages, {
+     fallbackModels: ['gpt-4o-mini', 'claude-3-5-haiku'] // Fallback options
+   });
+ }
+
+ // Use model rotation for consensus
+ async function consensusRequest(messages: ResponseInput) {
+   const models = ['gpt-4o', 'claude-3.5-sonnet', 'gemini-2.0-flash'];
+   const responses = [];
+
+   for (const model of models) {
+     const stream = request(model, messages);
+     const result = await convertStreamToMessages(stream);
+     responses.push(result.fullResponse);
+   }
+
+   // Analyze responses for consensus
+   return analyzeConsensus(responses);
+ }
+ ```
+
+ ### 4. Structured Output & JSON Mode
+
+ ```typescript
+ // JSON mode for reliable parsing
+ const jsonStream = request('gpt-4o', [
+   { type: 'message', role: 'user', content: 'List 3 programming languages with their pros/cons as JSON' }
+ ], {
+   responseFormat: { type: 'json_object' }
  });

- console.log(response); // "Based on the current weather data, Paris is experiencing sunny weather..."
+ let jsonContent = '';
+ for await (const event of jsonStream) {
+   if (event.type === 'text_delta') {
+     jsonContent += event.delta;
+   }
+ }
+
+ const data = JSON.parse(jsonContent);
+
+ // Structured output with schema validation
+ const schema = {
+   type: 'object',
+   properties: {
+     name: { type: 'string' },
+     age: { type: 'number' },
+     skills: {
+       type: 'array',
+       items: { type: 'string' }
+     }
+   },
+   required: ['name', 'age', 'skills']
+ };

- // Custom tool execution handler
- const responseWithCustomHandler = await request('gpt-4o', messages, {
-   tools,
-   processToolCall: async (toolCalls) => {
-     // Custom tool execution logic
-     console.log('Executing tools:', toolCalls);
-     return toolCalls.map(tc => 'Custom result');
+ const structuredStream = request('gpt-4o', [
+   { type: 'message', role: 'user', content: 'Generate a developer profile' }
+ ], {
+   responseFormat: {
+     type: 'json_schema',
+     json_schema: {
+       name: 'developer_profile',
+       schema: schema,
+       strict: true
+     }
    }
  });
  ```

- ### Stream Conversion
+ ### 5. Image Processing

- Convert streaming events into conversation history for chaining LLM calls:
+ ```typescript
+ // Analyze images with vision models
+ const imageStream = request('gpt-4o', [
+   {
+     type: 'message',
+     role: 'user',
+     content: [
+       { type: 'input_text', text: 'What\'s in this image? Describe any text you see.' },
+       {
+         type: 'input_image',
+         image_url: 'data:image/jpeg;base64,...',
+         detail: 'high' // 'auto' | 'low' | 'high'
+       }
+     ]
+   }
+ ], {
+   maxImageDimension: 2048 // Auto-resize large images
+ });
+
+ // Multiple images
+ const comparison = request('claude-3.5-sonnet', [
+   {
+     type: 'message',
+     role: 'user',
+     content: [
+       { type: 'input_text', text: 'Compare these two designs:' },
+       { type: 'input_image', image_url: 'https://example.com/design1.png' },
+       { type: 'input_image', image_url: 'https://example.com/design2.png' }
+     ]
+   }
+ ]);
+ ```
+
+ ### 6. Error Handling & Resilience
+
+ ```typescript
+ import { isRateLimitError, isAuthenticationError } from '@just-every/ensemble';
+
+ async function robustRequest(model: string, messages: ResponseInput, options?: RequestOptions) {
+   const maxRetries = 3;
+   let lastError;
+
+   for (let i = 0; i < maxRetries; i++) {
+     try {
+       const events = [];
+       for await (const event of request(model, messages, options)) {
+         if (event.type === 'error') {
+           throw event.error;
+         }
+         events.push(event);
+       }
+       return events;
+
+     } catch (error) {
+       lastError = error;
+
+       if (isAuthenticationError(error)) {
+         throw error; // Don't retry auth errors
+       }
+
+       if (isRateLimitError(error)) {
+         const waitTime = error.retryAfter || Math.pow(2, i) * 1000;
+         console.log(`Rate limited. Waiting ${waitTime}ms...`);
+         await new Promise(resolve => setTimeout(resolve, waitTime));
+         continue;
+       }
+
+       // Try fallback model
+       if (options?.fallbackModels?.[i]) {
+         model = options.fallbackModels[i];
+         console.log(`Falling back to ${model}`);
+         continue;
+       }
+     }
+   }
+
+   throw lastError;
+ }
+ ```
+
+ ## Utilities
+
+ ### Cost & Usage Tracking
+
+ ```typescript
+ import { costTracker, quotaTracker } from '@just-every/ensemble';
+
+ // Track costs across requests
+ for await (const event of request('gpt-4o', messages)) {
+   if (event.type === 'cost_update') {
+     console.log(`Tokens: ${event.usage.input_tokens} in, ${event.usage.output_tokens} out`);
+     console.log(`Cost: $${event.usage.total_cost.toFixed(4)}`);
+   }
+ }
+
+ // Get cumulative costs
+ const usage = costTracker.getAllUsage();
+ for (const [model, stats] of Object.entries(usage)) {
+   console.log(`${model}: $${stats.total_cost.toFixed(2)} for ${stats.request_count} requests`);
+ }
+
+ // Check quotas before making requests
+ if (quotaTracker.canMakeRequest('gpt-4o', 'openai')) {
+   // Safe to proceed
+ } else {
+   const resetTime = quotaTracker.getResetTime('openai');
+   console.log(`Quota exceeded. Resets at ${resetTime}`);
+ }
+ ```
+
+ ### Stream Conversion & Chaining

  ```typescript
  import { convertStreamToMessages, chainRequests } from '@just-every/ensemble';

- // Convert a single stream to messages
- const stream = request('claude-3-5-sonnet-20241022', [
-   { type: 'message', role: 'user', content: 'Tell me a joke' }
+ // Convert stream to conversation history
+ const stream = request('claude-3.5-sonnet', [
+   { type: 'message', role: 'user', content: 'Write a haiku about coding' }
  ]);

  const result = await convertStreamToMessages(stream);
- console.log(result.messages); // Array of ResponseInput items
- console.log(result.fullResponse); // Complete response text
+ console.log(result.messages); // Full conversation history
+ console.log(result.fullResponse); // Just the assistant's response

- // Chain multiple requests together
- const chainResult = await chainRequests([
+ // Chain multiple models for multi-step tasks
+ const analysis = await chainRequests([
+   {
+     model: getModelFromClass('code'),
+     systemPrompt: 'Analyze this code for bugs and security issues',
+   },
    {
-     model: 'claude-3-5-sonnet-20241022',
-     systemPrompt: 'You are a helpful assistant that tells jokes.',
+     model: getModelFromClass('reasoning'),
+     systemPrompt: 'Prioritize the issues found and suggest fixes',
    },
    {
-     model: 'gpt-4o',
-     systemPrompt: 'Rate the previous joke on a scale of 1-10.',
+     model: 'gpt-4o-mini',
+     systemPrompt: 'Summarize the analysis in 3 bullet points',
    }
  ], [
-   { type: 'message', role: 'user', content: 'Tell me a joke about programming' }
+   { type: 'message', role: 'user', content: codeToAnalyze }
  ]);
+ ```

- // Custom tool processing during conversion
- const streamWithTools = request('gpt-4o', messages, {
-   tools: [weatherTool]
- });
+ ### Image Utilities

- const toolResult = await convertStreamToMessages(streamWithTools, [], {
-   processToolCall: async (toolCalls) => {
-     // Process tool calls and return results
-     const results = await Promise.all(
-       toolCalls.map(call => processMyTool(call))
-     );
-     return results;
-   },
-   onThinking: (msg) => console.log('Thinking:', msg.content),
-   onResponse: (msg) => console.log('Response:', msg.content),
- });
+ ```typescript
+ import { resizeImageForModel, imageToText } from '@just-every/ensemble';
+
+ // Auto-resize for specific model requirements
+ const resized = await resizeImageForModel(
+   base64ImageData,
+   'gpt-4o', // Different models have different size limits
+   { maxDimension: 2048 }
+ );
+
+ // Extract text from images
+ const extractedText = await imageToText(imageBuffer);
+ console.log('Found text:', extractedText);
  ```

- ### Logging
-
- The ensemble package includes a pluggable logging system for LLM requests and responses:
+ ### Logging & Debugging

  ```typescript
  import { setEnsembleLogger, EnsembleLogger } from '@just-every/ensemble';

- // Implement custom logger
- class CustomLogger implements EnsembleLogger {
+ // Production-ready logger example
+ class ProductionLogger implements EnsembleLogger {
    log_llm_request(agentId: string, providerName: string, model: string, requestData: unknown, timestamp?: Date): string {
-     // Log request and return request ID for correlation
-     console.log(`Request: ${agentId} -> ${providerName}/${model}`);
-     return `req_${Date.now()}`;
+     const requestId = `req_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
+
+     // Log to your monitoring system
+     logger.info('LLM Request', {
+       requestId,
+       agentId,
+       provider: providerName,
+       model,
+       timestamp,
+       // Be careful not to log sensitive data
+       messageCount: (requestData as any).messages?.length,
+       hasTools: !!(requestData as any).tools?.length
+     });
+
+     return requestId;
    }

    log_llm_response(requestId: string | undefined, responseData: unknown, timestamp?: Date): void {
-     // Log response using request ID
-     console.log(`Response for: ${requestId}`);
+     const response = responseData as any;
+
+     logger.info('LLM Response', {
+       requestId,
+       timestamp,
+       inputTokens: response.usage?.input_tokens,
+       outputTokens: response.usage?.output_tokens,
+       totalCost: response.usage?.total_cost,
+       cached: response.usage?.cache_creation_input_tokens > 0
+     });
    }

    log_llm_error(requestId: string | undefined, errorData: unknown, timestamp?: Date): void {
-     // Log error using request ID
-     console.log(`Error for: ${requestId}`);
+     logger.error('LLM Error', {
+       requestId,
+       timestamp,
+       error: errorData,
+       // Include retry information if available
+       retryAfter: (errorData as any).retryAfter
+     });
    }
  }

- // Enable logging
- setEnsembleLogger(new CustomLogger());
+ // Enable logging globally
+ setEnsembleLogger(new ProductionLogger());
+
+ // Debug mode for development
+ if (process.env.NODE_ENV === 'development') {
+   setEnsembleLogger({
+     log_llm_request: (agent, provider, model, data) => {
+       console.log(`[${new Date().toISOString()}] → ${provider}/${model}`);
+       return Date.now().toString();
+     },
+     log_llm_response: (id, data) => {
+       const response = data as any;
+       console.log(`[${new Date().toISOString()}] ← ${response.usage?.total_tokens} tokens`);
+     },
+     log_llm_error: (id, error) => {
+       console.error(`[${new Date().toISOString()}] ✗ Error:`, error);
+     }
+   });
+ }
+ ```
+
+ ## Advanced Topics
+
+ ### Custom Model Providers
+
+ ```typescript
+ import { ModelProvider, registerExternalModel } from '@just-every/ensemble';
+
+ // Register a custom model
+ registerExternalModel({
+   id: 'my-custom-model',
+   provider: 'custom',
+   inputCost: 0.001,
+   outputCost: 0.002,
+   contextWindow: 8192,
+   maxOutput: 4096,
+   supportsTools: true,
+   supportsVision: false,
+   supportsStreaming: true
+ });
+
+ // Use your custom model
+ const stream = request('my-custom-model', messages);
+ ```

- // All ensemble requests will now be logged
+ ### Performance Optimization
+
+ ```typescript
+ // Batch processing with concurrency control
+ async function batchProcess(items: string[], concurrency = 3) {
+   const results = [];
+   const queue = [...items];
+
+   async function worker() {
+     while (queue.length > 0) {
+       const item = queue.shift()!;
+       const stream = request('gpt-4o-mini', [
+         { type: 'message', role: 'user', content: `Process: ${item}` }
+       ]);
+
+       const result = await convertStreamToMessages(stream);
+       results.push({ item, result: result.fullResponse });
+     }
+   }
+
+   // Run workers concurrently
+   await Promise.all(Array(concurrency).fill(null).map(() => worker()));
+   return results;
+ }
+
+ // Stream multiple requests in parallel
+ async function parallelStreaming(prompts: string[]) {
+   const streams = prompts.map(prompt =>
+     request('claude-3.5-haiku', [
+       { type: 'message', role: 'user', content: prompt }
+     ])
+   );
+
+   // Process all streams concurrently
+   const results = await Promise.all(
+     streams.map(stream => convertStreamToMessages(stream))
+   );
+
+   return results.map(r => r.fullResponse);
+ }
  ```

  ## Environment Variables
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@just-every/ensemble",
-   "version": "0.1.13",
+   "version": "0.1.15",
    "description": "LLM provider abstraction layer with unified streaming interface",
    "type": "module",
    "main": "./dist/index.js",