@just-every/ensemble 0.1.14 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -32,6 +32,23 @@ Perhaps most importantly, the ensemble approach future-proofs your application.
  npm install @just-every/ensemble
  ```
 
+ ### Migration from OpenAI SDK
+
+ If you're currently using the OpenAI SDK, migration is simple:
+
+ ```typescript
+ // Before:
+ import OpenAI from 'openai';
+ const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
+
+ // After:
+ import OpenAIEnsemble from '@just-every/ensemble/openai-compat';
+ const client = OpenAIEnsemble;
+
+ // Your existing code works unchanged!
+ const completion = await client.chat.completions.create({ /* ... */ });
+ ```
+
  ## Quick Start
 
  ```typescript
@@ -104,187 +121,628 @@ for await (const event of earlyStream) {
 
  ## API Reference
 
- ### `request(model, messages, options?)`
+ ### Core Functions
 
- Main function for making LLM requests using the AsyncGenerator API.
+ #### `request(model, messages, options?)`
+
+ Main function for making LLM requests with streaming responses and automatic tool execution.
 
  **Parameters:**
- - `model` (string): Model identifier
- - `messages` (ResponseInput): Array of message objects
+ - `model` (string): Model identifier (e.g., 'gpt-4o', 'claude-3.5-sonnet', 'gemini-2.0-flash')
+ - `messages` (ResponseInput): Array of message objects in the conversation
  - `options` (RequestOptions): Optional configuration object
 
  **Returns:** `AsyncGenerator<EnsembleStreamEvent>` - An async generator that yields streaming events
 
  ```typescript
  interface RequestOptions {
-   agentId?: string;
-   tools?: ToolFunction[];
-   modelSettings?: ModelSettings;
-   modelClass?: ModelClassID;
+   agentId?: string;                // Identifier for logging/tracking
+   tools?: ToolFunction[];          // Array of tool definitions
+   toolChoice?: ToolChoice;         // Control tool selection behavior
+   maxToolCalls?: number;           // Max rounds of tool execution (default: 10, 0 = disabled)
+   processToolCall?: (toolCalls: ToolCall[]) => Promise<any>; // Custom tool handler
+   modelSettings?: ModelSettings;   // Temperature, maxTokens, etc.
+   modelClass?: ModelClassID;       // 'standard' | 'code' | 'reasoning' | 'monologue'
+   responseFormat?: ResponseFormat; // JSON mode or structured output
+   maxImageDimension?: number;      // Auto-resize images (default: provider-specific)
+   fallbackModels?: string[];       // Models to try if primary fails
  }
 
- // Usage with try/catch for error handling
- try {
-   for await (const event of request(model, messages, options)) {
-     // Process events
-   }
- } catch (error) {
-   // Handle errors
- }
+ // Stream event types
+ type EnsembleStreamEvent =
+   | { type: 'text_delta', delta: string }
+   | { type: 'text', text: string }
+   | { type: 'message_delta', content: string }
+   | { type: 'message_complete', content: string }
+   | { type: 'tool_start', tool_calls: ToolCall[] }
+   | { type: 'cost_update', usage: TokenUsage }
+   | { type: 'stream_end', timestamp: string }
+   | { type: 'error', error: Error };
  ```
 
 
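+ Putting the options and event types together — a minimal sketch (the option values shown are illustrative, not defaults):
+
+ ```typescript
+ import { request } from '@just-every/ensemble';
+
+ for await (const event of request('gpt-4o', [
+   { type: 'message', role: 'user', content: 'Summarize this repository' }
+ ], {
+   agentId: 'docs-demo',            // appears in logs
+   maxToolCalls: 0,                 // disable automatic tool execution
+   fallbackModels: ['gpt-4o-mini']  // tried if the primary model fails
+ })) {
+   switch (event.type) {
+     case 'text_delta':  process.stdout.write(event.delta); break;
+     case 'cost_update': console.log('\ntokens:', event.usage); break;
+     case 'error':       console.error(event.error); break;
+   }
+ }
+ ```
+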
- ### Model Provider Interface
+ ### Working with Models
 
- Each provider implements the `ModelProvider` interface:
+ #### Model Selection
 
  ```typescript
- interface ModelProvider {
-   createResponseStream(
-     model: string,
-     messages: ResponseInput,
-     agent: EnsembleAgent
-   ): AsyncGenerator<EnsembleStreamEvent>;
+ import { getModelFromClass, findModel, MODEL_REGISTRY } from '@just-every/ensemble';
+
+ // Get the best model for a specific task type
+ const codeModel = getModelFromClass('code');           // Best available code model
+ const reasoningModel = getModelFromClass('reasoning'); // For complex reasoning tasks
+
+ // Check if a model exists
+ const modelInfo = findModel('gpt-4o');
+ if (modelInfo) {
+   console.log(`Provider: ${modelInfo.provider}`);
+   console.log(`Input cost: $${modelInfo.inputCost}/million tokens`);
+ }
+
+ // List all available models
+ for (const [modelName, info] of Object.entries(MODEL_REGISTRY)) {
+   console.log(`${modelName}: ${info.provider}`);
  }
  ```
 
- ### Utilities
+ #### Model Classes
 
- - **Cost Tracking**: Monitor token usage and costs with cost_tracker
- - **Quota Management**: Track API quotas and rate limits with quota_tracker
- - **Image Processing**: Convert images to text, resize, and optimize
- - **Logging System**: Pluggable request/response logging with configurable backends
- - **Communication**: Logging and debugging utilities
- - **Delta Buffer**: Handle streaming response deltas
- - **AsyncQueue**: Generic async queue for bridging callbacks to async iteration (used internally)
+ - **standard**: General-purpose models for everyday tasks
+ - **code**: Optimized for programming and technical tasks
+ - **reasoning**: Advanced models for complex logical reasoning
+ - **monologue**: Models supporting extended thinking/reasoning traces
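+
+ A class resolves to a concrete model at request time — a minimal sketch using `getModelFromClass` from above (which model each class returns depends on the provider API keys you have configured):
+
+ ```typescript
+ import { getModelFromClass, request } from '@just-every/ensemble';
+
+ // Pick the best available model for the task class at runtime
+ const model = getModelFromClass('reasoning');
+
+ for await (const event of request(model, [
+   { type: 'message', role: 'user', content: 'Prove that sqrt(2) is irrational' }
+ ])) {
+   if (event.type === 'text_delta') process.stdout.write(event.delta);
+ }
+ ```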
 
- ### Automatic Tool Execution
+ ### Message Types
 
- The `request` function provides automatic tool execution:
+ ```typescript
+ // User/assistant messages
+ interface TextMessage {
+   type: 'message';
+   role: 'user' | 'assistant' | 'developer';
+   content: string | MessageContent[];
+   status?: 'completed' | 'in_progress';
+ }
+
+ // Multi-modal content
+ type MessageContent =
+   | { type: 'input_text', text: string }
+   | { type: 'input_image', image_url: string, detail?: 'auto' | 'low' | 'high' }
+   | { type: 'tool_use', id: string, name: string, arguments: any };
+
+ // Tool-related messages
+ interface FunctionCall {
+   type: 'function_call';
+   id: string;
+   name: string;
+   arguments: string;
+ }
+
+ interface FunctionCallOutput {
+   type: 'function_call_output';
+   id: string;
+   output: string;
+ }
+ ```
+
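+ As a concrete illustration, a completed tool round-trip appears in history as a `function_call` followed by a matching `function_call_output` — a hand-built sketch (the id values are illustrative):
+
+ ```typescript
+ const history: ResponseInput = [
+   { type: 'message', role: 'user', content: 'What\'s the weather in Paris?' },
+   // The model requested a tool call...
+   { type: 'function_call', id: 'call_1', name: 'get_weather', arguments: '{"city":"Paris"}' },
+   // ...and this result was fed back to it
+   { type: 'function_call_output', id: 'call_1', output: 'Sunny, 22°C' }
+ ];
+ ```
+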
+ ## Common Use Cases
+
+ ### 1. Basic Conversations
 
  ```typescript
  import { request } from '@just-every/ensemble';
 
- // Define tools
- const tools = [{
-   function: async ({ city }: { city: string }) => {
-     return `Weather in ${city}: Sunny, 72°F`;
+ // Simple Q&A
+ for await (const event of request('gpt-4o-mini', [
+   { type: 'message', role: 'user', content: 'Explain quantum computing in simple terms' }
+ ])) {
+   if (event.type === 'text_delta') {
+     process.stdout.write(event.delta);
+   }
+ }
+
+ // Multi-turn conversation
+ const messages = [
+   { type: 'message', role: 'developer', content: 'You are a helpful coding assistant' },
+   { type: 'message', role: 'user', content: 'How do I center a div in CSS?' },
+   { type: 'message', role: 'assistant', content: 'Here are several ways...' },
+   { type: 'message', role: 'user', content: 'What about using flexbox?' }
+ ];
+
+ for await (const event of request('claude-3.5-sonnet', messages)) {
+   // Handle streaming response
+ }
+ ```
+
+ ### 2. Tool Calling & Function Execution
+
+ ```typescript
+ // Define tools with TypeScript types
+ interface WeatherParams {
+   city: string;
+   unit?: 'celsius' | 'fahrenheit';
+ }
+
+ const weatherTool: ToolFunction = {
+   function: async ({ city, unit = 'celsius' }: WeatherParams) => {
+     // Real implementation would call a weather API
+     const temp = unit === 'celsius' ? 22 : 72;
+     return `${temp}°${unit[0].toUpperCase()} in ${city}`;
    },
    definition: {
      type: 'function',
      function: {
        name: 'get_weather',
-       description: 'Get weather for a city',
+       description: 'Get current weather for a city',
        parameters: {
          type: 'object',
          properties: {
-           city: { type: 'string', description: 'City name' }
+           city: { type: 'string', description: 'City name' },
+           unit: {
+             type: 'string',
+             enum: ['celsius', 'fahrenheit'],
+             description: 'Temperature unit'
+           }
          },
          required: ['city']
        }
      }
    }
- }];
-
- // Make a request with automatic tool execution
- const response = await request('claude-3-5-sonnet-20241022', [
-   { type: 'message', role: 'user', content: 'What\'s the weather in Paris?' }
- ], {
-   tools,
-   maxToolCalls: 10 // Maximum rounds of tool execution (default: 10)
+ };
+
+ // Use with automatic execution
+ for await (const event of request('gpt-4o', [
+   { type: 'message', role: 'user', content: 'What\'s the weather in Tokyo and New York?' }
+ ], { tools: [weatherTool] })) {
+   if (event.type === 'tool_start') {
+     console.log('Calling tool:', event.tool_calls[0].function.name);
+   } else if (event.type === 'text_delta') {
+     process.stdout.write(event.delta);
+   }
+ }
+ ```
+
+ ### 3. Model Selection Strategies
+
+ ```typescript
+ import { getModelFromClass, request, convertStreamToMessages } from '@just-every/ensemble';
+
+ // Route based on task type
+ async function intelligentRequest(task: string, messages: ResponseInput) {
+   let model: string;
+
+   if (task.includes('code') || task.includes('debug')) {
+     model = getModelFromClass('code');      // Best code model
+   } else if (task.includes('analyze') || task.includes('reasoning')) {
+     model = getModelFromClass('reasoning'); // Best reasoning model
+   } else {
+     model = getModelFromClass('standard');  // Cost-effective general model
+   }
+
+   console.log(`Using ${model} for ${task}`);
+
+   return request(model, messages, {
+     fallbackModels: ['gpt-4o-mini', 'claude-3-5-haiku'] // Fallback options
+   });
+ }
+
+ // Use model rotation for consensus
+ async function consensusRequest(messages: ResponseInput) {
+   const models = ['gpt-4o', 'claude-3.5-sonnet', 'gemini-2.0-flash'];
+   const responses = [];
+
+   for (const model of models) {
+     const stream = request(model, messages);
+     const result = await convertStreamToMessages(stream);
+     responses.push(result.fullResponse);
+   }
+
+   // Analyze responses for consensus (analyzeConsensus is your own helper)
+   return analyzeConsensus(responses);
+ }
+ ```
+
+ ### 4. Structured Output & JSON Mode
+
+ ```typescript
+ // JSON mode for reliable parsing
+ const jsonStream = request('gpt-4o', [
+   { type: 'message', role: 'user', content: 'List 3 programming languages with their pros/cons as JSON' }
+ ], {
+   responseFormat: { type: 'json_object' }
  });
 
- console.log(response); // "Based on the current weather data, Paris is experiencing sunny weather..."
+ let jsonContent = '';
+ for await (const event of jsonStream) {
+   if (event.type === 'text_delta') {
+     jsonContent += event.delta;
+   }
+ }
 
- // Custom tool execution handler
- const responseWithCustomHandler = await request('gpt-4o', messages, {
-   tools,
-   processToolCall: async (toolCalls) => {
-     // Custom tool execution logic
-     console.log('Executing tools:', toolCalls);
-     return toolCalls.map(tc => 'Custom result');
+ const data = JSON.parse(jsonContent);
+
+ // Structured output with schema validation
+ const schema = {
+   type: 'object',
+   properties: {
+     name: { type: 'string' },
+     age: { type: 'number' },
+     skills: {
+       type: 'array',
+       items: { type: 'string' }
+     }
+   },
+   required: ['name', 'age', 'skills']
+ };
+
+ const structuredStream = request('gpt-4o', [
+   { type: 'message', role: 'user', content: 'Generate a developer profile' }
+ ], {
+   responseFormat: {
+     type: 'json_schema',
+     json_schema: {
+       name: 'developer_profile',
+       schema: schema,
+       strict: true
+     }
    }
  });
  ```
 
- ### Stream Conversion
-
- Convert streaming events into conversation history for chaining LLM calls:
+ ### 5. Image Processing
 
  ```typescript
- import { convertStreamToMessages, chainRequests } from '@just-every/ensemble';
+ // Analyze images with vision models
+ const imageStream = request('gpt-4o', [
+   {
+     type: 'message',
+     role: 'user',
+     content: [
+       { type: 'input_text', text: 'What\'s in this image? Describe any text you see.' },
+       {
+         type: 'input_image',
+         image_url: 'data:image/jpeg;base64,...',
+         detail: 'high' // 'auto' | 'low' | 'high'
+       }
+     ]
+   }
+ ], {
+   maxImageDimension: 2048 // Auto-resize large images
+ });
 
- // Convert a single stream to messages
- const stream = request('claude-3-5-sonnet-20241022', [
-   { type: 'message', role: 'user', content: 'Tell me a joke' }
+ // Multiple images
+ const comparison = request('claude-3.5-sonnet', [
+   {
+     type: 'message',
+     role: 'user',
+     content: [
+       { type: 'input_text', text: 'Compare these two designs:' },
+       { type: 'input_image', image_url: 'https://example.com/design1.png' },
+       { type: 'input_image', image_url: 'https://example.com/design2.png' }
+     ]
+   }
  ]);
+ ```
 
- const result = await convertStreamToMessages(stream);
- console.log(result.messages); // Array of ResponseInput items
- console.log(result.fullResponse); // Complete response text
+ ### 6. Error Handling & Resilience
 
- // Chain multiple requests together
- const chainResult = await chainRequests([
+ ```typescript
+ import { isRateLimitError, isAuthenticationError } from '@just-every/ensemble';
+
+ async function robustRequest(model: string, messages: ResponseInput, options?: RequestOptions) {
+   const maxRetries = 3;
+   let lastError;
+
+   for (let i = 0; i < maxRetries; i++) {
+     try {
+       const events = [];
+       for await (const event of request(model, messages, options)) {
+         if (event.type === 'error') {
+           throw event.error;
+         }
+         events.push(event);
+       }
+       return events;
+
+     } catch (error) {
+       lastError = error;
+
+       if (isAuthenticationError(error)) {
+         throw error; // Don't retry auth errors
+       }
+
+       if (isRateLimitError(error)) {
+         const waitTime = (error as any).retryAfter || Math.pow(2, i) * 1000;
+         console.log(`Rate limited. Waiting ${waitTime}ms...`);
+         await new Promise(resolve => setTimeout(resolve, waitTime));
+         continue;
+       }
+
+       // Try fallback model
+       if (options?.fallbackModels?.[i]) {
+         model = options.fallbackModels[i];
+         console.log(`Falling back to ${model}`);
+         continue;
+       }
+     }
+   }
+
+   throw lastError;
+ }
+ ```
+
+ ## Utilities
+
+ ### Cost & Usage Tracking
+
+ ```typescript
+ import { costTracker, quotaTracker } from '@just-every/ensemble';
+
+ // Track costs across requests
+ for await (const event of request('gpt-4o', messages)) {
+   if (event.type === 'cost_update') {
+     console.log(`Tokens: ${event.usage.input_tokens} in, ${event.usage.output_tokens} out`);
+     console.log(`Cost: $${event.usage.total_cost.toFixed(4)}`);
+   }
+ }
+
+ // Get cumulative costs
+ const usage = costTracker.getAllUsage();
+ for (const [model, stats] of Object.entries(usage)) {
+   console.log(`${model}: $${stats.total_cost.toFixed(2)} for ${stats.request_count} requests`);
+ }
+
+ // Check quotas before making requests
+ if (quotaTracker.canMakeRequest('gpt-4o', 'openai')) {
+   // Safe to proceed
+ } else {
+   const resetTime = quotaTracker.getResetTime('openai');
+   console.log(`Quota exceeded. Resets at ${resetTime}`);
+ }
+ ```
+
+ ### Stream Conversion & Chaining
+
+ ```typescript
+ import { convertStreamToMessages, chainRequests, getModelFromClass, request } from '@just-every/ensemble';
+
+ // Convert a single stream into conversation messages plus the response text
+ const haiku = await convertStreamToMessages(request('claude-4-sonnet', [
+   { type: 'message', role: 'user', content: 'Write a haiku about coding' }
+ ]));
+ console.log(haiku.fullResponse);
+
+ // Build up a conversation by appending each model's output to the history
+ let messages = [
+   { type: 'message', role: 'developer', content: 'You are a helpful coding assistant' },
+   { type: 'message', role: 'user', content: 'How do I center a div in CSS?' },
+ ];
+ messages = [...messages, ...(await convertStreamToMessages(request('claude-4-sonnet', messages))).messages];
+ messages = [...messages, ...(await convertStreamToMessages(request(getModelFromClass('reasoning_mini'), messages))).messages];
+
+ const result = await convertStreamToMessages(request('gemini-2.5-flash', messages));
+ messages = [...messages, ...result.messages];
+
+ console.log(messages);            // Full conversation history
+ console.log(result.fullResponse); // Just the last assistant response
+
+ // Chain multiple models for multi-step tasks
+ const analysis = await chainRequests(
+   [
+     { type: 'message', role: 'user', content: codeToAnalyze } // codeToAnalyze defined elsewhere
+   ],
+   [
      {
-       model: 'claude-3-5-sonnet-20241022',
-       systemPrompt: 'You are a helpful assistant that tells jokes.',
+       model: getModelFromClass('code'),
+       systemPrompt: 'Analyze this code for bugs and security issues',
      },
      {
-       model: 'gpt-4o',
-       systemPrompt: 'Rate the previous joke on a scale of 1-10.',
+       model: getModelFromClass('reasoning'),
+       systemPrompt: 'Prioritize the issues found and suggest fixes',
+     },
+     {
+       model: 'gpt-4.1-mini',
+       systemPrompt: 'Summarize the analysis in 3 bullet points',
      }
- ], [
-   { type: 'message', role: 'user', content: 'Tell me a joke about programming' }
    ]);
+ ```
 
- // Custom tool processing during conversion
- const streamWithTools = request('gpt-4o', messages, {
-   tools: [weatherTool]
- });
+ ### Image Utilities
 
- const toolResult = await convertStreamToMessages(streamWithTools, [], {
-   processToolCall: async (toolCalls) => {
-     // Process tool calls and return results
-     const results = await Promise.all(
-       toolCalls.map(call => processMyTool(call))
-     );
-     return results;
-   },
-   onThinking: (msg) => console.log('Thinking:', msg.content),
-   onResponse: (msg) => console.log('Response:', msg.content),
- });
+ ```typescript
+ import { resizeImageForModel, imageToText } from '@just-every/ensemble';
+
+ // Auto-resize for specific model requirements
+ const resized = await resizeImageForModel(
+   base64ImageData,
+   'gpt-4o', // Different models have different size limits
+   { maxDimension: 2048 }
+ );
+
+ // Extract text from images
+ const extractedText = await imageToText(imageBuffer);
+ console.log('Found text:', extractedText);
  ```
 
- ### Logging
-
- The ensemble package includes a pluggable logging system for LLM requests and responses:
+ ### Logging & Debugging
 
  ```typescript
  import { setEnsembleLogger, EnsembleLogger } from '@just-every/ensemble';
 
- // Implement custom logger
- class CustomLogger implements EnsembleLogger {
+ // Production-ready logger example
+ class ProductionLogger implements EnsembleLogger {
    log_llm_request(agentId: string, providerName: string, model: string, requestData: unknown, timestamp?: Date): string {
-     // Log request and return request ID for correlation
-     console.log(`Request: ${agentId} -> ${providerName}/${model}`);
-     return `req_${Date.now()}`;
+     const requestId = `req_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
+
+     // Log to your monitoring system
+     logger.info('LLM Request', {
+       requestId,
+       agentId,
+       provider: providerName,
+       model,
+       timestamp,
+       // Be careful not to log sensitive data
+       messageCount: (requestData as any).messages?.length,
+       hasTools: !!(requestData as any).tools?.length
+     });
+
+     return requestId;
    }
 
    log_llm_response(requestId: string | undefined, responseData: unknown, timestamp?: Date): void {
-     // Log response using request ID
-     console.log(`Response for: ${requestId}`);
+     const response = responseData as any;
+
+     logger.info('LLM Response', {
+       requestId,
+       timestamp,
+       inputTokens: response.usage?.input_tokens,
+       outputTokens: response.usage?.output_tokens,
+       totalCost: response.usage?.total_cost,
+       cached: response.usage?.cache_creation_input_tokens > 0
+     });
    }
 
    log_llm_error(requestId: string | undefined, errorData: unknown, timestamp?: Date): void {
-     // Log error using request ID
-     console.log(`Error for: ${requestId}`);
+     logger.error('LLM Error', {
+       requestId,
+       timestamp,
+       error: errorData,
+       // Include retry information if available
+       retryAfter: (errorData as any).retryAfter
+     });
    }
  }
 
- // Enable logging
- setEnsembleLogger(new CustomLogger());
+ // Enable logging globally
+ setEnsembleLogger(new ProductionLogger());
 
- // All ensemble requests will now be logged
+ // Debug mode for development
+ if (process.env.NODE_ENV === 'development') {
+   setEnsembleLogger({
+     log_llm_request: (agent, provider, model, data) => {
+       console.log(`[${new Date().toISOString()}] → ${provider}/${model}`);
+       return Date.now().toString();
+     },
+     log_llm_response: (id, data) => {
+       const response = data as any;
+       console.log(`[${new Date().toISOString()}] ← ${response.usage?.total_tokens} tokens`);
+     },
+     log_llm_error: (id, error) => {
+       console.error(`[${new Date().toISOString()}] ✗ Error:`, error);
+     }
+   });
+ }
+ ```
+
+ ## Advanced Topics
+
+ ### OpenAI SDK Compatibility
+
+ Ensemble provides a drop-in replacement for the OpenAI SDK, allowing you to use any supported model with OpenAI's familiar API:
+
+ ```typescript
+ import OpenAIEnsemble from '@just-every/ensemble/openai-compat';
+ // Or named imports: import { chat, completions } from '@just-every/ensemble';
+
+ // Replace the OpenAI client
+ const openai = OpenAIEnsemble; // Instead of: new OpenAI({ apiKey: '...' })
+
+ // Use exactly like the OpenAI SDK - but with any model!
+ const completion = await openai.chat.completions.create({
+   model: 'claude-3.5-sonnet', // or 'gpt-4o', 'gemini-2.0-flash', etc.
+   messages: [
+     { role: 'system', content: 'You are a helpful assistant.' },
+     { role: 'user', content: 'Hello!' }
+   ],
+   temperature: 0.7
+ });
+
+ console.log(completion.choices[0].message.content);
+
+ // Streaming
+ const stream = await openai.chat.completions.create({
+   model: 'gpt-4o-mini',
+   messages: [{ role: 'user', content: 'Tell me a story' }],
+   stream: true
+ });
+
+ for await (const chunk of stream) {
+   process.stdout.write(chunk.choices[0]?.delta?.content || '');
+ }
+
+ // Legacy completions API also supported
+ const legacyCompletion = await openai.completions.create({
+   model: 'deepseek-chat',
+   prompt: 'Once upon a time',
+   max_tokens: 100
+ });
+ ```
+
+ This compatibility layer supports:
+ - All `chat.completions.create` parameters (temperature, tools, response_format, etc.)
+ - Streaming and non-streaming responses
+ - Tool/function calling
+ - Legacy `completions.create` API
+ - Proper TypeScript types matching OpenAI's SDK
+
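+ For example, OpenAI-style `response_format` passes straight through — a minimal sketch reusing the `openai` client from above and assuming the same parameter shape as the OpenAI SDK:
+
+ ```typescript
+ const json = await openai.chat.completions.create({
+   model: 'gemini-2.0-flash',
+   messages: [{ role: 'user', content: 'Return {"ok": true} as JSON' }],
+   response_format: { type: 'json_object' } // forwarded to the underlying provider
+ });
+
+ console.log(JSON.parse(json.choices[0].message.content ?? '{}'));
+ ```
+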
+ ### Custom Model Providers
+
+ ```typescript
+ import { ModelProvider, registerExternalModel } from '@just-every/ensemble';
+
+ // Register a custom model
+ registerExternalModel({
+   id: 'my-custom-model',
+   provider: 'custom',
+   inputCost: 0.001,
+   outputCost: 0.002,
+   contextWindow: 8192,
+   maxOutput: 4096,
+   supportsTools: true,
+   supportsVision: false,
+   supportsStreaming: true
+ });
+
+ // Use your custom model
+ const stream = request('my-custom-model', messages);
+ ```
+
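+ A provider itself implements the `ModelProvider` interface — earlier versions of this README defined it as a single `createResponseStream` method, so a sketch along those lines (how a provider instance is attached to a registered model may differ in the current API):
+
+ ```typescript
+ class MyProvider implements ModelProvider {
+   async *createResponseStream(
+     model: string,
+     messages: ResponseInput,
+     agent: EnsembleAgent
+   ): AsyncGenerator<EnsembleStreamEvent> {
+     // Call your backend here and translate its output into ensemble events
+     yield { type: 'text_delta', delta: 'Hello from my-custom-model' };
+     yield { type: 'stream_end', timestamp: new Date().toISOString() };
+   }
+ }
+ ```
+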
+ ### Performance Optimization
+
+ ```typescript
+ // Batch processing with concurrency control
+ async function batchProcess(items: string[], concurrency = 3) {
+   const results = [];
+   const queue = [...items];
+
+   async function worker() {
+     while (queue.length > 0) {
+       const item = queue.shift()!;
+       const stream = request('gpt-4o-mini', [
+         { type: 'message', role: 'user', content: `Process: ${item}` }
+       ]);
+
+       const result = await convertStreamToMessages(stream);
+       results.push({ item, result: result.fullResponse });
+     }
+   }
+
+   // Run workers concurrently
+   await Promise.all(Array(concurrency).fill(null).map(() => worker()));
+   return results;
+ }
+
+ // Stream multiple requests in parallel
+ async function parallelStreaming(prompts: string[]) {
+   const streams = prompts.map(prompt =>
+     request('claude-3.5-haiku', [
+       { type: 'message', role: 'user', content: prompt }
+     ])
+   );
+
+   // Process all streams concurrently
+   const results = await Promise.all(
+     streams.map(stream => convertStreamToMessages(stream))
+   );
+
+   return results.map(r => r.fullResponse);
+ }
  ```
 
  ## Environment Variables