@just-every/ensemble 0.1.14 → 0.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +562 -104
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/dist/openai-compat.d.ts +79 -0
- package/dist/openai-compat.d.ts.map +1 -0
- package/dist/openai-compat.js +581 -0
- package/dist/openai-compat.js.map +1 -0
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/dist/utils/stream_converter.d.ts +2 -2
- package/dist/utils/stream_converter.d.ts.map +1 -1
- package/dist/utils/stream_converter.js +1 -1
- package/dist/utils/stream_converter.js.map +1 -1
- package/dist/validation.d.ts +12 -12
- package/package.json +1 -1
package/README.md
CHANGED
@@ -32,6 +32,23 @@ Perhaps most importantly, the ensemble approach future-proofs your application.
 npm install @just-every/ensemble
 ```
 
+### Migration from OpenAI SDK
+
+If you're currently using the OpenAI SDK, migration is simple:
+
+```typescript
+// Before:
+import OpenAI from 'openai';
+const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
+
+// After:
+import OpenAIEnsemble from '@just-every/ensemble/openai-compat';
+const client = OpenAIEnsemble;
+
+// Your existing code works unchanged!
+const completion = await client.chat.completions.create({ /* ... */ });
+```
+
 ## Quick Start
 
 ```typescript
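Note: the migrated client takes no `apiKey` argument, which implies keys are read from the environment (see the Environment Variables section at the end of this README). A minimal pre-flight sketch; the variable names below are the standard provider conventions, not verified against this package:

```typescript
// Assumption: provider keys come from the environment, not a constructor.
for (const key of ['OPENAI_API_KEY', 'ANTHROPIC_API_KEY', 'GOOGLE_API_KEY']) {
  if (!process.env[key]) console.warn(`${key} not set; that provider will be unavailable`);
}
```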
@@ -104,187 +121,628 @@ for await (const event of earlyStream) {
 
 ## API Reference
 
-###
+### Core Functions
 
-
+#### `request(model, messages, options?)`
+
+Main function for making LLM requests with streaming responses and automatic tool execution.
 
 **Parameters:**
-- `model` (string): Model identifier
-- `messages` (ResponseInput): Array of message objects
+- `model` (string): Model identifier (e.g., 'gpt-4o', 'claude-3.5-sonnet', 'gemini-2.0-flash')
+- `messages` (ResponseInput): Array of message objects in the conversation
 - `options` (RequestOptions): Optional configuration object
 
 **Returns:** `AsyncGenerator<EnsembleStreamEvent>` - An async generator that yields streaming events
 
 ```typescript
 interface RequestOptions {
-  agentId?: string;
-  tools?: ToolFunction[];
-
-
+  agentId?: string;                // Identifier for logging/tracking
+  tools?: ToolFunction[];          // Array of tool definitions
+  toolChoice?: ToolChoice;         // Control tool selection behavior
+  maxToolCalls?: number;           // Max rounds of tool execution (default: 10, 0 = disabled)
+  processToolCall?: (toolCalls: ToolCall[]) => Promise<any>; // Custom tool handler
+  modelSettings?: ModelSettings;   // Temperature, maxTokens, etc.
+  modelClass?: ModelClassID;       // 'standard' | 'code' | 'reasoning' | 'monologue'
+  responseFormat?: ResponseFormat; // JSON mode or structured output
+  maxImageDimension?: number;      // Auto-resize images (default: provider-specific)
+  fallbackModels?: string[];       // Models to try if primary fails
 }
 
-//
-
-
-
-}
-
-
-}
+// Stream event types
+type EnsembleStreamEvent =
+  | { type: 'text_delta', delta: string }
+  | { type: 'text', text: string }
+  | { type: 'message_delta', content: string }
+  | { type: 'message_complete', content: string }
+  | { type: 'tool_start', tool_calls: ToolCall[] }
+  | { type: 'cost_update', usage: TokenUsage }
+  | { type: 'stream_end', timestamp: string }
+  | { type: 'error', error: Error };
 ```
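The event union above is easiest to consume with a `switch`; a minimal consumer sketch (event shapes exactly as declared above, `messages` as in the examples below):

```typescript
import { request } from '@just-every/ensemble';

// Minimal consumer sketch for EnsembleStreamEvent.
for await (const event of request('gpt-4o-mini', messages)) {
  switch (event.type) {
    case 'text_delta':
      process.stdout.write(event.delta); // incremental text
      break;
    case 'tool_start':
      console.log('Tools invoked:', event.tool_calls.map(t => t.function.name));
      break;
    case 'cost_update':
      console.log('Usage so far:', event.usage);
      break;
    case 'error':
      throw event.error;
  }
}
```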
 
 
-###
+### Working with Models
 
-
+#### Model Selection
 
 ```typescript
-
-
-
-
-
-
+import { getModelFromClass, findModel, MODEL_REGISTRY } from '@just-every/ensemble';
+
+// Get best model for a specific task type
+const codeModel = getModelFromClass('code'); // Returns best available code model
+const reasoningModel = getModelFromClass('reasoning'); // For complex reasoning tasks
+
+// Check if a model exists
+const modelInfo = findModel('gpt-4o');
+if (modelInfo) {
+  console.log(`Provider: ${modelInfo.provider}`);
+  console.log(`Input cost: $${modelInfo.inputCost}/million tokens`);
+}
+
+// List all available models
+for (const [modelName, info] of Object.entries(MODEL_REGISTRY)) {
+  console.log(`${modelName}: ${info.provider}`);
 }
 ```
 
-
+#### Model Classes
 
-- **
-- **
-- **
-- **
-- **Communication**: Logging and debugging utilities
-- **Delta Buffer**: Handle streaming response deltas
-- **AsyncQueue**: Generic async queue for bridging callbacks to async iteration (used internally)
+- **standard**: General-purpose models for everyday tasks
+- **code**: Optimized for programming and technical tasks
+- **reasoning**: Advanced models for complex logical reasoning
+- **monologue**: Models supporting extended thinking/reasoning traces
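A quick way to see which concrete model each class resolves to in your environment, using `getModelFromClass` from the Model Selection example (class names from the list above):

```typescript
import { getModelFromClass } from '@just-every/ensemble';

// Print the concrete model each class currently resolves to.
for (const cls of ['standard', 'code', 'reasoning', 'monologue'] as const) {
  console.log(`${cls} →`, getModelFromClass(cls));
}
```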
 
-###
+### Message Types
 
-
+```typescript
+// User/Assistant messages
+interface TextMessage {
+  type: 'message';
+  role: 'user' | 'assistant' | 'developer';
+  content: string | MessageContent[];
+  status?: 'completed' | 'in_progress';
+}
+
+// Multi-modal content
+type MessageContent =
+  | { type: 'input_text', text: string }
+  | { type: 'input_image', image_url: string, detail?: 'auto' | 'low' | 'high' }
+  | { type: 'tool_use', id: string, name: string, arguments: any };
+
+// Tool-related messages
+interface FunctionCall {
+  type: 'function_call';
+  id: string;
+  name: string;
+  arguments: string;
+}
+
+interface FunctionCallOutput {
+  type: 'function_call_output';
+  id: string;
+  output: string;
+}
+```
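How these types compose into a conversation: a sketch of one tool round-trip, with the call and its output matched by id (ids are illustrative):

```typescript
// A user turn, the model's tool call, and the tool's result,
// ready to be sent back through request() for the final answer.
const history = [
  { type: 'message', role: 'user', content: 'What is the weather in Tokyo?' },
  { type: 'function_call', id: 'call_1', name: 'get_weather', arguments: '{"city":"Tokyo"}' },
  { type: 'function_call_output', id: 'call_1', output: '22°C and clear' },
];
```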
+
+## Common Use Cases
+
+### 1. Basic Conversations
 
 ```typescript
 import { request } from '@just-every/ensemble';
 
-//
-const
-
-
+// Simple Q&A
+for await (const event of request('gpt-4o-mini', [
+  { type: 'message', role: 'user', content: 'Explain quantum computing in simple terms' }
+])) {
+  if (event.type === 'text_delta') {
+    process.stdout.write(event.delta);
+  }
+}
+
+// Multi-turn conversation
+const messages = [
+  { type: 'message', role: 'developer', content: 'You are a helpful coding assistant' },
+  { type: 'message', role: 'user', content: 'How do I center a div in CSS?' },
+  { type: 'message', role: 'assistant', content: 'Here are several ways...' },
+  { type: 'message', role: 'user', content: 'What about using flexbox?' }
+];
+
+for await (const event of request('claude-3.5-sonnet', messages)) {
+  // Handle streaming response
+}
+```
+
+### 2. Tool Calling & Function Execution
+
+```typescript
+// Define tools with TypeScript types
+// (assumes: import { request, type ToolFunction } from '@just-every/ensemble')
+interface WeatherParams {
+  city: string;
+  unit?: 'celsius' | 'fahrenheit';
+}
+
+const weatherTool: ToolFunction = {
+  function: async ({ city, unit = 'celsius' }: WeatherParams) => {
+    // Real implementation would call weather API
+    const temp = unit === 'celsius' ? 22 : 72;
+    return `${temp}°${unit[0].toUpperCase()} in ${city}`;
   },
   definition: {
     type: 'function',
     function: {
       name: 'get_weather',
-      description: 'Get weather for a city',
+      description: 'Get current weather for a city',
       parameters: {
         type: 'object',
         properties: {
-          city: { type: 'string', description: 'City name' }
+          city: { type: 'string', description: 'City name' },
+          unit: {
+            type: 'string',
+            enum: ['celsius', 'fahrenheit'],
+            description: 'Temperature unit'
+          }
         },
         required: ['city']
       }
     }
   }
-}
-
-//
-const
-  { type: 'message', role: 'user', content: 'What\'s the weather in
-], {
-
-
+};
+
+// Use with automatic execution
+for await (const event of request('gpt-4o', [
+  { type: 'message', role: 'user', content: 'What\'s the weather in Tokyo and New York?' }
+], { tools: [weatherTool] })) {
+  if (event.type === 'tool_start') {
+    console.log('Calling tool:', event.tool_calls[0].function.name);
+  } else if (event.type === 'text_delta') {
+    process.stdout.write(event.delta);
+  }
+}
+```
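`RequestOptions` above also documents `maxToolCalls` and `processToolCall` for taking over execution yourself; a hedged sketch following that signature (`runMyTool` is a hypothetical stand-in for your own dispatcher, not part of this package):

```typescript
// Sketch: cap tool rounds and run tool calls through your own dispatcher.
for await (const event of request('gpt-4o', messages, {
  tools: [weatherTool],
  maxToolCalls: 2, // at most two rounds of tool execution
  processToolCall: async (toolCalls) => {
    // runMyTool is hypothetical: e.g., sandboxed or cached execution
    return Promise.all(toolCalls.map(call => runMyTool(call)));
  }
})) {
  // handle events as in the example above
}
```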
+
+### 3. Model Selection Strategies
+
+```typescript
+import { getModelFromClass, request, convertStreamToMessages, type ResponseInput } from '@just-every/ensemble';
+
+// Route based on task type
+async function intelligentRequest(task: string, messages: ResponseInput) {
+  let model: string;
+
+  if (task.includes('code') || task.includes('debug')) {
+    model = getModelFromClass('code'); // Best code model
+  } else if (task.includes('analyze') || task.includes('reasoning')) {
+    model = getModelFromClass('reasoning'); // Best reasoning model
+  } else {
+    model = getModelFromClass('standard'); // Cost-effective general model
+  }
+
+  console.log(`Using ${model} for ${task}`);
+
+  return request(model, messages, {
+    fallbackModels: ['gpt-4o-mini', 'claude-3-5-haiku'] // Fallback options
+  });
+}
+
+// Use model rotation for consensus
+async function consensusRequest(messages: ResponseInput) {
+  const models = ['gpt-4o', 'claude-3.5-sonnet', 'gemini-2.0-flash'];
+  const responses = [];
+
+  for (const model of models) {
+    const stream = request(model, messages);
+    const result = await convertStreamToMessages(stream);
+    responses.push(result.fullResponse);
+  }
+
+  // Analyze responses for consensus (analyzeConsensus is user-defined; see sketch below)
+  return analyzeConsensus(responses);
+}
+```
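`analyzeConsensus` is left to the caller; one minimal, purely illustrative definition (a naive exact-match vote):

```typescript
// Naive consensus: return the response the most models produced verbatim.
// Real use would compare responses semantically, not by string equality.
function analyzeConsensus(responses: string[]): string {
  const counts = new Map<string, number>();
  for (const r of responses) counts.set(r, (counts.get(r) ?? 0) + 1);
  return [...counts.entries()].sort((a, b) => b[1] - a[1])[0][0];
}
```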
+
+### 4. Structured Output & JSON Mode
+
+```typescript
+// JSON mode for reliable parsing
+const jsonStream = request('gpt-4o', [
+  { type: 'message', role: 'user', content: 'List 3 programming languages with their pros/cons as JSON' }
+], {
+  responseFormat: { type: 'json_object' }
 });
 
-
+let jsonContent = '';
+for await (const event of jsonStream) {
+  if (event.type === 'text_delta') {
+    jsonContent += event.delta;
+  }
+}
 
-
-
-
-
-
-
-
+const data = JSON.parse(jsonContent);
+
+// Structured output with schema validation
+const schema = {
+  type: 'object',
+  properties: {
+    name: { type: 'string' },
+    age: { type: 'number' },
+    skills: {
+      type: 'array',
+      items: { type: 'string' }
+    }
+  },
+  required: ['name', 'age', 'skills']
+};
+
+const structuredStream = request('gpt-4o', [
+  { type: 'message', role: 'user', content: 'Generate a developer profile' }
+], {
+  responseFormat: {
+    type: 'json_schema',
+    json_schema: {
+      name: 'developer_profile',
+      schema: schema,
+      strict: true
+    }
   }
 });
 ```
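The structured stream still has to be drained; a sketch using the `message_complete` event from the union above (assuming, per its declaration, that it carries the full response text):

```typescript
// Collect the schema-validated JSON from the stream.
let profileJson = '';
for await (const event of structuredStream) {
  if (event.type === 'message_complete') {
    profileJson = event.content;
  }
}
const profile = JSON.parse(profileJson);
```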
 
-###
-
-Convert streaming events into conversation history for chaining LLM calls:
+### 5. Image Processing
 
 ```typescript
-
+// Analyze images with vision models
+const imageStream = request('gpt-4o', [
+  {
+    type: 'message',
+    role: 'user',
+    content: [
+      { type: 'input_text', text: 'What\'s in this image? Describe any text you see.' },
+      {
+        type: 'input_image',
+        image_url: 'data:image/jpeg;base64,...',
+        detail: 'high' // 'auto' | 'low' | 'high'
+      }
+    ]
+  }
+], {
+  maxImageDimension: 2048 // Auto-resize large images
+});
 
-//
-const
-  {
+// Multiple images
+const comparison = request('claude-3.5-sonnet', [
+  {
+    type: 'message',
+    role: 'user',
+    content: [
+      { type: 'input_text', text: 'Compare these two designs:' },
+      { type: 'input_image', image_url: 'https://example.com/design1.png' },
+      { type: 'input_image', image_url: 'https://example.com/design2.png' }
+    ]
+  }
 ]);
+```
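For local files, the `data:` URL in the example can be built with Node's `fs`; a small helper sketch (the helper name is ours, not part of this package):

```typescript
import { readFileSync } from 'node:fs';

// Hypothetical helper: encode a local image as the data URL the example expects.
function toDataUrl(path: string, mime = 'image/jpeg'): string {
  return `data:${mime};base64,${readFileSync(path).toString('base64')}`;
}
```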
 
-
-console.log(result.messages); // Array of ResponseInput items
-console.log(result.fullResponse); // Complete response text
+### 6. Error Handling & Resilience
 
-
-
+```typescript
+import { request, isRateLimitError, isAuthenticationError, type ResponseInput, type RequestOptions } from '@just-every/ensemble';
+
+async function robustRequest(model: string, messages: ResponseInput, options?: RequestOptions) {
+  const maxRetries = 3;
+  let lastError;
+
+  for (let i = 0; i < maxRetries; i++) {
+    try {
+      const events = [];
+      for await (const event of request(model, messages, options)) {
+        if (event.type === 'error') {
+          throw event.error;
+        }
+        events.push(event);
+      }
+      return events;
+
+    } catch (error) {
+      lastError = error;
+
+      if (isAuthenticationError(error)) {
+        throw error; // Don't retry auth errors
+      }
+
+      if (isRateLimitError(error)) {
+        const waitTime = (error as any).retryAfter || Math.pow(2, i) * 1000;
+        console.log(`Rate limited. Waiting ${waitTime}ms...`);
+        await new Promise(resolve => setTimeout(resolve, waitTime));
+        continue;
+      }
+
+      // Try fallback model
+      if (options?.fallbackModels?.[i]) {
+        model = options.fallbackModels[i];
+        console.log(`Falling back to ${model}`);
+        continue;
+      }
+    }
+  }
+
+  throw lastError;
+}
+```
+
+## Utilities
+
+### Cost & Usage Tracking
+
+```typescript
+import { costTracker, quotaTracker } from '@just-every/ensemble';
+
+// Track costs across requests
+for await (const event of request('gpt-4o', messages)) {
+  if (event.type === 'cost_update') {
+    console.log(`Tokens: ${event.usage.input_tokens} in, ${event.usage.output_tokens} out`);
+    console.log(`Cost: $${event.usage.total_cost.toFixed(4)}`);
+  }
+}
+
+// Get cumulative costs
+const usage = costTracker.getAllUsage();
+for (const [model, stats] of Object.entries(usage)) {
+  console.log(`${model}: $${stats.total_cost.toFixed(2)} for ${stats.request_count} requests`);
+}
+
+// Check quotas before making requests
+if (quotaTracker.canMakeRequest('gpt-4o', 'openai')) {
+  // Safe to proceed
+} else {
+  const resetTime = quotaTracker.getResetTime('openai');
+  console.log(`Quota exceeded. Resets at ${resetTime}`);
+}
+```
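`cost_update` events also make a per-request budget guard straightforward; an illustrative sketch using the usage shape from the example above:

```typescript
// Abandon a stream once it has cost more than a fixed budget.
const BUDGET_USD = 0.05;
for await (const event of request('gpt-4o', messages)) {
  if (event.type === 'cost_update' && event.usage.total_cost > BUDGET_USD) {
    console.warn('Budget exceeded, stopping stream');
    break;
  }
}
```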
+
+### Stream Conversion & Chaining
+
+```typescript
+import { request, getModelFromClass, convertStreamToMessages, chainRequests } from '@just-every/ensemble';
+
+let messages = [
+  { type: 'message', role: 'developer', content: 'You are a helpful coding assistant' },
+  { type: 'message', role: 'user', content: 'How do I center a div in CSS?' },
+];
+messages = [...messages, ...(await convertStreamToMessages(request('claude-4-sonnet', messages))).messages];
+messages = [...messages, ...(await convertStreamToMessages(request(getModelFromClass('reasoning_mini'), messages))).messages];
+const result = await convertStreamToMessages(request('gemini-2.5-flash', messages));
+messages = [...messages, ...result.messages];
+
+console.log(messages);            // Full conversation history
+console.log(result.fullResponse); // Just the last assistant response
+
+// Chain multiple models for multi-step tasks (codeToAnalyze: your source code as a string)
+const analysis = await chainRequests(
+  [
+    { type: 'message', role: 'user', content: codeToAnalyze }
+  ],
+  [
     {
-    model: '
-    systemPrompt: '
+      model: getModelFromClass('code'),
+      systemPrompt: 'Analyze this code for bugs and security issues',
     },
     {
-    model: '
-    systemPrompt: '
+      model: getModelFromClass('reasoning'),
+      systemPrompt: 'Prioritize the issues found and suggest fixes',
+    },
+    {
+      model: 'gpt-4.1-mini',
+      systemPrompt: 'Summarize the analysis in 3 bullet points',
     }
-], [
-  { type: 'message', role: 'user', content: 'Tell me a joke about programming' }
 ]);
+```
 
-
-const streamWithTools = request('gpt-4o', messages, {
-  tools: [weatherTool]
-});
+### Image Utilities
 
-
-
-
-
-
-
-
-}
-
-
-
+```typescript
+import { resizeImageForModel, imageToText } from '@just-every/ensemble';
+
+// Auto-resize for specific model requirements
+const resized = await resizeImageForModel(
+  base64ImageData,
+  'gpt-4o', // Different models have different size limits
+  { maxDimension: 2048 }
+);
+
+// Extract text from images
+const extractedText = await imageToText(imageBuffer);
+console.log('Found text:', extractedText);
 ```
 
-### Logging
-
-The ensemble package includes a pluggable logging system for LLM requests and responses:
+### Logging & Debugging
 
 ```typescript
 import { setEnsembleLogger, EnsembleLogger } from '@just-every/ensemble';
 
-//
-class
+// Production-ready logger example (`logger` below is your app logger, e.g. pino or winston)
+class ProductionLogger implements EnsembleLogger {
   log_llm_request(agentId: string, providerName: string, model: string, requestData: unknown, timestamp?: Date): string {
-
-
-
+    const requestId = `req_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
+
+    // Log to your monitoring system
+    logger.info('LLM Request', {
+      requestId,
+      agentId,
+      provider: providerName,
+      model,
+      timestamp,
+      // Be careful not to log sensitive data
+      messageCount: (requestData as any).messages?.length,
+      hasTools: !!(requestData as any).tools?.length
+    });
+
+    return requestId;
   }
 
   log_llm_response(requestId: string | undefined, responseData: unknown, timestamp?: Date): void {
-
-
+    const response = responseData as any;
+
+    logger.info('LLM Response', {
+      requestId,
+      timestamp,
+      inputTokens: response.usage?.input_tokens,
+      outputTokens: response.usage?.output_tokens,
+      totalCost: response.usage?.total_cost,
+      cached: response.usage?.cache_creation_input_tokens > 0
+    });
   }
 
   log_llm_error(requestId: string | undefined, errorData: unknown, timestamp?: Date): void {
-
-
+    logger.error('LLM Error', {
+      requestId,
+      timestamp,
+      error: errorData,
+      // Include retry information if available
+      retryAfter: (errorData as any).retryAfter
+    });
   }
 }
 
-// Enable logging
-setEnsembleLogger(new
+// Enable logging globally
+setEnsembleLogger(new ProductionLogger());
 
-//
+// Debug mode for development
+if (process.env.NODE_ENV === 'development') {
+  setEnsembleLogger({
+    log_llm_request: (agent, provider, model, data) => {
+      console.log(`[${new Date().toISOString()}] → ${provider}/${model}`);
+      return Date.now().toString();
+    },
+    log_llm_response: (id, data) => {
+      const response = data as any;
+      console.log(`[${new Date().toISOString()}] ← ${response.usage?.total_tokens} tokens`);
+    },
+    log_llm_error: (id, error) => {
+      console.error(`[${new Date().toISOString()}] ✗ Error:`, error);
    }
+  });
+}
+```
+
+## Advanced Topics
+
+### OpenAI SDK Compatibility
+
+Ensemble provides a drop-in replacement for the OpenAI SDK, allowing you to use any supported model with OpenAI's familiar API:
+
+```typescript
+import OpenAIEnsemble from '@just-every/ensemble/openai-compat';
+// Or named imports: import { chat, completions } from '@just-every/ensemble';
+
+// Replace OpenAI client
+const openai = OpenAIEnsemble; // Instead of: new OpenAI({ apiKey: '...' })
+
+// Use exactly like OpenAI SDK - but with any model!
+const completion = await openai.chat.completions.create({
+  model: 'claude-3.5-sonnet', // or 'gpt-4o', 'gemini-2.0-flash', etc.
+  messages: [
+    { role: 'system', content: 'You are a helpful assistant.' },
+    { role: 'user', content: 'Hello!' }
+  ],
+  temperature: 0.7
+});
+
+console.log(completion.choices[0].message.content);
+
+// Streaming
+const stream = await openai.chat.completions.create({
+  model: 'gpt-4o-mini',
+  messages: [{ role: 'user', content: 'Tell me a story' }],
+  stream: true
+});
+
+for await (const chunk of stream) {
+  process.stdout.write(chunk.choices[0].delta.content || '');
+}
+
+// Legacy completions API also supported
+const legacyCompletion = await openai.completions.create({
+  model: 'deepseek-chat',
+  prompt: 'Once upon a time',
+  max_tokens: 100
+});
+```
+
+This compatibility layer supports:
+- All chat.completions.create parameters (temperature, tools, response_format, etc.)
+- Streaming and non-streaming responses
+- Tool/function calling (see the sketch below)
+- Legacy completions.create API
+- Proper TypeScript types matching OpenAI's SDK
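Tool calling through the compat layer keeps the OpenAI wire shape; an illustrative sketch (the request body mirrors OpenAI's documented API, not re-verified against this package's typings):

```typescript
const res = await openai.chat.completions.create({
  model: 'claude-3.5-sonnet',
  messages: [{ role: 'user', content: 'What is the weather in Paris?' }],
  tools: [{
    type: 'function',
    function: {
      name: 'get_weather',
      description: 'Get current weather for a city',
      parameters: {
        type: 'object',
        properties: { city: { type: 'string' } },
        required: ['city']
      }
    }
  }]
});
console.log(res.choices[0].message.tool_calls);
```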
+
+### Custom Model Providers
+
+```typescript
+import { ModelProvider, registerExternalModel } from '@just-every/ensemble';
+
+// Register a custom model
+registerExternalModel({
+  id: 'my-custom-model',
+  provider: 'custom',
+  inputCost: 0.001,
+  outputCost: 0.002,
+  contextWindow: 8192,
+  maxOutput: 4096,
+  supportsTools: true,
+  supportsVision: false,
+  supportsStreaming: true
+});
+
+// Use your custom model
+const stream = request('my-custom-model', messages);
+```
+
+### Performance Optimization
+
+```typescript
+// Batch processing with concurrency control
+// (assumes: import { request, convertStreamToMessages } from '@just-every/ensemble')
+async function batchProcess(items: string[], concurrency = 3) {
+  const results = [];
+  const queue = [...items];
+
+  async function worker() {
+    while (queue.length > 0) {
+      const item = queue.shift()!;
+      const stream = request('gpt-4o-mini', [
+        { type: 'message', role: 'user', content: `Process: ${item}` }
+      ]);
+
+      const result = await convertStreamToMessages(stream);
+      results.push({ item, result: result.fullResponse });
+    }
+  }
+
+  // Run workers concurrently
+  await Promise.all(Array(concurrency).fill(null).map(() => worker()));
+  return results;
+}
+
+// Stream multiple requests in parallel
+async function parallelStreaming(prompts: string[]) {
+  const streams = prompts.map(prompt =>
+    request('claude-3.5-haiku', [
+      { type: 'message', role: 'user', content: prompt }
+    ])
+  );
+
+  // Process all streams concurrently
+  const results = await Promise.all(
+    streams.map(stream => convertStreamToMessages(stream))
+  );
+
+  return results.map(r => r.fullResponse);
+}
 ```
 
 ## Environment Variables