@just-every/ensemble 0.1.13 → 0.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +490 -100
- package/package.json +1 -1
package/README.md
CHANGED
@@ -5,6 +5,14 @@
 
 A unified interface for interacting with multiple LLM providers including OpenAI, Anthropic Claude, Google Gemini, Deepseek, Grok, and OpenRouter.
 
+## Why Use an Ensemble Approach?
+
+The ensemble pattern - rotating between multiple LLM providers dynamically - offers compelling advantages over relying on a single model. Research has shown that sampling multiple reasoning chains and using consensus answers can improve performance by double-digit margins on complex tasks. By automating this at runtime rather than prompt-engineering time, ensemble delivers more reliable and robust AI interactions.
+
+Beyond accuracy improvements, ensemble requests provide practical benefits for production systems. Different models carry unique training biases and stylistic patterns - rotating between them dilutes individual quirks and prevents conversations from getting "stuck" in one voice. The approach also ensures resilience: when one provider experiences an outage, quota limit, or latency spike, requests seamlessly route to alternatives. You can optimize costs by routing simple tasks to cheaper models while reserving premium models for complex reasoning. Need regex help? Route to a code-specialized model. Need emotional calibration? Use a dialogue expert. The ensemble gives you this granularity without complex conditional logic.
+
+Perhaps most importantly, the ensemble approach future-proofs your application. Model quality and pricing change weekly in the fast-moving LLM landscape. With ensemble, you can trial newcomers on a small percentage of traffic, compare real metrics, then scale up or roll back within minutes - all without changing your code.
+
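That traffic-splitting idea can be as simple as a weighted random pick before each call. A minimal sketch, assuming a prepared `messages` array; the weights and the `pickWeighted` helper are illustrative, not part of the package API:

```typescript
import { request } from '@just-every/ensemble';

// Hypothetical rollout policy: ~10% of traffic goes to the model under trial.
const rollout: Array<[model: string, weight: number]> = [
  ['gpt-4o', 0.9],           // incumbent
  ['gemini-2.0-flash', 0.1], // newcomer being evaluated
];

// Pick a model with probability proportional to its weight.
function pickWeighted(entries: Array<[string, number]>): string {
  let r = Math.random();
  for (const [model, weight] of entries) {
    if ((r -= weight) <= 0) return model;
  }
  return entries[entries.length - 1][0];
}

const stream = request(pickWeighted(rollout), messages);
```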
 ## Features
 
 - **Multi-provider support**: Claude, OpenAI, Gemini, Deepseek, Grok, OpenRouter
@@ -96,187 +104,569 @@ for await (const event of earlyStream) {
 
 ## API Reference
 
-###
+### Core Functions
 
-
+#### `request(model, messages, options?)`
+
+Main function for making LLM requests with streaming responses and automatic tool execution.
 
 **Parameters:**
-- `model` (string): Model identifier
-- `messages` (ResponseInput): Array of message objects
+- `model` (string): Model identifier (e.g., 'gpt-4o', 'claude-3.5-sonnet', 'gemini-2.0-flash')
+- `messages` (ResponseInput): Array of message objects in the conversation
 - `options` (RequestOptions): Optional configuration object
 
 **Returns:** `AsyncGenerator<EnsembleStreamEvent>` - An async generator that yields streaming events
 
 ```typescript
 interface RequestOptions {
-  agentId?: string;
-  tools?: ToolFunction[];
-
-
+  agentId?: string;                 // Identifier for logging/tracking
+  tools?: ToolFunction[];           // Array of tool definitions
+  toolChoice?: ToolChoice;          // Control tool selection behavior
+  maxToolCalls?: number;            // Max rounds of tool execution (default: 10, 0 = disabled)
+  processToolCall?: (toolCalls: ToolCall[]) => Promise<any>; // Custom tool handler
+  modelSettings?: ModelSettings;    // Temperature, maxTokens, etc.
+  modelClass?: ModelClassID;        // 'standard' | 'code' | 'reasoning' | 'monologue'
+  responseFormat?: ResponseFormat;  // JSON mode or structured output
+  maxImageDimension?: number;       // Auto-resize images (default: provider-specific)
+  fallbackModels?: string[];        // Models to try if primary fails
 }
 
-//
-
-
-
-}
-
-
-}
+// Stream event types
+type EnsembleStreamEvent =
+  | { type: 'text_delta', delta: string }
+  | { type: 'text', text: string }
+  | { type: 'message_delta', content: string }
+  | { type: 'message_complete', content: string }
+  | { type: 'tool_start', tool_calls: ToolCall[] }
+  | { type: 'cost_update', usage: TokenUsage }
+  | { type: 'stream_end', timestamp: string }
+  | { type: 'error', error: Error };
 ```
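A minimal consumption sketch, assuming `messages` is a `ResponseInput` array like those in the examples below; it simply dispatches on the event types listed above:

```typescript
for await (const event of request('gpt-4o-mini', messages)) {
  if (event.type === 'text_delta') {
    process.stdout.write(event.delta); // incremental text
  } else if (event.type === 'cost_update') {
    console.log(event.usage);          // token/cost snapshot
  } else if (event.type === 'error') {
    throw event.error;                 // surface provider failures
  }
}
```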
 
 
-###
+### Working with Models
 
-
+#### Model Selection
 
 ```typescript
-
-
-
-
-
-
+import { getModelFromClass, findModel, MODEL_REGISTRY } from '@just-every/ensemble';
+
+// Get best model for a specific task type
+const codeModel = getModelFromClass('code');           // Returns best available code model
+const reasoningModel = getModelFromClass('reasoning'); // For complex reasoning tasks
+
+// Check if a model exists
+const modelInfo = findModel('gpt-4o');
+if (modelInfo) {
+  console.log(`Provider: ${modelInfo.provider}`);
+  console.log(`Input cost: $${modelInfo.inputCost}/million tokens`);
+}
+
+// List all available models
+for (const [modelName, info] of Object.entries(MODEL_REGISTRY)) {
+  console.log(`${modelName}: ${info.provider}`);
 }
 ```
 
-
+#### Model Classes
+
+- **standard**: General-purpose models for everyday tasks
+- **code**: Optimized for programming and technical tasks
+- **reasoning**: Advanced models for complex logical reasoning
+- **monologue**: Models supporting extended thinking/reasoning traces
 
-
-- **Quota Management**: Track API quotas and rate limits with quota_tracker
-- **Image Processing**: Convert images to text, resize, and optimize
-- **Logging System**: Pluggable request/response logging with configurable backends
-- **Communication**: Logging and debugging utilities
-- **Delta Buffer**: Handle streaming response deltas
-- **AsyncQueue**: Generic async queue for bridging callbacks to async iteration (used internally)
+### Message Types
 
-
+```typescript
+// User/Assistant messages
+interface TextMessage {
+  type: 'message';
+  role: 'user' | 'assistant' | 'developer';
+  content: string | MessageContent[];
+  status?: 'completed' | 'in_progress';
+}
 
-
+// Multi-modal content
+type MessageContent =
+  | { type: 'input_text', text: string }
+  | { type: 'input_image', image_url: string, detail?: 'auto' | 'low' | 'high' }
+  | { type: 'tool_use', id: string, name: string, arguments: any };
+
+// Tool-related messages
+interface FunctionCall {
+  type: 'function_call';
+  id: string;
+  name: string;
+  arguments: string;
+}
+
+interface FunctionCallOutput {
+  type: 'function_call_output';
+  id: string;
+  output: string;
+}
+```
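For instance, a multi-modal user message built from these types might look like this (a sketch; the base64 payload is elided):

```typescript
const question: TextMessage = {
  type: 'message',
  role: 'user',
  content: [
    { type: 'input_text', text: 'What does this diagram show?' },
    { type: 'input_image', image_url: 'data:image/png;base64,...', detail: 'low' }
  ]
};
```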
+
+## Common Use Cases
+
+### 1. Basic Conversations
 
 ```typescript
 import { request } from '@just-every/ensemble';
 
-//
-const
-
-
+// Simple Q&A
+for await (const event of request('gpt-4o-mini', [
+  { type: 'message', role: 'user', content: 'Explain quantum computing in simple terms' }
+])) {
+  if (event.type === 'text_delta') {
+    process.stdout.write(event.delta);
+  }
+}
+
+// Multi-turn conversation
+const messages = [
+  { type: 'message', role: 'developer', content: 'You are a helpful coding assistant' },
+  { type: 'message', role: 'user', content: 'How do I center a div in CSS?' },
+  { type: 'message', role: 'assistant', content: 'Here are several ways...' },
+  { type: 'message', role: 'user', content: 'What about using flexbox?' }
+];
+
+for await (const event of request('claude-3.5-sonnet', messages)) {
+  // Handle streaming response
+}
+```
+
+### 2. Tool Calling & Function Execution
+
+```typescript
+// Define tools with TypeScript types
+interface WeatherParams {
+  city: string;
+  unit?: 'celsius' | 'fahrenheit';
+}
+
+const weatherTool: ToolFunction = {
+  function: async ({ city, unit = 'celsius' }: WeatherParams) => {
+    // Real implementation would call weather API
+    const temp = unit === 'celsius' ? 22 : 72;
+    return `${temp}°${unit[0].toUpperCase()} in ${city}`;
   },
   definition: {
     type: 'function',
     function: {
       name: 'get_weather',
-      description: 'Get weather for a city',
+      description: 'Get current weather for a city',
       parameters: {
         type: 'object',
         properties: {
-          city: { type: 'string', description: 'City name' }
+          city: { type: 'string', description: 'City name' },
+          unit: {
+            type: 'string',
+            enum: ['celsius', 'fahrenheit'],
+            description: 'Temperature unit'
+          }
         },
         required: ['city']
       }
     }
   }
-}
-
-//
-const
-  { type: 'message', role: 'user', content: 'What\'s the weather in
-], {
-
-
+};
+
+// Use with automatic execution
+for await (const event of request('gpt-4o', [
+  { type: 'message', role: 'user', content: 'What\'s the weather in Tokyo and New York?' }
+], { tools: [weatherTool] })) {
+  if (event.type === 'tool_start') {
+    console.log('Calling tool:', event.tool_calls[0].function.name);
+  } else if (event.type === 'text_delta') {
+    process.stdout.write(event.delta);
+  }
+}
+```
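Tool execution can also be bounded or intercepted with the `maxToolCalls` and `processToolCall` options from `RequestOptions`. A sketch, where `dispatch` stands in for your own (hypothetical) tool router:

```typescript
for await (const event of request('gpt-4o', [
  { type: 'message', role: 'user', content: 'What\'s the weather in Paris?' }
], {
  tools: [weatherTool],
  maxToolCalls: 2, // stop after two rounds of tool execution
  processToolCall: async (toolCalls) => {
    // Inspect, log, or override calls before results return to the model
    return Promise.all(toolCalls.map(call => dispatch(call)));
  }
})) {
  if (event.type === 'text_delta') process.stdout.write(event.delta);
}
```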
+
+### 3. Model Selection Strategies
+
+```typescript
+import { getModelFromClass, request } from '@just-every/ensemble';
+
+// Route based on task type
+async function intelligentRequest(task: string, messages: ResponseInput) {
+  let model: string;
+
+  if (task.includes('code') || task.includes('debug')) {
+    model = getModelFromClass('code'); // Best code model
+  } else if (task.includes('analyze') || task.includes('reasoning')) {
+    model = getModelFromClass('reasoning'); // Best reasoning model
+  } else {
+    model = getModelFromClass('standard'); // Cost-effective general model
+  }
+
+  console.log(`Using ${model} for ${task}`);
+
+  return request(model, messages, {
+    fallbackModels: ['gpt-4o-mini', 'claude-3-5-haiku'] // Fallback options
+  });
+}
+
+// Use model rotation for consensus
+async function consensusRequest(messages: ResponseInput) {
+  const models = ['gpt-4o', 'claude-3.5-sonnet', 'gemini-2.0-flash'];
+  const responses = [];
+
+  for (const model of models) {
+    const stream = request(model, messages);
+    const result = await convertStreamToMessages(stream);
+    responses.push(result.fullResponse);
+  }
+
+  // Analyze responses for consensus
+  return analyzeConsensus(responses);
+}
+```
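The `analyzeConsensus` helper above is left to the application. A naive majority-vote sketch, assuming short, comparable answers (real use would normalize or embed the responses first):

```typescript
function analyzeConsensus(responses: string[]): string {
  // Count exact (case-insensitive) duplicates
  const counts = new Map<string, number>();
  for (const r of responses) {
    const key = r.trim().toLowerCase();
    counts.set(key, (counts.get(key) ?? 0) + 1);
  }
  // Return the most frequent answer; ties keep the earliest response
  let best = responses[0];
  let bestCount = 0;
  for (const r of responses) {
    const c = counts.get(r.trim().toLowerCase()) ?? 0;
    if (c > bestCount) {
      best = r;
      bestCount = c;
    }
  }
  return best;
}
```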
+
+### 4. Structured Output & JSON Mode
+
+```typescript
+// JSON mode for reliable parsing
+const jsonStream = request('gpt-4o', [
+  { type: 'message', role: 'user', content: 'List 3 programming languages with their pros/cons as JSON' }
+], {
+  responseFormat: { type: 'json_object' }
 });
 
-
+let jsonContent = '';
+for await (const event of jsonStream) {
+  if (event.type === 'text_delta') {
+    jsonContent += event.delta;
+  }
+}
+
+const data = JSON.parse(jsonContent);
+
+// Structured output with schema validation
+const schema = {
+  type: 'object',
+  properties: {
+    name: { type: 'string' },
+    age: { type: 'number' },
+    skills: {
+      type: 'array',
+      items: { type: 'string' }
+    }
+  },
+  required: ['name', 'age', 'skills']
+};
 
-
-
-
-
-
-
-
+const structuredStream = request('gpt-4o', [
+  { type: 'message', role: 'user', content: 'Generate a developer profile' }
+], {
+  responseFormat: {
+    type: 'json_schema',
+    json_schema: {
+      name: 'developer_profile',
+      schema: schema,
+      strict: true
+    }
   }
 });
 ```
 
-###
+### 5. Image Processing
 
-
+```typescript
+// Analyze images with vision models
+const imageStream = request('gpt-4o', [
+  {
+    type: 'message',
+    role: 'user',
+    content: [
+      { type: 'input_text', text: 'What\'s in this image? Describe any text you see.' },
+      {
+        type: 'input_image',
+        image_url: 'data:image/jpeg;base64,...',
+        detail: 'high' // 'auto' | 'low' | 'high'
+      }
+    ]
+  }
+], {
+  maxImageDimension: 2048 // Auto-resize large images
+});
+
+// Multiple images
+const comparison = request('claude-3.5-sonnet', [
+  {
+    type: 'message',
+    role: 'user',
+    content: [
+      { type: 'input_text', text: 'Compare these two designs:' },
+      { type: 'input_image', image_url: 'https://example.com/design1.png' },
+      { type: 'input_image', image_url: 'https://example.com/design2.png' }
+    ]
+  }
+]);
+```
+
+### 6. Error Handling & Resilience
+
+```typescript
+import { isRateLimitError, isAuthenticationError } from '@just-every/ensemble';
+
+async function robustRequest(model: string, messages: ResponseInput, options?: RequestOptions) {
+  const maxRetries = 3;
+  let lastError;
+
+  for (let i = 0; i < maxRetries; i++) {
+    try {
+      const events = [];
+      for await (const event of request(model, messages, options)) {
+        if (event.type === 'error') {
+          throw event.error;
+        }
+        events.push(event);
+      }
+      return events;
+
+    } catch (error) {
+      lastError = error;
+
+      if (isAuthenticationError(error)) {
+        throw error; // Don't retry auth errors
+      }
+
+      if (isRateLimitError(error)) {
+        const waitTime = error.retryAfter || Math.pow(2, i) * 1000;
+        console.log(`Rate limited. Waiting ${waitTime}ms...`);
+        await new Promise(resolve => setTimeout(resolve, waitTime));
+        continue;
+      }
+
+      // Try fallback model
+      if (options?.fallbackModels?.[i]) {
+        model = options.fallbackModels[i];
+        console.log(`Falling back to ${model}`);
+        continue;
+      }
+    }
+  }
+
+  throw lastError;
+}
+```
+
+## Utilities
+
+### Cost & Usage Tracking
+
+```typescript
+import { costTracker, quotaTracker } from '@just-every/ensemble';
+
+// Track costs across requests
+for await (const event of request('gpt-4o', messages)) {
+  if (event.type === 'cost_update') {
+    console.log(`Tokens: ${event.usage.input_tokens} in, ${event.usage.output_tokens} out`);
+    console.log(`Cost: $${event.usage.total_cost.toFixed(4)}`);
+  }
+}
+
+// Get cumulative costs
+const usage = costTracker.getAllUsage();
+for (const [model, stats] of Object.entries(usage)) {
+  console.log(`${model}: $${stats.total_cost.toFixed(2)} for ${stats.request_count} requests`);
+}
+
+// Check quotas before making requests
+if (quotaTracker.canMakeRequest('gpt-4o', 'openai')) {
+  // Safe to proceed
+} else {
+  const resetTime = quotaTracker.getResetTime('openai');
+  console.log(`Quota exceeded. Resets at ${resetTime}`);
+}
+```
+
+### Stream Conversion & Chaining
 
 ```typescript
 import { convertStreamToMessages, chainRequests } from '@just-every/ensemble';
 
-// Convert
-const stream = request('claude-3
-  { type: 'message', role: 'user', content: '
+// Convert stream to conversation history
+const stream = request('claude-3.5-sonnet', [
+  { type: 'message', role: 'user', content: 'Write a haiku about coding' }
 ]);
 
 const result = await convertStreamToMessages(stream);
-console.log(result.messages);
-console.log(result.fullResponse); //
+console.log(result.messages);     // Full conversation history
+console.log(result.fullResponse); // Just the assistant's response
 
-// Chain multiple
-const
+// Chain multiple models for multi-step tasks
+const analysis = await chainRequests([
+  {
+    model: getModelFromClass('code'),
+    systemPrompt: 'Analyze this code for bugs and security issues',
+  },
   {
-    model: '
-    systemPrompt: '
+    model: getModelFromClass('reasoning'),
+    systemPrompt: 'Prioritize the issues found and suggest fixes',
   },
   {
-    model: 'gpt-4o',
-    systemPrompt: '
+    model: 'gpt-4o-mini',
+    systemPrompt: 'Summarize the analysis in 3 bullet points',
   }
 ], [
-  { type: 'message', role: 'user', content:
+  { type: 'message', role: 'user', content: codeToAnalyze }
 ]);
+```
 
-
-const streamWithTools = request('gpt-4o', messages, {
-  tools: [weatherTool]
-});
+### Image Utilities
 
-
-
-
-
-
-
-
-}
-
-
-
+```typescript
+import { resizeImageForModel, imageToText } from '@just-every/ensemble';
+
+// Auto-resize for specific model requirements
+const resized = await resizeImageForModel(
+  base64ImageData,
+  'gpt-4o', // Different models have different size limits
+  { maxDimension: 2048 }
+);
+
+// Extract text from images
+const extractedText = await imageToText(imageBuffer);
+console.log('Found text:', extractedText);
 ```
 
-### Logging
-
-The ensemble package includes a pluggable logging system for LLM requests and responses:
+### Logging & Debugging
 
 ```typescript
 import { setEnsembleLogger, EnsembleLogger } from '@just-every/ensemble';
 
-//
-class
+// Production-ready logger example
+class ProductionLogger implements EnsembleLogger {
   log_llm_request(agentId: string, providerName: string, model: string, requestData: unknown, timestamp?: Date): string {
-
-
-
+    const requestId = `req_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
+
+    // Log to your monitoring system
+    logger.info('LLM Request', {
+      requestId,
+      agentId,
+      provider: providerName,
+      model,
+      timestamp,
+      // Be careful not to log sensitive data
+      messageCount: (requestData as any).messages?.length,
+      hasTools: !!(requestData as any).tools?.length
+    });
+
+    return requestId;
   }
 
   log_llm_response(requestId: string | undefined, responseData: unknown, timestamp?: Date): void {
-
-
+    const response = responseData as any;
+
+    logger.info('LLM Response', {
+      requestId,
+      timestamp,
+      inputTokens: response.usage?.input_tokens,
+      outputTokens: response.usage?.output_tokens,
+      totalCost: response.usage?.total_cost,
+      cached: response.usage?.cache_creation_input_tokens > 0
+    });
   }
 
   log_llm_error(requestId: string | undefined, errorData: unknown, timestamp?: Date): void {
-
-
+    logger.error('LLM Error', {
+      requestId,
+      timestamp,
+      error: errorData,
+      // Include retry information if available
+      retryAfter: (errorData as any).retryAfter
+    });
   }
 }
 
-// Enable logging
-setEnsembleLogger(new
+// Enable logging globally
+setEnsembleLogger(new ProductionLogger());
+
+// Debug mode for development
+if (process.env.NODE_ENV === 'development') {
+  setEnsembleLogger({
+    log_llm_request: (agent, provider, model, data) => {
+      console.log(`[${new Date().toISOString()}] → ${provider}/${model}`);
+      return Date.now().toString();
+    },
+    log_llm_response: (id, data) => {
+      const response = data as any;
+      console.log(`[${new Date().toISOString()}] ← ${response.usage?.total_tokens} tokens`);
+    },
+    log_llm_error: (id, error) => {
+      console.error(`[${new Date().toISOString()}] ✗ Error:`, error);
+    }
+  });
+}
+```
+
+## Advanced Topics
+
+### Custom Model Providers
+
+```typescript
+import { ModelProvider, registerExternalModel } from '@just-every/ensemble';
+
+// Register a custom model
+registerExternalModel({
+  id: 'my-custom-model',
+  provider: 'custom',
+  inputCost: 0.001,
+  outputCost: 0.002,
+  contextWindow: 8192,
+  maxOutput: 4096,
+  supportsTools: true,
+  supportsVision: false,
+  supportsStreaming: true
+});
+
+// Use your custom model
+const stream = request('my-custom-model', messages);
+```
 
-
+### Performance Optimization
+
+```typescript
+// Batch processing with concurrency control
+async function batchProcess(items: string[], concurrency = 3) {
+  const results = [];
+  const queue = [...items];
+
+  async function worker() {
+    while (queue.length > 0) {
+      const item = queue.shift()!;
+      const stream = request('gpt-4o-mini', [
+        { type: 'message', role: 'user', content: `Process: ${item}` }
+      ]);
+
+      const result = await convertStreamToMessages(stream);
+      results.push({ item, result: result.fullResponse });
+    }
+  }
+
+  // Run workers concurrently
+  await Promise.all(Array(concurrency).fill(null).map(() => worker()));
+  return results;
+}
+
+// Stream multiple requests in parallel
+async function parallelStreaming(prompts: string[]) {
+  const streams = prompts.map(prompt =>
+    request('claude-3.5-haiku', [
+      { type: 'message', role: 'user', content: prompt }
+    ])
+  );
+
+  // Process all streams concurrently
+  const results = await Promise.all(
+    streams.map(stream => convertStreamToMessages(stream))
+  );
+
+  return results.map(r => r.fullResponse);
+}
 ```
 
 ## Environment Variables