@just-every/ensemble 0.1.14 → 0.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +482 -100
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -104,187 +104,569 @@ for await (const event of earlyStream) {
|
|
|
104
104
|
|
|
105
105
|
## API Reference
|
|
106
106
|
|
|
107
|
-
###
|
|
107
|
+
### Core Functions
|
|
108
108
|
|
|
109
|
-
|
|
109
|
+
#### `request(model, messages, options?)`
|
|
110
|
+
|
|
111
|
+
Main function for making LLM requests with streaming responses and automatic tool execution.
|
|
110
112
|
|
|
111
113
|
**Parameters:**
|
|
112
|
-
- `model` (string): Model identifier
|
|
113
|
-
- `messages` (ResponseInput): Array of message objects
|
|
114
|
+
- `model` (string): Model identifier (e.g., 'gpt-4o', 'claude-3.5-sonnet', 'gemini-2.0-flash')
|
|
115
|
+
- `messages` (ResponseInput): Array of message objects in the conversation
|
|
114
116
|
- `options` (RequestOptions): Optional configuration object
|
|
115
117
|
|
|
116
118
|
**Returns:** `AsyncGenerator<EnsembleStreamEvent>` - An async generator that yields streaming events
|
|
117
119
|
|
|
118
120
|
```typescript
|
|
119
121
|
interface RequestOptions {
|
|
120
|
-
agentId?: string;
|
|
121
|
-
tools?: ToolFunction[];
|
|
122
|
-
|
|
123
|
-
|
|
122
|
+
agentId?: string; // Identifier for logging/tracking
|
|
123
|
+
tools?: ToolFunction[]; // Array of tool definitions
|
|
124
|
+
toolChoice?: ToolChoice; // Control tool selection behavior
|
|
125
|
+
maxToolCalls?: number; // Max rounds of tool execution (default: 10, 0 = disabled)
|
|
126
|
+
processToolCall?: (toolCalls: ToolCall[]) => Promise<any>; // Custom tool handler
|
|
127
|
+
modelSettings?: ModelSettings; // Temperature, maxTokens, etc.
|
|
128
|
+
modelClass?: ModelClassID; // 'standard' | 'code' | 'reasoning' | 'monologue'
|
|
129
|
+
responseFormat?: ResponseFormat; // JSON mode or structured output
|
|
130
|
+
maxImageDimension?: number; // Auto-resize images (default: provider-specific)
|
|
131
|
+
fallbackModels?: string[]; // Models to try if primary fails
|
|
124
132
|
}
|
|
125
133
|
|
|
126
|
-
//
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
}
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
}
|
|
134
|
+
// Stream event types
|
|
135
|
+
type EnsembleStreamEvent =
|
|
136
|
+
| { type: 'text_delta', delta: string }
|
|
137
|
+
| { type: 'text', text: string }
|
|
138
|
+
| { type: 'message_delta', content: string }
|
|
139
|
+
| { type: 'message_complete', content: string }
|
|
140
|
+
| { type: 'tool_start', tool_calls: ToolCall[] }
|
|
141
|
+
| { type: 'cost_update', usage: TokenUsage }
|
|
142
|
+
| { type: 'stream_end', timestamp: string }
|
|
143
|
+
| { type: 'error', error: Error };
|
|
134
144
|
```
|
|
135
145
|
|
|
136
146
|
|
|
137
|
-
###
|
|
147
|
+
### Working with Models
|
|
138
148
|
|
|
139
|
-
|
|
149
|
+
#### Model Selection
|
|
140
150
|
|
|
141
151
|
```typescript
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
152
|
+
import { getModelFromClass, findModel, MODEL_REGISTRY } from '@just-every/ensemble';
|
|
153
|
+
|
|
154
|
+
// Get best model for a specific task type
|
|
155
|
+
const codeModel = getModelFromClass('code'); // Returns best available code model
|
|
156
|
+
const reasoningModel = getModelFromClass('reasoning'); // For complex reasoning tasks
|
|
157
|
+
|
|
158
|
+
// Check if a model exists
|
|
159
|
+
const modelInfo = findModel('gpt-4o');
|
|
160
|
+
if (modelInfo) {
|
|
161
|
+
console.log(`Provider: ${modelInfo.provider}`);
|
|
162
|
+
console.log(`Input cost: $${modelInfo.inputCost}/million tokens`);
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
// List all available models
|
|
166
|
+
for (const [modelName, info] of Object.entries(MODEL_REGISTRY)) {
|
|
167
|
+
console.log(`${modelName}: ${info.provider}`);
|
|
148
168
|
}
|
|
149
169
|
```
|
|
150
170
|
|
|
151
|
-
|
|
171
|
+
#### Model Classes
|
|
172
|
+
|
|
173
|
+
- **standard**: General-purpose models for everyday tasks
|
|
174
|
+
- **code**: Optimized for programming and technical tasks
|
|
175
|
+
- **reasoning**: Advanced models for complex logical reasoning
|
|
176
|
+
- **monologue**: Models supporting extended thinking/reasoning traces
|
|
152
177
|
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
178
|
+
### Message Types
|
|
179
|
+
|
|
180
|
+
```typescript
|
|
181
|
+
// User/Assistant/Developer messages
|
|
182
|
+
interface TextMessage {
|
|
183
|
+
type: 'message';
|
|
184
|
+
role: 'user' | 'assistant' | 'developer';
|
|
185
|
+
content: string | MessageContent[];
|
|
186
|
+
status?: 'completed' | 'in_progress';
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
// Multi-modal content
|
|
190
|
+
type MessageContent =
|
|
191
|
+
| { type: 'input_text', text: string }
|
|
192
|
+
| { type: 'input_image', image_url: string, detail?: 'auto' | 'low' | 'high' }
|
|
193
|
+
| { type: 'tool_use', id: string, name: string, arguments: any };
|
|
194
|
+
|
|
195
|
+
// Tool-related messages
|
|
196
|
+
interface FunctionCall {
|
|
197
|
+
type: 'function_call';
|
|
198
|
+
id: string;
|
|
199
|
+
name: string;
|
|
200
|
+
arguments: string;
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
interface FunctionCallOutput {
|
|
204
|
+
type: 'function_call_output';
|
|
205
|
+
id: string;
|
|
206
|
+
output: string;
|
|
207
|
+
}
|
|
208
|
+
```
|
|
160
209
|
|
|
161
|
-
|
|
210
|
+
## Common Use Cases
|
|
162
211
|
|
|
163
|
-
|
|
212
|
+
### 1. Basic Conversations
|
|
164
213
|
|
|
165
214
|
```typescript
|
|
166
215
|
import { request } from '@just-every/ensemble';
|
|
167
216
|
|
|
168
|
-
//
|
|
169
|
-
const
|
|
170
|
-
|
|
171
|
-
|
|
217
|
+
// Simple Q&A
|
|
218
|
+
for await (const event of request('gpt-4o-mini', [
|
|
219
|
+
{ type: 'message', role: 'user', content: 'Explain quantum computing in simple terms' }
|
|
220
|
+
])) {
|
|
221
|
+
if (event.type === 'text_delta') {
|
|
222
|
+
process.stdout.write(event.delta);
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
// Multi-turn conversation
|
|
227
|
+
const messages = [
|
|
228
|
+
{ type: 'message', role: 'developer', content: 'You are a helpful coding assistant' },
|
|
229
|
+
{ type: 'message', role: 'user', content: 'How do I center a div in CSS?' },
|
|
230
|
+
{ type: 'message', role: 'assistant', content: 'Here are several ways...' },
|
|
231
|
+
{ type: 'message', role: 'user', content: 'What about using flexbox?' }
|
|
232
|
+
];
|
|
233
|
+
|
|
234
|
+
for await (const event of request('claude-3.5-sonnet', messages)) {
|
|
235
|
+
// Handle streaming response
|
|
236
|
+
}
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
### 2. Tool Calling & Function Execution
|
|
240
|
+
|
|
241
|
+
```typescript
|
|
242
|
+
// Define tools with TypeScript types
|
|
243
|
+
interface WeatherParams {
|
|
244
|
+
city: string;
|
|
245
|
+
unit?: 'celsius' | 'fahrenheit';
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
const weatherTool: ToolFunction = {
|
|
249
|
+
function: async ({ city, unit = 'celsius' }: WeatherParams) => {
|
|
250
|
+
// A real implementation would call a weather API
|
|
251
|
+
const temp = unit === 'celsius' ? 22 : 72;
|
|
252
|
+
return `${temp}°${unit[0].toUpperCase()} in ${city}`;
|
|
172
253
|
},
|
|
173
254
|
definition: {
|
|
174
255
|
type: 'function',
|
|
175
256
|
function: {
|
|
176
257
|
name: 'get_weather',
|
|
177
|
-
description: 'Get weather for a city',
|
|
258
|
+
description: 'Get current weather for a city',
|
|
178
259
|
parameters: {
|
|
179
260
|
type: 'object',
|
|
180
261
|
properties: {
|
|
181
|
-
city: { type: 'string', description: 'City name' }
|
|
262
|
+
city: { type: 'string', description: 'City name' },
|
|
263
|
+
unit: {
|
|
264
|
+
type: 'string',
|
|
265
|
+
enum: ['celsius', 'fahrenheit'],
|
|
266
|
+
description: 'Temperature unit'
|
|
267
|
+
}
|
|
182
268
|
},
|
|
183
269
|
required: ['city']
|
|
184
270
|
}
|
|
185
271
|
}
|
|
186
272
|
}
|
|
187
|
-
}
|
|
188
|
-
|
|
189
|
-
//
|
|
190
|
-
const
|
|
191
|
-
{ type: 'message', role: 'user', content: 'What\'s the weather in
|
|
192
|
-
], {
|
|
193
|
-
|
|
194
|
-
|
|
273
|
+
};
|
|
274
|
+
|
|
275
|
+
// Use with automatic execution
|
|
276
|
+
for await (const event of request('gpt-4o', [
|
|
277
|
+
{ type: 'message', role: 'user', content: 'What\'s the weather in Tokyo and New York?' }
|
|
278
|
+
], { tools: [weatherTool] })) {
|
|
279
|
+
if (event.type === 'tool_start') {
|
|
280
|
+
console.log('Calling tool:', event.tool_calls[0].function.name);
|
|
281
|
+
} else if (event.type === 'text_delta') {
|
|
282
|
+
process.stdout.write(event.delta);
|
|
283
|
+
}
|
|
284
|
+
}
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
### 3. Model Selection Strategies
|
|
288
|
+
|
|
289
|
+
```typescript
|
|
290
|
+
import { getModelFromClass, request } from '@just-every/ensemble';
|
|
291
|
+
|
|
292
|
+
// Route based on task type
|
|
293
|
+
async function intelligentRequest(task: string, messages: ResponseInput) {
|
|
294
|
+
let model: string;
|
|
295
|
+
|
|
296
|
+
if (task.includes('code') || task.includes('debug')) {
|
|
297
|
+
model = getModelFromClass('code'); // Best code model
|
|
298
|
+
} else if (task.includes('analyze') || task.includes('reasoning')) {
|
|
299
|
+
model = getModelFromClass('reasoning'); // Best reasoning model
|
|
300
|
+
} else {
|
|
301
|
+
model = getModelFromClass('standard'); // Cost-effective general model
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
console.log(`Using ${model} for ${task}`);
|
|
305
|
+
|
|
306
|
+
return request(model, messages, {
|
|
307
|
+
fallbackModels: ['gpt-4o-mini', 'claude-3-5-haiku'] // Fallback options
|
|
308
|
+
});
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
// Use model rotation for consensus
|
|
312
|
+
async function consensusRequest(messages: ResponseInput) {
|
|
313
|
+
const models = ['gpt-4o', 'claude-3.5-sonnet', 'gemini-2.0-flash'];
|
|
314
|
+
const responses = [];
|
|
315
|
+
|
|
316
|
+
for (const model of models) {
|
|
317
|
+
const stream = request(model, messages);
|
|
318
|
+
const result = await convertStreamToMessages(stream);
|
|
319
|
+
responses.push(result.fullResponse);
|
|
320
|
+
}
|
|
321
|
+
|
|
322
|
+
// Analyze responses for consensus
|
|
323
|
+
return analyzeConsensus(responses);
|
|
324
|
+
}
|
|
325
|
+
```
|
|
326
|
+
|
|
327
|
+
### 4. Structured Output & JSON Mode
|
|
328
|
+
|
|
329
|
+
```typescript
|
|
330
|
+
// JSON mode for reliable parsing
|
|
331
|
+
const jsonStream = request('gpt-4o', [
|
|
332
|
+
{ type: 'message', role: 'user', content: 'List 3 programming languages with their pros/cons as JSON' }
|
|
333
|
+
], {
|
|
334
|
+
responseFormat: { type: 'json_object' }
|
|
195
335
|
});
|
|
196
336
|
|
|
197
|
-
|
|
337
|
+
let jsonContent = '';
|
|
338
|
+
for await (const event of jsonStream) {
|
|
339
|
+
if (event.type === 'text_delta') {
|
|
340
|
+
jsonContent += event.delta;
|
|
341
|
+
}
|
|
342
|
+
}
|
|
198
343
|
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
344
|
+
const data = JSON.parse(jsonContent);
|
|
345
|
+
|
|
346
|
+
// Structured output with schema validation
|
|
347
|
+
const schema = {
|
|
348
|
+
type: 'object',
|
|
349
|
+
properties: {
|
|
350
|
+
name: { type: 'string' },
|
|
351
|
+
age: { type: 'number' },
|
|
352
|
+
skills: {
|
|
353
|
+
type: 'array',
|
|
354
|
+
items: { type: 'string' }
|
|
355
|
+
}
|
|
356
|
+
},
|
|
357
|
+
required: ['name', 'age', 'skills']
|
|
358
|
+
};
|
|
359
|
+
|
|
360
|
+
const structuredStream = request('gpt-4o', [
|
|
361
|
+
{ type: 'message', role: 'user', content: 'Generate a developer profile' }
|
|
362
|
+
], {
|
|
363
|
+
responseFormat: {
|
|
364
|
+
type: 'json_schema',
|
|
365
|
+
json_schema: {
|
|
366
|
+
name: 'developer_profile',
|
|
367
|
+
schema: schema,
|
|
368
|
+
strict: true
|
|
369
|
+
}
|
|
206
370
|
}
|
|
207
371
|
});
|
|
208
372
|
```
|
|
209
373
|
|
|
210
|
-
###
|
|
374
|
+
### 5. Image Processing
|
|
211
375
|
|
|
212
|
-
|
|
376
|
+
```typescript
|
|
377
|
+
// Analyze images with vision models
|
|
378
|
+
const imageStream = request('gpt-4o', [
|
|
379
|
+
{
|
|
380
|
+
type: 'message',
|
|
381
|
+
role: 'user',
|
|
382
|
+
content: [
|
|
383
|
+
{ type: 'input_text', text: 'What\'s in this image? Describe any text you see.' },
|
|
384
|
+
{
|
|
385
|
+
type: 'input_image',
|
|
386
|
+
image_url: 'data:image/jpeg;base64,...',
|
|
387
|
+
detail: 'high' // 'auto' | 'low' | 'high'
|
|
388
|
+
}
|
|
389
|
+
]
|
|
390
|
+
}
|
|
391
|
+
], {
|
|
392
|
+
maxImageDimension: 2048 // Auto-resize large images
|
|
393
|
+
});
|
|
394
|
+
|
|
395
|
+
// Multiple images
|
|
396
|
+
const comparison = request('claude-3.5-sonnet', [
|
|
397
|
+
{
|
|
398
|
+
type: 'message',
|
|
399
|
+
role: 'user',
|
|
400
|
+
content: [
|
|
401
|
+
{ type: 'input_text', text: 'Compare these two designs:' },
|
|
402
|
+
{ type: 'input_image', image_url: 'https://example.com/design1.png' },
|
|
403
|
+
{ type: 'input_image', image_url: 'https://example.com/design2.png' }
|
|
404
|
+
]
|
|
405
|
+
}
|
|
406
|
+
]);
|
|
407
|
+
```
|
|
408
|
+
|
|
409
|
+
### 6. Error Handling & Resilience
|
|
410
|
+
|
|
411
|
+
```typescript
|
|
412
|
+
import { isRateLimitError, isAuthenticationError } from '@just-every/ensemble';
|
|
413
|
+
|
|
414
|
+
async function robustRequest(model: string, messages: ResponseInput, options?: RequestOptions) {
|
|
415
|
+
const maxRetries = 3;
|
|
416
|
+
let lastError;
|
|
417
|
+
|
|
418
|
+
for (let i = 0; i < maxRetries; i++) {
|
|
419
|
+
try {
|
|
420
|
+
const events = [];
|
|
421
|
+
for await (const event of request(model, messages, options)) {
|
|
422
|
+
if (event.type === 'error') {
|
|
423
|
+
throw event.error;
|
|
424
|
+
}
|
|
425
|
+
events.push(event);
|
|
426
|
+
}
|
|
427
|
+
return events;
|
|
428
|
+
|
|
429
|
+
} catch (error) {
|
|
430
|
+
lastError = error;
|
|
431
|
+
|
|
432
|
+
if (isAuthenticationError(error)) {
|
|
433
|
+
throw error; // Don't retry auth errors
|
|
434
|
+
}
|
|
435
|
+
|
|
436
|
+
if (isRateLimitError(error)) {
|
|
437
|
+
const waitTime = error.retryAfter || Math.pow(2, i) * 1000;
|
|
438
|
+
console.log(`Rate limited. Waiting ${waitTime}ms...`);
|
|
439
|
+
await new Promise(resolve => setTimeout(resolve, waitTime));
|
|
440
|
+
continue;
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
// Try fallback model
|
|
444
|
+
if (options?.fallbackModels?.[i]) {
|
|
445
|
+
model = options.fallbackModels[i];
|
|
446
|
+
console.log(`Falling back to ${model}`);
|
|
447
|
+
continue;
|
|
448
|
+
}
|
|
449
|
+
}
|
|
450
|
+
}
|
|
451
|
+
|
|
452
|
+
throw lastError;
|
|
453
|
+
}
|
|
454
|
+
```
|
|
455
|
+
|
|
456
|
+
## Utilities
|
|
457
|
+
|
|
458
|
+
### Cost & Usage Tracking
|
|
459
|
+
|
|
460
|
+
```typescript
|
|
461
|
+
import { costTracker, quotaTracker } from '@just-every/ensemble';
|
|
462
|
+
|
|
463
|
+
// Track costs across requests
|
|
464
|
+
for await (const event of request('gpt-4o', messages)) {
|
|
465
|
+
if (event.type === 'cost_update') {
|
|
466
|
+
console.log(`Tokens: ${event.usage.input_tokens} in, ${event.usage.output_tokens} out`);
|
|
467
|
+
console.log(`Cost: $${event.usage.total_cost.toFixed(4)}`);
|
|
468
|
+
}
|
|
469
|
+
}
|
|
470
|
+
|
|
471
|
+
// Get cumulative costs
|
|
472
|
+
const usage = costTracker.getAllUsage();
|
|
473
|
+
for (const [model, stats] of Object.entries(usage)) {
|
|
474
|
+
console.log(`${model}: $${stats.total_cost.toFixed(2)} for ${stats.request_count} requests`);
|
|
475
|
+
}
|
|
476
|
+
|
|
477
|
+
// Check quotas before making requests
|
|
478
|
+
if (quotaTracker.canMakeRequest('gpt-4o', 'openai')) {
|
|
479
|
+
// Safe to proceed
|
|
480
|
+
} else {
|
|
481
|
+
const resetTime = quotaTracker.getResetTime('openai');
|
|
482
|
+
console.log(`Quota exceeded. Resets at ${resetTime}`);
|
|
483
|
+
}
|
|
484
|
+
```
|
|
485
|
+
|
|
486
|
+
### Stream Conversion & Chaining
|
|
213
487
|
|
|
214
488
|
```typescript
|
|
215
489
|
import { convertStreamToMessages, chainRequests } from '@just-every/ensemble';
|
|
216
490
|
|
|
217
|
-
// Convert
|
|
218
|
-
const stream = request('claude-3
|
|
219
|
-
{ type: 'message', role: 'user', content: '
|
|
491
|
+
// Convert stream to conversation history
|
|
492
|
+
const stream = request('claude-3.5-sonnet', [
|
|
493
|
+
{ type: 'message', role: 'user', content: 'Write a haiku about coding' }
|
|
220
494
|
]);
|
|
221
495
|
|
|
222
496
|
const result = await convertStreamToMessages(stream);
|
|
223
|
-
console.log(result.messages);
|
|
224
|
-
console.log(result.fullResponse); //
|
|
497
|
+
console.log(result.messages); // Full conversation history
|
|
498
|
+
console.log(result.fullResponse); // Just the assistant's response
|
|
225
499
|
|
|
226
|
-
// Chain multiple
|
|
227
|
-
const
|
|
500
|
+
// Chain multiple models for multi-step tasks
|
|
501
|
+
const analysis = await chainRequests([
|
|
502
|
+
{
|
|
503
|
+
model: getModelFromClass('code'),
|
|
504
|
+
systemPrompt: 'Analyze this code for bugs and security issues',
|
|
505
|
+
},
|
|
228
506
|
{
|
|
229
|
-
model: '
|
|
230
|
-
systemPrompt: '
|
|
507
|
+
model: getModelFromClass('reasoning'),
|
|
508
|
+
systemPrompt: 'Prioritize the issues found and suggest fixes',
|
|
231
509
|
},
|
|
232
510
|
{
|
|
233
|
-
model: 'gpt-4o',
|
|
234
|
-
systemPrompt: '
|
|
511
|
+
model: 'gpt-4o-mini',
|
|
512
|
+
systemPrompt: 'Summarize the analysis in 3 bullet points',
|
|
235
513
|
}
|
|
236
514
|
], [
|
|
237
|
-
{ type: 'message', role: 'user', content:
|
|
515
|
+
{ type: 'message', role: 'user', content: codeToAnalyze }
|
|
238
516
|
]);
|
|
517
|
+
```
|
|
239
518
|
|
|
240
|
-
|
|
241
|
-
const streamWithTools = request('gpt-4o', messages, {
|
|
242
|
-
tools: [weatherTool]
|
|
243
|
-
});
|
|
519
|
+
### Image Utilities
|
|
244
520
|
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
}
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
521
|
+
```typescript
|
|
522
|
+
import { resizeImageForModel, imageToText } from '@just-every/ensemble';
|
|
523
|
+
|
|
524
|
+
// Auto-resize for specific model requirements
|
|
525
|
+
const resized = await resizeImageForModel(
|
|
526
|
+
base64ImageData,
|
|
527
|
+
'gpt-4o', // Different models have different size limits
|
|
528
|
+
{ maxDimension: 2048 }
|
|
529
|
+
);
|
|
530
|
+
|
|
531
|
+
// Extract text from images
|
|
532
|
+
const extractedText = await imageToText(imageBuffer);
|
|
533
|
+
console.log('Found text:', extractedText);
|
|
256
534
|
```
|
|
257
535
|
|
|
258
|
-
### Logging
|
|
259
|
-
|
|
260
|
-
The ensemble package includes a pluggable logging system for LLM requests and responses:
|
|
536
|
+
### Logging & Debugging
|
|
261
537
|
|
|
262
538
|
```typescript
|
|
263
539
|
import { setEnsembleLogger, EnsembleLogger } from '@just-every/ensemble';
|
|
264
540
|
|
|
265
|
-
//
|
|
266
|
-
class
|
|
541
|
+
// Production-ready logger example
|
|
542
|
+
class ProductionLogger implements EnsembleLogger {
|
|
267
543
|
log_llm_request(agentId: string, providerName: string, model: string, requestData: unknown, timestamp?: Date): string {
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
544
|
+
const requestId = `req_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
|
|
545
|
+
|
|
546
|
+
// Log to your monitoring system
|
|
547
|
+
logger.info('LLM Request', {
|
|
548
|
+
requestId,
|
|
549
|
+
agentId,
|
|
550
|
+
provider: providerName,
|
|
551
|
+
model,
|
|
552
|
+
timestamp,
|
|
553
|
+
// Be careful not to log sensitive data
|
|
554
|
+
messageCount: (requestData as any).messages?.length,
|
|
555
|
+
hasTools: !!(requestData as any).tools?.length
|
|
556
|
+
});
|
|
557
|
+
|
|
558
|
+
return requestId;
|
|
271
559
|
}
|
|
272
560
|
|
|
273
561
|
log_llm_response(requestId: string | undefined, responseData: unknown, timestamp?: Date): void {
|
|
274
|
-
|
|
275
|
-
|
|
562
|
+
const response = responseData as any;
|
|
563
|
+
|
|
564
|
+
logger.info('LLM Response', {
|
|
565
|
+
requestId,
|
|
566
|
+
timestamp,
|
|
567
|
+
inputTokens: response.usage?.input_tokens,
|
|
568
|
+
outputTokens: response.usage?.output_tokens,
|
|
569
|
+
totalCost: response.usage?.total_cost,
|
|
570
|
+
cached: response.usage?.cache_creation_input_tokens > 0
|
|
571
|
+
});
|
|
276
572
|
}
|
|
277
573
|
|
|
278
574
|
log_llm_error(requestId: string | undefined, errorData: unknown, timestamp?: Date): void {
|
|
279
|
-
|
|
280
|
-
|
|
575
|
+
logger.error('LLM Error', {
|
|
576
|
+
requestId,
|
|
577
|
+
timestamp,
|
|
578
|
+
error: errorData,
|
|
579
|
+
// Include retry information if available
|
|
580
|
+
retryAfter: (errorData as any).retryAfter
|
|
581
|
+
});
|
|
281
582
|
}
|
|
282
583
|
}
|
|
283
584
|
|
|
284
|
-
// Enable logging
|
|
285
|
-
setEnsembleLogger(new
|
|
585
|
+
// Enable logging globally
|
|
586
|
+
setEnsembleLogger(new ProductionLogger());
|
|
587
|
+
|
|
588
|
+
// Debug mode for development
|
|
589
|
+
if (process.env.NODE_ENV === 'development') {
|
|
590
|
+
setEnsembleLogger({
|
|
591
|
+
log_llm_request: (agent, provider, model, data) => {
|
|
592
|
+
console.log(`[${new Date().toISOString()}] → ${provider}/${model}`);
|
|
593
|
+
return Date.now().toString();
|
|
594
|
+
},
|
|
595
|
+
log_llm_response: (id, data) => {
|
|
596
|
+
const response = data as any;
|
|
597
|
+
console.log(`[${new Date().toISOString()}] ← ${response.usage?.total_tokens} tokens`);
|
|
598
|
+
},
|
|
599
|
+
log_llm_error: (id, error) => {
|
|
600
|
+
console.error(`[${new Date().toISOString()}] ✗ Error:`, error);
|
|
601
|
+
}
|
|
602
|
+
});
|
|
603
|
+
}
|
|
604
|
+
```
|
|
286
605
|
|
|
287
|
-
|
|
606
|
+
## Advanced Topics
|
|
607
|
+
|
|
608
|
+
### Custom Model Providers
|
|
609
|
+
|
|
610
|
+
```typescript
|
|
611
|
+
import { ModelProvider, registerExternalModel } from '@just-every/ensemble';
|
|
612
|
+
|
|
613
|
+
// Register a custom model
|
|
614
|
+
registerExternalModel({
|
|
615
|
+
id: 'my-custom-model',
|
|
616
|
+
provider: 'custom',
|
|
617
|
+
inputCost: 0.001,
|
|
618
|
+
outputCost: 0.002,
|
|
619
|
+
contextWindow: 8192,
|
|
620
|
+
maxOutput: 4096,
|
|
621
|
+
supportsTools: true,
|
|
622
|
+
supportsVision: false,
|
|
623
|
+
supportsStreaming: true
|
|
624
|
+
});
|
|
625
|
+
|
|
626
|
+
// Use your custom model
|
|
627
|
+
const stream = request('my-custom-model', messages);
|
|
628
|
+
```
|
|
629
|
+
|
|
630
|
+
### Performance Optimization
|
|
631
|
+
|
|
632
|
+
```typescript
|
|
633
|
+
// Batch processing with concurrency control
|
|
634
|
+
async function batchProcess(items: string[], concurrency = 3) {
|
|
635
|
+
const results = [];
|
|
636
|
+
const queue = [...items];
|
|
637
|
+
|
|
638
|
+
async function worker() {
|
|
639
|
+
while (queue.length > 0) {
|
|
640
|
+
const item = queue.shift()!;
|
|
641
|
+
const stream = request('gpt-4o-mini', [
|
|
642
|
+
{ type: 'message', role: 'user', content: `Process: ${item}` }
|
|
643
|
+
]);
|
|
644
|
+
|
|
645
|
+
const result = await convertStreamToMessages(stream);
|
|
646
|
+
results.push({ item, result: result.fullResponse });
|
|
647
|
+
}
|
|
648
|
+
}
|
|
649
|
+
|
|
650
|
+
// Run workers concurrently
|
|
651
|
+
await Promise.all(Array(concurrency).fill(null).map(() => worker()));
|
|
652
|
+
return results;
|
|
653
|
+
}
|
|
654
|
+
|
|
655
|
+
// Stream multiple requests in parallel
|
|
656
|
+
async function parallelStreaming(prompts: string[]) {
|
|
657
|
+
const streams = prompts.map(prompt =>
|
|
658
|
+
request('claude-3.5-haiku', [
|
|
659
|
+
{ type: 'message', role: 'user', content: prompt }
|
|
660
|
+
])
|
|
661
|
+
);
|
|
662
|
+
|
|
663
|
+
// Process all streams concurrently
|
|
664
|
+
const results = await Promise.all(
|
|
665
|
+
streams.map(stream => convertStreamToMessages(stream))
|
|
666
|
+
);
|
|
667
|
+
|
|
668
|
+
return results.map(r => r.fullResponse);
|
|
669
|
+
}
|
|
288
670
|
```
|
|
289
671
|
|
|
290
672
|
## Environment Variables
|