@just-every/ensemble 0.1.16 → 0.1.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +93 -676
- package/dist/index.d.ts +9 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +23 -2
- package/dist/index.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -3,28 +3,7 @@
|
|
|
3
3
|
[](https://www.npmjs.com/package/@just-every/ensemble)
|
|
4
4
|
[](https://github.com/just-every/ensemble/actions)
|
|
5
5
|
|
|
6
|
-
A unified interface for interacting with multiple LLM providers
|
|
7
|
-
|
|
8
|
-
## Why Use an Ensemble Approach?
|
|
9
|
-
|
|
10
|
-
The ensemble pattern - rotating between multiple LLM providers dynamically - offers compelling advantages over relying on a single model. Research has shown that sampling multiple reasoning chains and using consensus answers can improve performance by double-digit margins on complex tasks. By automating this at runtime rather than prompt-engineering time, ensemble delivers more reliable and robust AI interactions.
|
|
11
|
-
|
|
12
|
-
Beyond accuracy improvements, ensemble requests provide practical benefits for production systems. Different models carry unique training biases and stylistic patterns - rotating between them dilutes individual quirks and prevents conversations from getting "stuck" in one voice. The approach also ensures resilience: when one provider experiences an outage, quota limit, or latency spike, requests seamlessly route to alternatives. You can optimize costs by routing simple tasks to cheaper models while reserving premium models for complex reasoning. Need regex help? Route to a code-specialized model. Need emotional calibration? Use a dialogue expert. The ensemble gives you this granularity without complex conditional logic.
|
|
13
|
-
|
|
14
|
-
Perhaps most importantly, the ensemble approach future-proofs your application. Model quality and pricing change weekly in the fast-moving LLM landscape. With ensemble, you can trial newcomers on a small percentage of traffic, compare real metrics, then scale up or roll back within minutes - all without changing your code.
|
|
15
|
-
|
|
16
|
-
## Features
|
|
17
|
-
|
|
18
|
-
- **Multi-provider support**: Claude, OpenAI, Gemini, Deepseek, Grok, OpenRouter
|
|
19
|
-
- **AsyncGenerator API**: Clean, native async iteration for streaming responses
|
|
20
|
-
- **Simple interface**: Direct async generator pattern matches native LLM APIs
|
|
21
|
-
- **Tool calling**: Function calling support where available
|
|
22
|
-
- **Stream conversion**: Convert streaming events to conversation history for chaining
|
|
23
|
-
- **Image processing**: Image-to-text and image utilities
|
|
24
|
-
- **Cost tracking**: Token usage and cost monitoring
|
|
25
|
-
- **Quota management**: Rate limiting and usage tracking
|
|
26
|
-
- **Pluggable logging**: Configurable request/response logging
|
|
27
|
-
- **Type safety**: Full TypeScript support
|
|
6
|
+
A unified interface for interacting with multiple LLM providers (OpenAI, Anthropic, Google, etc.) with streaming support, tool calling, and embeddings.
|
|
28
7
|
|
|
29
8
|
## Installation
|
|
30
9
|
|
|
@@ -32,723 +11,144 @@ Perhaps most importantly, the ensemble approach future-proofs your application.
|
|
|
32
11
|
npm install @just-every/ensemble
|
|
33
12
|
```
|
|
34
13
|
|
|
35
|
-
### Migration from OpenAI SDK
|
|
36
|
-
|
|
37
|
-
If you're currently using the OpenAI SDK, migration is simple:
|
|
38
|
-
|
|
39
|
-
```typescript
|
|
40
|
-
// Before:
|
|
41
|
-
import OpenAI from 'openai';
|
|
42
|
-
const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
|
|
43
|
-
|
|
44
|
-
// After:
|
|
45
|
-
import OpenAIEnsemble from '@just-every/ensemble/openai-compat';
|
|
46
|
-
const client = OpenAIEnsemble;
|
|
47
|
-
|
|
48
|
-
// Your existing code works unchanged!
|
|
49
|
-
const completion = await client.chat.completions.create({ /* ... */ });
|
|
50
|
-
```
|
|
51
|
-
|
|
52
14
|
## Quick Start
|
|
53
15
|
|
|
54
16
|
```typescript
|
|
55
17
|
import { request } from '@just-every/ensemble';
|
|
56
18
|
|
|
57
|
-
// Simple request
|
|
58
|
-
const
|
|
59
|
-
{ type: 'message', role: 'user', content: 'Hello
|
|
60
|
-
])
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
for await (const event of stream) {
|
|
64
|
-
if (event.type === 'message_delta') {
|
|
65
|
-
console.log(event.content);
|
|
66
|
-
} else if (event.type === 'message_complete') {
|
|
67
|
-
console.log('Request completed!');
|
|
68
|
-
} else if (event.type === 'error') {
|
|
69
|
-
console.error('Request failed:', event.error);
|
|
70
|
-
}
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
// With tools
|
|
74
|
-
const toolStream = request('gpt-4o', [
|
|
75
|
-
{ type: 'message', role: 'user', content: 'What is the weather?' }
|
|
76
|
-
], {
|
|
77
|
-
tools: [{
|
|
78
|
-
function: async (location: string) => {
|
|
79
|
-
// Tool implementation
|
|
80
|
-
return `Weather in ${location}: Sunny, 72°F`;
|
|
81
|
-
},
|
|
82
|
-
definition: {
|
|
83
|
-
type: 'function',
|
|
84
|
-
function: {
|
|
85
|
-
name: 'get_weather',
|
|
86
|
-
description: 'Get current weather',
|
|
87
|
-
parameters: {
|
|
88
|
-
type: 'object',
|
|
89
|
-
properties: {
|
|
90
|
-
location: { type: 'string' }
|
|
91
|
-
},
|
|
92
|
-
required: ['location']
|
|
93
|
-
}
|
|
94
|
-
}
|
|
95
|
-
}
|
|
96
|
-
}]
|
|
97
|
-
});
|
|
98
|
-
|
|
99
|
-
// Process tool calls
|
|
100
|
-
for await (const event of toolStream) {
|
|
101
|
-
if (event.type === 'tool_start') {
|
|
102
|
-
console.log('Tool called:', event.tool_calls[0].function.name);
|
|
103
|
-
} else if (event.type === 'message_delta') {
|
|
104
|
-
console.log(event.content);
|
|
105
|
-
}
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
// Early termination
|
|
109
|
-
const earlyStream = request('claude-3-5-sonnet-20241022', [
|
|
110
|
-
{ type: 'message', role: 'user', content: 'Count to 100' }
|
|
111
|
-
]);
|
|
112
|
-
|
|
113
|
-
let count = 0;
|
|
114
|
-
for await (const event of earlyStream) {
|
|
115
|
-
if (event.type === 'message_delta') {
|
|
116
|
-
count++;
|
|
117
|
-
if (count >= 10) break; // Stop after 10 events
|
|
19
|
+
// Simple streaming request
|
|
20
|
+
for await (const event of request('gpt-4o-mini', [
|
|
21
|
+
{ type: 'message', role: 'user', content: 'Hello!' }
|
|
22
|
+
])) {
|
|
23
|
+
if (event.type === 'text_delta') {
|
|
24
|
+
process.stdout.write(event.delta);
|
|
118
25
|
}
|
|
119
26
|
}
|
|
120
27
|
```
|
|
121
28
|
|
|
122
|
-
##
|
|
123
|
-
|
|
124
|
-
### Core Functions
|
|
125
|
-
|
|
126
|
-
#### `request(model, messages, options?)`
|
|
127
|
-
|
|
128
|
-
Main function for making LLM requests with streaming responses and automatic tool execution.
|
|
129
|
-
|
|
130
|
-
**Parameters:**
|
|
131
|
-
- `model` (string): Model identifier (e.g., 'gpt-4o', 'claude-3.5-sonnet', 'gemini-2.0-flash')
|
|
132
|
-
- `messages` (ResponseInput): Array of message objects in the conversation
|
|
133
|
-
- `options` (RequestOptions): Optional configuration object
|
|
134
|
-
|
|
135
|
-
**Returns:** `AsyncGenerator<EnsembleStreamEvent>` - An async generator that yields streaming events
|
|
136
|
-
|
|
137
|
-
```typescript
|
|
138
|
-
interface RequestOptions {
|
|
139
|
-
agentId?: string; // Identifier for logging/tracking
|
|
140
|
-
tools?: ToolFunction[]; // Array of tool definitions
|
|
141
|
-
toolChoice?: ToolChoice; // Control tool selection behavior
|
|
142
|
-
maxToolCalls?: number; // Max rounds of tool execution (default: 10, 0 = disabled)
|
|
143
|
-
processToolCall?: (toolCalls: ToolCall[]) => Promise<any>; // Custom tool handler
|
|
144
|
-
modelSettings?: ModelSettings; // Temperature, maxTokens, etc.
|
|
145
|
-
modelClass?: ModelClassID; // 'standard' | 'code' | 'reasoning' | 'monologue'
|
|
146
|
-
responseFormat?: ResponseFormat; // JSON mode or structured output
|
|
147
|
-
maxImageDimension?: number; // Auto-resize images (default: provider-specific)
|
|
148
|
-
fallbackModels?: string[]; // Models to try if primary fails
|
|
149
|
-
}
|
|
150
|
-
|
|
151
|
-
// Stream event types
|
|
152
|
-
type EnsembleStreamEvent =
|
|
153
|
-
| { type: 'text_delta', delta: string }
|
|
154
|
-
| { type: 'text', text: string }
|
|
155
|
-
| { type: 'message_delta', content: string }
|
|
156
|
-
| { type: 'message_complete', content: string }
|
|
157
|
-
| { type: 'tool_start', tool_calls: ToolCall[] }
|
|
158
|
-
| { type: 'cost_update', usage: TokenUsage }
|
|
159
|
-
| { type: 'stream_end', timestamp: string }
|
|
160
|
-
| { type: 'error', error: Error };
|
|
161
|
-
```
|
|
162
|
-
|
|
29
|
+
## Core Functions
|
|
163
30
|
|
|
164
|
-
###
|
|
31
|
+
### `request(model, messages, options?)`
|
|
165
32
|
|
|
166
|
-
|
|
33
|
+
Make streaming LLM requests with automatic tool execution.
|
|
167
34
|
|
|
168
35
|
```typescript
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
const reasoningModel = getModelFromClass('reasoning'); // For complex reasoning tasks
|
|
174
|
-
|
|
175
|
-
// Check if a model exists
|
|
176
|
-
const modelInfo = findModel('gpt-4o');
|
|
177
|
-
if (modelInfo) {
|
|
178
|
-
console.log(`Provider: ${modelInfo.provider}`);
|
|
179
|
-
console.log(`Input cost: $${modelInfo.inputCost}/million tokens`);
|
|
180
|
-
}
|
|
181
|
-
|
|
182
|
-
// List all available models
|
|
183
|
-
for (const [modelName, info] of Object.entries(MODEL_REGISTRY)) {
|
|
184
|
-
console.log(`${modelName}: ${info.provider}`);
|
|
185
|
-
}
|
|
186
|
-
```
|
|
187
|
-
|
|
188
|
-
#### Model Classes
|
|
189
|
-
|
|
190
|
-
- **standard**: General-purpose models for everyday tasks
|
|
191
|
-
- **code**: Optimized for programming and technical tasks
|
|
192
|
-
- **reasoning**: Advanced models for complex logical reasoning
|
|
193
|
-
- **monologue**: Models supporting extended thinking/reasoning traces
|
|
194
|
-
|
|
195
|
-
### Message Types
|
|
196
|
-
|
|
197
|
-
```typescript
|
|
198
|
-
// User/Assistant messages
|
|
199
|
-
interface TextMessage {
|
|
200
|
-
type: 'message';
|
|
201
|
-
role: 'user' | 'assistant' | 'developer';
|
|
202
|
-
content: string | MessageContent[];
|
|
203
|
-
status?: 'completed' | 'in_progress';
|
|
204
|
-
}
|
|
205
|
-
|
|
206
|
-
// Multi-modal content
|
|
207
|
-
type MessageContent =
|
|
208
|
-
| { type: 'input_text', text: string }
|
|
209
|
-
| { type: 'input_image', image_url: string, detail?: 'auto' | 'low' | 'high' }
|
|
210
|
-
| { type: 'tool_use', id: string, name: string, arguments: any };
|
|
211
|
-
|
|
212
|
-
// Tool-related messages
|
|
213
|
-
interface FunctionCall {
|
|
214
|
-
type: 'function_call';
|
|
215
|
-
id: string;
|
|
216
|
-
name: string;
|
|
217
|
-
arguments: string;
|
|
218
|
-
}
|
|
219
|
-
|
|
220
|
-
interface FunctionCallOutput {
|
|
221
|
-
type: 'function_call_output';
|
|
222
|
-
id: string;
|
|
223
|
-
output: string;
|
|
224
|
-
}
|
|
225
|
-
```
|
|
226
|
-
|
|
227
|
-
## Common Use Cases
|
|
228
|
-
|
|
229
|
-
### 1. Basic Conversations
|
|
230
|
-
|
|
231
|
-
```typescript
|
|
232
|
-
import { request } from '@just-every/ensemble';
|
|
36
|
+
// Basic usage
|
|
37
|
+
const stream = request('claude-3.5-sonnet', [
|
|
38
|
+
{ type: 'message', role: 'user', content: 'Explain quantum computing' }
|
|
39
|
+
]);
|
|
233
40
|
|
|
234
|
-
|
|
235
|
-
for await (const event of request('gpt-4o-mini', [
|
|
236
|
-
{ type: 'message', role: 'user', content: 'Explain quantum computing in simple terms' }
|
|
237
|
-
])) {
|
|
41
|
+
for await (const event of stream) {
|
|
238
42
|
if (event.type === 'text_delta') {
|
|
239
43
|
process.stdout.write(event.delta);
|
|
44
|
+
} else if (event.type === 'cost_update') {
|
|
45
|
+
console.log(`Cost: $${event.usage.total_cost}`);
|
|
240
46
|
}
|
|
241
47
|
}
|
|
242
48
|
|
|
243
|
-
//
|
|
244
|
-
const
|
|
245
|
-
|
|
246
|
-
{ type: 'message', role: 'user', content: 'How do I center a div in CSS?' },
|
|
247
|
-
{ type: 'message', role: 'assistant', content: 'Here are several ways...' },
|
|
248
|
-
{ type: 'message', role: 'user', content: 'What about using flexbox?' }
|
|
249
|
-
];
|
|
250
|
-
|
|
251
|
-
for await (const event of request('claude-3.5-sonnet', messages)) {
|
|
252
|
-
// Handle streaming response
|
|
253
|
-
}
|
|
254
|
-
```
|
|
255
|
-
|
|
256
|
-
### 2. Tool Calling & Function Execution
|
|
257
|
-
|
|
258
|
-
```typescript
|
|
259
|
-
// Define tools with TypeScript types
|
|
260
|
-
interface WeatherParams {
|
|
261
|
-
city: string;
|
|
262
|
-
unit?: 'celsius' | 'fahrenheit';
|
|
263
|
-
}
|
|
264
|
-
|
|
265
|
-
const weatherTool: ToolFunction = {
|
|
266
|
-
function: async ({ city, unit = 'celsius' }: WeatherParams) => {
|
|
267
|
-
// Real implementation would call weather API
|
|
268
|
-
const temp = unit === 'celsius' ? 22 : 72;
|
|
269
|
-
return `${temp}°${unit[0].toUpperCase()} in ${city}`;
|
|
270
|
-
},
|
|
49
|
+
// With tools
|
|
50
|
+
const tools = [{
|
|
51
|
+
function: async ({ city }) => `Weather in ${city}: Sunny, 72°F`,
|
|
271
52
|
definition: {
|
|
272
53
|
type: 'function',
|
|
273
54
|
function: {
|
|
274
55
|
name: 'get_weather',
|
|
275
|
-
description: 'Get
|
|
56
|
+
description: 'Get weather for a city',
|
|
276
57
|
parameters: {
|
|
277
58
|
type: 'object',
|
|
278
59
|
properties: {
|
|
279
|
-
city: { type: 'string'
|
|
280
|
-
unit: {
|
|
281
|
-
type: 'string',
|
|
282
|
-
enum: ['celsius', 'fahrenheit'],
|
|
283
|
-
description: 'Temperature unit'
|
|
284
|
-
}
|
|
60
|
+
city: { type: 'string' }
|
|
285
61
|
},
|
|
286
62
|
required: ['city']
|
|
287
63
|
}
|
|
288
64
|
}
|
|
289
65
|
}
|
|
290
|
-
};
|
|
291
|
-
|
|
292
|
-
// Use with automatic execution
|
|
293
|
-
for await (const event of request('gpt-4o', [
|
|
294
|
-
{ type: 'message', role: 'user', content: 'What\'s the weather in Tokyo and New York?' }
|
|
295
|
-
], { tools: [weatherTool] })) {
|
|
296
|
-
if (event.type === 'tool_start') {
|
|
297
|
-
console.log('Calling tool:', event.tool_calls[0].function.name);
|
|
298
|
-
} else if (event.type === 'text_delta') {
|
|
299
|
-
process.stdout.write(event.delta);
|
|
300
|
-
}
|
|
301
|
-
}
|
|
302
|
-
```
|
|
303
|
-
|
|
304
|
-
### 3. Model Selection Strategies
|
|
66
|
+
}];
|
|
305
67
|
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
// Route based on task type
|
|
310
|
-
async function intelligentRequest(task: string, messages: ResponseInput) {
|
|
311
|
-
let model: string;
|
|
312
|
-
|
|
313
|
-
if (task.includes('code') || task.includes('debug')) {
|
|
314
|
-
model = getModelFromClass('code'); // Best code model
|
|
315
|
-
} else if (task.includes('analyze') || task.includes('reasoning')) {
|
|
316
|
-
model = getModelFromClass('reasoning'); // Best reasoning model
|
|
317
|
-
} else {
|
|
318
|
-
model = getModelFromClass('standard'); // Cost-effective general model
|
|
319
|
-
}
|
|
320
|
-
|
|
321
|
-
console.log(`Using ${model} for ${task}`);
|
|
322
|
-
|
|
323
|
-
return request(model, messages, {
|
|
324
|
-
fallbackModels: ['gpt-4o-mini', 'claude-3-5-haiku'] // Fallback options
|
|
325
|
-
});
|
|
326
|
-
}
|
|
327
|
-
|
|
328
|
-
// Use model rotation for consensus
|
|
329
|
-
async function consensusRequest(messages: ResponseInput) {
|
|
330
|
-
const models = ['gpt-4o', 'claude-3.5-sonnet', 'gemini-2.0-flash'];
|
|
331
|
-
const responses = [];
|
|
332
|
-
|
|
333
|
-
for (const model of models) {
|
|
334
|
-
const stream = request(model, messages);
|
|
335
|
-
const result = await convertStreamToMessages(stream);
|
|
336
|
-
responses.push(result.fullResponse);
|
|
337
|
-
}
|
|
338
|
-
|
|
339
|
-
// Analyze responses for consensus
|
|
340
|
-
return analyzeConsensus(responses);
|
|
341
|
-
}
|
|
68
|
+
const toolStream = request('gpt-4o', [
|
|
69
|
+
{ type: 'message', role: 'user', content: 'What\'s the weather in Paris?' }
|
|
70
|
+
], { tools });
|
|
342
71
|
```
|
|
343
72
|
|
|
344
|
-
###
|
|
345
|
-
|
|
346
|
-
```typescript
|
|
347
|
-
// JSON mode for reliable parsing
|
|
348
|
-
const jsonStream = request('gpt-4o', [
|
|
349
|
-
{ type: 'message', role: 'user', content: 'List 3 programming languages with their pros/cons as JSON' }
|
|
350
|
-
], {
|
|
351
|
-
responseFormat: { type: 'json_object' }
|
|
352
|
-
});
|
|
73
|
+
### `embed(text, options?)`
|
|
353
74
|
|
|
354
|
-
|
|
355
|
-
for await (const event of jsonStream) {
|
|
356
|
-
if (event.type === 'text_delta') {
|
|
357
|
-
jsonContent += event.delta;
|
|
358
|
-
}
|
|
359
|
-
}
|
|
360
|
-
|
|
361
|
-
const data = JSON.parse(jsonContent);
|
|
362
|
-
|
|
363
|
-
// Structured output with schema validation
|
|
364
|
-
const schema = {
|
|
365
|
-
type: 'object',
|
|
366
|
-
properties: {
|
|
367
|
-
name: { type: 'string' },
|
|
368
|
-
age: { type: 'number' },
|
|
369
|
-
skills: {
|
|
370
|
-
type: 'array',
|
|
371
|
-
items: { type: 'string' }
|
|
372
|
-
}
|
|
373
|
-
},
|
|
374
|
-
required: ['name', 'age', 'skills']
|
|
375
|
-
};
|
|
376
|
-
|
|
377
|
-
const structuredStream = request('gpt-4o', [
|
|
378
|
-
{ type: 'message', role: 'user', content: 'Generate a developer profile' }
|
|
379
|
-
], {
|
|
380
|
-
responseFormat: {
|
|
381
|
-
type: 'json_schema',
|
|
382
|
-
json_schema: {
|
|
383
|
-
name: 'developer_profile',
|
|
384
|
-
schema: schema,
|
|
385
|
-
strict: true
|
|
386
|
-
}
|
|
387
|
-
}
|
|
388
|
-
});
|
|
389
|
-
```
|
|
390
|
-
|
|
391
|
-
### 5. Image Processing
|
|
75
|
+
Generate embeddings for semantic search and RAG applications.
|
|
392
76
|
|
|
393
77
|
```typescript
|
|
394
|
-
//
|
|
395
|
-
const
|
|
396
|
-
|
|
397
|
-
type: 'message',
|
|
398
|
-
role: 'user',
|
|
399
|
-
content: [
|
|
400
|
-
{ type: 'input_text', text: 'What\'s in this image? Describe any text you see.' },
|
|
401
|
-
{
|
|
402
|
-
type: 'input_image',
|
|
403
|
-
image_url: 'data:image/jpeg;base64,...',
|
|
404
|
-
detail: 'high' // 'auto' | 'low' | 'high'
|
|
405
|
-
}
|
|
406
|
-
]
|
|
407
|
-
}
|
|
408
|
-
], {
|
|
409
|
-
maxImageDimension: 2048 // Auto-resize large images
|
|
410
|
-
});
|
|
78
|
+
// Simple embedding
|
|
79
|
+
const embedding = await embed('Hello, world!');
|
|
80
|
+
console.log(`Dimension: ${embedding.length}`); // e.g., 1536
|
|
411
81
|
|
|
412
|
-
//
|
|
413
|
-
const
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
role: 'user',
|
|
417
|
-
content: [
|
|
418
|
-
{ type: 'input_text', text: 'Compare these two designs:' },
|
|
419
|
-
{ type: 'input_image', image_url: 'https://example.com/design1.png' },
|
|
420
|
-
{ type: 'input_image', image_url: 'https://example.com/design2.png' }
|
|
421
|
-
]
|
|
422
|
-
}
|
|
423
|
-
]);
|
|
424
|
-
```
|
|
425
|
-
|
|
426
|
-
### 6. Error Handling & Resilience
|
|
427
|
-
|
|
428
|
-
```typescript
|
|
429
|
-
import { isRateLimitError, isAuthenticationError } from '@just-every/ensemble';
|
|
430
|
-
|
|
431
|
-
async function robustRequest(model: string, messages: ResponseInput, options?: RequestOptions) {
|
|
432
|
-
const maxRetries = 3;
|
|
433
|
-
let lastError;
|
|
434
|
-
|
|
435
|
-
for (let i = 0; i < maxRetries; i++) {
|
|
436
|
-
try {
|
|
437
|
-
const events = [];
|
|
438
|
-
for await (const event of request(model, messages, options)) {
|
|
439
|
-
if (event.type === 'error') {
|
|
440
|
-
throw event.error;
|
|
441
|
-
}
|
|
442
|
-
events.push(event);
|
|
443
|
-
}
|
|
444
|
-
return events;
|
|
445
|
-
|
|
446
|
-
} catch (error) {
|
|
447
|
-
lastError = error;
|
|
448
|
-
|
|
449
|
-
if (isAuthenticationError(error)) {
|
|
450
|
-
throw error; // Don't retry auth errors
|
|
451
|
-
}
|
|
452
|
-
|
|
453
|
-
if (isRateLimitError(error)) {
|
|
454
|
-
const waitTime = error.retryAfter || Math.pow(2, i) * 1000;
|
|
455
|
-
console.log(`Rate limited. Waiting ${waitTime}ms...`);
|
|
456
|
-
await new Promise(resolve => setTimeout(resolve, waitTime));
|
|
457
|
-
continue;
|
|
458
|
-
}
|
|
459
|
-
|
|
460
|
-
// Try fallback model
|
|
461
|
-
if (options?.fallbackModels?.[i]) {
|
|
462
|
-
model = options.fallbackModels[i];
|
|
463
|
-
console.log(`Falling back to ${model}`);
|
|
464
|
-
continue;
|
|
465
|
-
}
|
|
466
|
-
}
|
|
467
|
-
}
|
|
468
|
-
|
|
469
|
-
throw lastError;
|
|
470
|
-
}
|
|
471
|
-
```
|
|
472
|
-
|
|
473
|
-
## Utilities
|
|
474
|
-
|
|
475
|
-
### Cost & Usage Tracking
|
|
476
|
-
|
|
477
|
-
```typescript
|
|
478
|
-
import { costTracker, quotaTracker } from '@just-every/ensemble';
|
|
479
|
-
|
|
480
|
-
// Track costs across requests
|
|
481
|
-
for await (const event of request('gpt-4o', messages)) {
|
|
482
|
-
if (event.type === 'cost_update') {
|
|
483
|
-
console.log(`Tokens: ${event.usage.input_tokens} in, ${event.usage.output_tokens} out`);
|
|
484
|
-
console.log(`Cost: $${event.usage.total_cost.toFixed(4)}`);
|
|
485
|
-
}
|
|
486
|
-
}
|
|
487
|
-
|
|
488
|
-
// Get cumulative costs
|
|
489
|
-
const usage = costTracker.getAllUsage();
|
|
490
|
-
for (const [model, stats] of Object.entries(usage)) {
|
|
491
|
-
console.log(`${model}: $${stats.total_cost.toFixed(2)} for ${stats.request_count} requests`);
|
|
492
|
-
}
|
|
82
|
+
// With specific model
|
|
83
|
+
const queryEmbedding = await embed('Search query', {
|
|
84
|
+
model: 'text-embedding-3-large'
|
|
85
|
+
});
|
|
493
86
|
|
|
494
|
-
//
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
const
|
|
499
|
-
|
|
87
|
+
// Calculate similarity
|
|
88
|
+
function cosineSimilarity(a: number[], b: number[]): number {
|
|
89
|
+
const dotProduct = a.reduce((sum, val, i) => sum + val * b[i], 0);
|
|
90
|
+
const normA = Math.sqrt(a.reduce((sum, val) => sum + val * val, 0));
|
|
91
|
+
const normB = Math.sqrt(b.reduce((sum, val) => sum + val * val, 0));
|
|
92
|
+
return dotProduct / (normA * normB);
|
|
500
93
|
}
|
|
501
|
-
```
|
|
502
|
-
|
|
503
|
-
### Stream Conversion & Chaining
|
|
504
94
|
|
|
505
|
-
|
|
506
|
-
import { convertStreamToMessages, chainRequests } from '@just-every/ensemble';
|
|
507
|
-
|
|
508
|
-
let currentMessages = [
|
|
509
|
-
{ type: 'message', role: 'user', content: 'Write a haiku about coding' },
|
|
510
|
-
{ type: 'message', role: 'user', content: 'Make it really long' }
|
|
511
|
-
];
|
|
512
|
-
|
|
513
|
-
let messages = [
|
|
514
|
-
{ type: 'message', role: 'developer', content: 'You are a helpful coding assistant' },
|
|
515
|
-
{ type: 'message', role: 'user', content: 'How do I center a div in CSS?' },
|
|
516
|
-
];
|
|
517
|
-
messages = [...messages, ...(await convertStreamToMessages(request('claude-4-sonnet', messages))).messages];
|
|
518
|
-
messages = [...messages, ...(await convertStreamToMessages(request(getModelFromClass('reasoning_mini'), messages))).messages];
|
|
519
|
-
messages = [...messages, ...(await convertStreamToMessages(request('gemini-2.5-flash', messages))).messages];
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
console.log(result.messages); // Full conversation history
|
|
523
|
-
console.log(result.fullResponse); // Just the assistant's response
|
|
524
|
-
|
|
525
|
-
// Chain multiple models for multi-step tasks
|
|
526
|
-
const analysis = await chainRequests(
|
|
527
|
-
[
|
|
528
|
-
{ type: 'message', role: 'user', content: codeToAnalyze }
|
|
529
|
-
],
|
|
530
|
-
[
|
|
531
|
-
{
|
|
532
|
-
model: getModelFromClass('code'),
|
|
533
|
-
systemPrompt: 'Analyze this code for bugs and security issues',
|
|
534
|
-
},
|
|
535
|
-
{
|
|
536
|
-
model: getModelFromClass('reasoning'),
|
|
537
|
-
systemPrompt: 'Prioritize the issues found and suggest fixes',
|
|
538
|
-
},
|
|
539
|
-
{
|
|
540
|
-
model: 'gpt-4.1-mini',
|
|
541
|
-
systemPrompt: 'Summarize the analysis in 3 bullet points',
|
|
542
|
-
}
|
|
543
|
-
]);
|
|
95
|
+
const similarity = cosineSimilarity(embedding1, embedding2); // embedding1, embedding2: two vectors previously returned by embed()
|
|
544
96
|
```
|
|
545
97
|
|
|
546
|
-
###
|
|
547
|
-
|
|
548
|
-
```typescript
|
|
549
|
-
import { resizeImageForModel, imageToText } from '@just-every/ensemble';
|
|
550
|
-
|
|
551
|
-
// Auto-resize for specific model requirements
|
|
552
|
-
const resized = await resizeImageForModel(
|
|
553
|
-
base64ImageData,
|
|
554
|
-
'gpt-4o', // Different models have different size limits
|
|
555
|
-
{ maxDimension: 2048 }
|
|
556
|
-
);
|
|
557
|
-
|
|
558
|
-
// Extract text from images
|
|
559
|
-
const extractedText = await imageToText(imageBuffer);
|
|
560
|
-
console.log('Found text:', extractedText);
|
|
561
|
-
```
|
|
98
|
+
### `chainRequests(messages, requests)`
|
|
562
99
|
|
|
563
|
-
|
|
100
|
+
Chain multiple LLM calls, using the output of one as input to the next.
|
|
564
101
|
|
|
565
102
|
```typescript
|
|
566
|
-
import {
|
|
567
|
-
|
|
568
|
-
// Production-ready logger example
|
|
569
|
-
class ProductionLogger implements EnsembleLogger {
|
|
570
|
-
log_llm_request(agentId: string, providerName: string, model: string, requestData: unknown, timestamp?: Date): string {
|
|
571
|
-
const requestId = `req_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
|
|
572
|
-
|
|
573
|
-
// Log to your monitoring system
|
|
574
|
-
logger.info('LLM Request', {
|
|
575
|
-
requestId,
|
|
576
|
-
agentId,
|
|
577
|
-
provider: providerName,
|
|
578
|
-
model,
|
|
579
|
-
timestamp,
|
|
580
|
-
// Be careful not to log sensitive data
|
|
581
|
-
messageCount: (requestData as any).messages?.length,
|
|
582
|
-
hasTools: !!(requestData as any).tools?.length
|
|
583
|
-
});
|
|
584
|
-
|
|
585
|
-
return requestId;
|
|
586
|
-
}
|
|
587
|
-
|
|
588
|
-
log_llm_response(requestId: string | undefined, responseData: unknown, timestamp?: Date): void {
|
|
589
|
-
const response = responseData as any;
|
|
590
|
-
|
|
591
|
-
logger.info('LLM Response', {
|
|
592
|
-
requestId,
|
|
593
|
-
timestamp,
|
|
594
|
-
inputTokens: response.usage?.input_tokens,
|
|
595
|
-
outputTokens: response.usage?.output_tokens,
|
|
596
|
-
totalCost: response.usage?.total_cost,
|
|
597
|
-
cached: response.usage?.cache_creation_input_tokens > 0
|
|
598
|
-
});
|
|
599
|
-
}
|
|
103
|
+
import { chainRequests } from '@just-every/ensemble';
|
|
600
104
|
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
retryAfter: (errorData as any).retryAfter
|
|
608
|
-
});
|
|
609
|
-
}
|
|
610
|
-
}
|
|
611
|
-
|
|
612
|
-
// Enable logging globally
|
|
613
|
-
setEnsembleLogger(new ProductionLogger());
|
|
614
|
-
|
|
615
|
-
// Debug mode for development
|
|
616
|
-
if (process.env.NODE_ENV === 'development') {
|
|
617
|
-
setEnsembleLogger({
|
|
618
|
-
log_llm_request: (agent, provider, model, data) => {
|
|
619
|
-
console.log(`[${new Date().toISOString()}] → ${provider}/${model}`);
|
|
620
|
-
return Date.now().toString();
|
|
105
|
+
const result = await chainRequests(
|
|
106
|
+
[{ type: 'message', role: 'user', content: 'Analyze this code for bugs: ...' }],
|
|
107
|
+
[
|
|
108
|
+
{
|
|
109
|
+
model: 'gpt-4o',
|
|
110
|
+
systemPrompt: 'You are a code reviewer. Find bugs and security issues.'
|
|
621
111
|
},
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
112
|
+
{
|
|
113
|
+
model: 'claude-3.5-sonnet',
|
|
114
|
+
systemPrompt: 'Prioritize the issues found and suggest fixes.'
|
|
625
115
|
},
|
|
626
|
-
|
|
627
|
-
|
|
116
|
+
{
|
|
117
|
+
model: 'gpt-4o-mini',
|
|
118
|
+
systemPrompt: 'Summarize the analysis in 3 bullet points.'
|
|
628
119
|
}
|
|
629
|
-
|
|
630
|
-
|
|
120
|
+
]
|
|
121
|
+
);
|
|
122
|
+
|
|
123
|
+
console.log(result.fullResponse);
|
|
631
124
|
```
|
|
632
125
|
|
|
633
|
-
##
|
|
126
|
+
## Supported Providers
|
|
127
|
+
|
|
128
|
+
- **OpenAI**: GPT-4o, GPT-4o-mini, o1-preview, o1-mini
|
|
129
|
+
- **Anthropic**: Claude 3.5 Sonnet, Claude 3.5 Haiku
|
|
130
|
+
- **Google**: Gemini 2.0 Flash, Gemini 1.5 Pro
|
|
131
|
+
- **DeepSeek**: DeepSeek Chat, DeepSeek Coder
|
|
132
|
+
- **xAI**: Grok 2, Grok Beta
|
|
133
|
+
- **OpenRouter**: Access to 100+ models
|
|
634
134
|
|
|
635
|
-
|
|
135
|
+
## OpenAI SDK Compatibility
|
|
636
136
|
|
|
637
|
-
|
|
137
|
+
Drop-in replacement for the OpenAI SDK:
|
|
638
138
|
|
|
639
139
|
```typescript
|
|
140
|
+
// Instead of: import OpenAI from 'openai';
|
|
640
141
|
import OpenAIEnsemble from '@just-every/ensemble/openai-compat';
|
|
641
|
-
// Or named imports: import { chat, completions } from '@just-every/ensemble';
|
|
642
|
-
|
|
643
|
-
// Replace OpenAI client
|
|
644
|
-
const openai = OpenAIEnsemble; // Instead of: new OpenAI({ apiKey: '...' })
|
|
645
|
-
|
|
646
|
-
// Use exactly like OpenAI SDK - but with any model!
|
|
647
|
-
const completion = await openai.chat.completions.create({
|
|
648
|
-
model: 'claude-3.5-sonnet', // or 'gpt-4o', 'gemini-2.0-flash', etc.
|
|
649
|
-
messages: [
|
|
650
|
-
{ role: 'system', content: 'You are a helpful assistant.' },
|
|
651
|
-
{ role: 'user', content: 'Hello!' }
|
|
652
|
-
],
|
|
653
|
-
temperature: 0.7
|
|
654
|
-
});
|
|
655
|
-
|
|
656
|
-
console.log(completion.choices[0].message.content);
|
|
657
142
|
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
messages: [{ role: 'user', content: 'Tell me a story' }],
|
|
143
|
+
const completion = await OpenAIEnsemble.chat.completions.create({
|
|
144
|
+
model: 'claude-3.5-sonnet', // Use any supported model!
|
|
145
|
+
messages: [{ role: 'user', content: 'Hello!' }],
|
|
662
146
|
stream: true
|
|
663
147
|
});
|
|
664
|
-
|
|
665
|
-
for await (const chunk of stream) {
|
|
666
|
-
process.stdout.write(chunk.choices[0].delta.content || '');
|
|
667
|
-
}
|
|
668
|
-
|
|
669
|
-
// Legacy completions API also supported
|
|
670
|
-
const legacyCompletion = await openai.completions.create({
|
|
671
|
-
model: 'deepseek-chat',
|
|
672
|
-
prompt: 'Once upon a time',
|
|
673
|
-
max_tokens: 100
|
|
674
|
-
});
|
|
675
|
-
```
|
|
676
|
-
|
|
677
|
-
This compatibility layer supports:
|
|
678
|
-
- All chat.completions.create parameters (temperature, tools, response_format, etc.)
|
|
679
|
-
- Streaming and non-streaming responses
|
|
680
|
-
- Tool/function calling
|
|
681
|
-
- Legacy completions.create API
|
|
682
|
-
- Proper TypeScript types matching OpenAI's SDK
|
|
683
|
-
|
|
684
|
-
### Custom Model Providers
|
|
685
|
-
|
|
686
|
-
```typescript
|
|
687
|
-
import { ModelProvider, registerExternalModel } from '@just-every/ensemble';
|
|
688
|
-
|
|
689
|
-
// Register a custom model
|
|
690
|
-
registerExternalModel({
|
|
691
|
-
id: 'my-custom-model',
|
|
692
|
-
provider: 'custom',
|
|
693
|
-
inputCost: 0.001,
|
|
694
|
-
outputCost: 0.002,
|
|
695
|
-
contextWindow: 8192,
|
|
696
|
-
maxOutput: 4096,
|
|
697
|
-
supportsTools: true,
|
|
698
|
-
supportsVision: false,
|
|
699
|
-
supportsStreaming: true
|
|
700
|
-
});
|
|
701
|
-
|
|
702
|
-
// Use your custom model
|
|
703
|
-
const stream = request('my-custom-model', messages);
|
|
704
|
-
```
|
|
705
|
-
|
|
706
|
-
### Performance Optimization
|
|
707
|
-
|
|
708
|
-
```typescript
|
|
709
|
-
// Batch processing with concurrency control
|
|
710
|
-
async function batchProcess(items: string[], concurrency = 3) {
|
|
711
|
-
const results = [];
|
|
712
|
-
const queue = [...items];
|
|
713
|
-
|
|
714
|
-
async function worker() {
|
|
715
|
-
while (queue.length > 0) {
|
|
716
|
-
const item = queue.shift()!;
|
|
717
|
-
const stream = request('gpt-4o-mini', [
|
|
718
|
-
{ type: 'message', role: 'user', content: `Process: ${item}` }
|
|
719
|
-
]);
|
|
720
|
-
|
|
721
|
-
const result = await convertStreamToMessages(stream);
|
|
722
|
-
results.push({ item, result: result.fullResponse });
|
|
723
|
-
}
|
|
724
|
-
}
|
|
725
|
-
|
|
726
|
-
// Run workers concurrently
|
|
727
|
-
await Promise.all(Array(concurrency).fill(null).map(() => worker()));
|
|
728
|
-
return results;
|
|
729
|
-
}
|
|
730
|
-
|
|
731
|
-
// Stream multiple requests in parallel
|
|
732
|
-
async function parallelStreaming(prompts: string[]) {
|
|
733
|
-
const streams = prompts.map(prompt =>
|
|
734
|
-
request('claude-3.5-haiku', [
|
|
735
|
-
{ type: 'message', role: 'user', content: prompt }
|
|
736
|
-
])
|
|
737
|
-
);
|
|
738
|
-
|
|
739
|
-
// Process all streams concurrently
|
|
740
|
-
const results = await Promise.all(
|
|
741
|
-
streams.map(stream => convertStreamToMessages(stream))
|
|
742
|
-
);
|
|
743
|
-
|
|
744
|
-
return results.map(r => r.fullResponse);
|
|
745
|
-
}
|
|
746
148
|
```
|
|
747
149
|
|
|
748
150
|
## Environment Variables
|
|
749
151
|
|
|
750
|
-
Set up API keys for the providers you want to use:
|
|
751
|
-
|
|
752
152
|
```bash
|
|
753
153
|
ANTHROPIC_API_KEY=your_key_here
|
|
754
154
|
OPENAI_API_KEY=your_key_here
|
|
@@ -758,6 +158,23 @@ XAI_API_KEY=your_key_here
|
|
|
758
158
|
OPENROUTER_API_KEY=your_key_here
|
|
759
159
|
```
|
|
760
160
|
|
|
161
|
+
## Documentation
|
|
162
|
+
|
|
163
|
+
- [Model Selection & Management](./docs/models.md)
|
|
164
|
+
- [Advanced Usage](./docs/advanced-usage.md)
|
|
165
|
+
- [Error Handling](./docs/error-handling.md)
|
|
166
|
+
- [OpenAI Compatibility](./docs/openai-compatibility.md)
|
|
167
|
+
- [Utility Functions](./docs/utilities.md)
|
|
168
|
+
|
|
169
|
+
## Examples
|
|
170
|
+
|
|
171
|
+
See the [examples](./examples) directory for:
|
|
172
|
+
- [Basic usage](./examples/basic-request.ts)
|
|
173
|
+
- [Tool calling](./examples/tool-calling.ts)
|
|
174
|
+
- [Embeddings & semantic search](./examples/embeddings.ts)
|
|
175
|
+
- [Model rotation](./examples/model-rotation.ts)
|
|
176
|
+
- [Stream conversion](./examples/stream-conversion.ts)
|
|
177
|
+
|
|
761
178
|
## License
|
|
762
179
|
|
|
763
|
-
MIT
|
|
180
|
+
MIT
|