llmjs2 1.1.1 → 1.3.0
- package/CONFIG_README.md +98 -0
- package/README.md +382 -357
- package/cli.js +195 -0
- package/config.yaml +149 -0
- package/docs/BASIC_USAGE.md +296 -0
- package/docs/CLI.md +455 -0
- package/docs/GET_STARTED.md +129 -0
- package/docs/GUARDRAILS_GUIDE.md +734 -0
- package/docs/README.md +47 -0
- package/docs/ROUTER_GUIDE.md +397 -0
- package/docs/SERVER_MODE.md +350 -0
- package/index.js +199 -246
- package/package.json +43 -34
- package/providers/ollama.js +120 -88
- package/providers/openai.js +104 -0
- package/providers/openrouter.js +113 -79
- package/router.js +248 -0
- package/server.js +186 -0
- package/test.js +246 -0
- package/validate-config.js +87 -0
- package/LICENSE +0 -21
package/docs/README.md
ADDED
@@ -0,0 +1,47 @@

# llmjs2 Documentation

Welcome to the llmjs2 documentation! This folder contains all the documentation for using and understanding the llmjs2 library.

## Documentation Overview

### 📖 Getting Started

- **[GET_STARTED.md](GET_STARTED.md)** - Quick setup guide for new users (5 minutes to first completion)

### 🔧 Usage Guides

- **[BASIC_USAGE.md](BASIC_USAGE.md)** - Core API patterns, configuration, and common use cases
- **[ROUTER_GUIDE.md](ROUTER_GUIDE.md)** - Model routing and load balancing
- **[GUARDRAILS_GUIDE.md](GUARDRAILS_GUIDE.md)** - Content filtering and request processing
- **[SERVER_MODE.md](SERVER_MODE.md)** - Run llmjs2 as an OpenAI-compatible API server with routing
- **[CLI.md](CLI.md)** - Command-line interface for server management

## Quick Navigation

### New to llmjs2?

Start with **[GET_STARTED.md](GET_STARTED.md)** to get up and running quickly.

### Want to use the API directly?

Check out **[BASIC_USAGE.md](BASIC_USAGE.md)** for different API patterns and examples.

### Need routing and load balancing?

See **[ROUTER_GUIDE.md](ROUTER_GUIDE.md)** for intelligent model routing.

### Need content filtering or custom processing?

See **[GUARDRAILS_GUIDE.md](GUARDRAILS_GUIDE.md)** for guardrails and request processing.

### Need to set up a server?

See **[SERVER_MODE.md](SERVER_MODE.md)** for OpenAI-compatible server setup.

### Prefer command-line tools?

**[CLI.md](CLI.md)** covers the command-line interface and configuration files.

## Contributing

Documentation improvements are welcome! Please ensure any changes maintain consistency across all documentation files.

package/docs/ROUTER_GUIDE.md
ADDED

@@ -0,0 +1,397 @@

# llmjs2 Router Usage Guide

The llmjs2 router provides intelligent model routing and load balancing capabilities, allowing you to distribute requests across multiple model deployments with different strategies.

## Overview

The router system enables:

- **Load balancing** across models with the same name
- **Multiple routing strategies** (default, random, sequential)
- **Provider-agnostic routing** with a unified API
- **Flexible model configuration** for different providers

## Quick Start

### Basic Setup

```javascript
import { router } from 'llmjs2';

// Define your model deployments
const modelList = [
  {
    "model_name": "gpt-3.5-turbo",
    "llm_params": {
      "model": "ollama/chatgpt-v-2",
      "api_key": process.env.OLLAMA_API_KEY,
      "api_base": process.env.OLLAMA_API_BASE
    }
  },
  {
    "model_name": "openai-turbo",
    "llm_params": {
      "model": "gpt-3.5-turbo",
      "api_key": process.env.OPENAI_API_KEY
    }
  },
  {
    "model_name": "gpt-4",
    "llm_params": {
      "model": "ollama/gpt-4",
      "api_key": process.env.OLLAMA_API_KEY,
      "api_base": process.env.OLLAMA_API_BASE
    }
  }
];

// Create routers with different strategies
const defaultRouter = router(modelList);
const randomRouter = router(modelList, 'random');
const sequentialRouter = router(modelList, 'sequential');
```

### Basic Usage

```javascript
// Route to a specific model
const response = await defaultRouter.completion({
  model: "gpt-3.5-turbo",
  messages: [{"role": "user", "content": "Hey, how's it going?"}]
});

// Auto-route with the random strategy
const randomResponse = await randomRouter.completion({
  messages: [{"role": "user", "content": "Hey, how's it going?"}]
});

// Auto-route with the sequential strategy
const seqResponse = await sequentialRouter.completion({
  messages: [{"role": "user", "content": "Hey, how's it going?"}]
});
```
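
The response object is whatever the selected provider returns. Assuming the OpenAI-style chat completion shape that llmjs2's server mode advertises (an assumption — this guide does not document the response fields), reading the reply might look like:

```javascript
// Assumes an OpenAI-style response: choices[0].message.content.
// Verify the exact fields against your installed llmjs2 version.
const reply = await defaultRouter.completion({
  model: "gpt-3.5-turbo",
  messages: [{ "role": "user", "content": "Hey, how's it going?" }]
});
console.log(reply.choices?.[0]?.message?.content);
```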

## Model Configuration

### Model List Format

Each model in the list is defined with:

```javascript
{
  "model_name": "string",   // Alias for routing (can have multiple providers)
  "llm_params": {           // Provider-specific parameters
    "model": "string",      // Actual model identifier for the provider
    "api_key": "string",    // API key (can use environment variables)
    "api_base": "string?",  // Custom API base URL (optional)
    // ... other provider-specific params
  }
}
```

### Supported Providers

#### Ollama

```javascript
{
  "model_name": "my-ollama-model",
  "llm_params": {
    "model": "ollama/llama2",
    "api_key": process.env.OLLAMA_API_KEY,
    "api_base": process.env.OLLAMA_API_BASE
  }
}
```

#### OpenRouter

```javascript
{
  "model_name": "my-openrouter-model",
  "llm_params": {
    "model": "openrouter/free-model",
    "api_key": process.env.OPEN_ROUTER_API_KEY
  }
}
```

#### OpenAI

```javascript
{
  "model_name": "my-openai-model",
  "llm_params": {
    "model": "openai/gpt-4",
    "api_key": process.env.OPENAI_API_KEY
  }
}
```

## Routing Strategies

### Default Strategy

When no strategy is specified, the router load-balances across models that share the same `model_name`.

```javascript
const route = router(modelList); // or router(modelList, 'default')

// Routes to one of the models with model_name="gpt-3.5-turbo"
const response = await route.completion({
  model: "gpt-3.5-turbo",
  messages: [...]
});
```
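
To make the selection concrete, here is an illustrative sketch of what same-name load balancing could look like — a uniform random pick among the matching deployments. This is a simplified model of the behavior, not llmjs2's actual implementation:

```javascript
// Illustrative only: one plausible way to load balance across
// deployments that share a model_name (not llmjs2's source code).
function pickDeployment(modelList, modelName) {
  const candidates = modelList.filter(m => m.model_name === modelName);
  if (candidates.length === 0) {
    throw new Error(`Model not found: ${modelName}`);
  }
  return candidates[Math.floor(Math.random() * candidates.length)];
}
```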

### Random Strategy

Randomly selects from available models when no specific model is requested.

```javascript
const route = router(modelList, 'random');

// Randomly selects from ALL models in the list
const response = await route.completion({
  messages: [...]
});
```

### Sequential Strategy

Cycles through models in order for each request.

```javascript
const route = router(modelList, 'sequential');

// Uses first model, then second, then third, etc.
const response1 = await route.completion({ messages: [...] }); // model 1
const response2 = await route.completion({ messages: [...] }); // model 2
const response3 = await route.completion({ messages: [...] }); // model 3
// ... cycles back to model 1
```
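
Conceptually, sequential routing is a round-robin counter over the model list. A minimal sketch of that idea (again illustrative, not the package's code):

```javascript
// Illustrative round-robin selection over a model list.
function makeSequentialPicker(modelList) {
  let next = 0;
  return () => modelList[next++ % modelList.length];
}
```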

## Advanced Usage

### Load Balancing

Multiple models with the same `model_name` enable load balancing:

```javascript
const modelList = [
  {
    "model_name": "gpt-3.5-turbo",
    "llm_params": {
      "model": "ollama/chatgpt-v-2",
      "api_key": process.env.OLLAMA_API_KEY
    }
  },
  {
    "model_name": "gpt-3.5-turbo", // Same name - load balancing
    "llm_params": {
      "model": "openai/gpt-3.5-turbo",
      "api_key": process.env.OPENAI_API_KEY
    }
  },
  {
    "model_name": "gpt-3.5-turbo", // Same name - load balancing
    "llm_params": {
      "model": "openrouter/minimax2.5",
      "api_key": process.env.OPEN_ROUTER_API_KEY,
      "api_base": process.env.OPEN_ROUTER_BASE
    }
  }
];

const route = router(modelList);

// This will load balance across all 3 "gpt-3.5-turbo" models
const response = await route.completion({
  model: "gpt-3.5-turbo",
  messages: [...]
});
```

### Environment Variables

Use environment variables for configuration:

```javascript
const modelList = [
  {
    "model_name": "production-gpt4",
    "llm_params": {
      "model": "openai/gpt-4",
      "api_key": process.env.OPENAI_API_KEY,
      "api_base": process.env.OPENAI_API_BASE || "https://api.openai.com/v1"
    }
  },
  {
    "model_name": "staging-gpt4",
    "llm_params": {
      "model": "ollama/gpt-4",
      "api_key": process.env.OLLAMA_API_KEY,
      "api_base": process.env.OLLAMA_API_BASE
    }
  }
];
```
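
In local development these variables are often loaded from a `.env` file. One common setup — assuming the third-party `dotenv` package, which is not a stated dependency of llmjs2 — is:

```javascript
// Load variables from a .env file into process.env before
// building the model list. Requires: npm install dotenv
import 'dotenv/config';
import { router } from 'llmjs2';

const route = router([
  {
    "model_name": "production-gpt4",
    "llm_params": { "model": "openai/gpt-4", "api_key": process.env.OPENAI_API_KEY }
  }
]);
```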

### Complete API Reference

```javascript
import { router } from 'llmjs2';

// Create router
const myRouter = router(modelList, strategy);

// Completion with specific model
const response1 = await myRouter.completion({
  model: "model_name",  // Optional: specific model to route to
  messages: [...],      // Required: chat messages
  tools: [...],         // Optional: function calling tools
  // ... other completion params
});

// Auto-routing completion
const response2 = await myRouter.completion({
  messages: [...],  // Required: chat messages
  // Uses routing strategy when no model specified
});
```

## Error Handling

```javascript
try {
  const response = await route.completion({
    model: "non-existent-model",
    messages: [{"role": "user", "content": "Hello"}]
  });
} catch (error) {
  if (error.message.includes('Model not found')) {
    console.log('Model not configured in router');
  } else if (error.message.includes('API key')) {
    console.log('Provider API key missing');
  } else {
    console.log('Routing error:', error.message);
  }
}
```

## Use Cases

### Multi-Provider Fallback

```javascript
const fallbackModels = [
  {
    "model_name": "gpt-4_1",
    "llm_params": { "model": "openai/gpt-4", "api_key": process.env.OPENAI_API_KEY }
  },
  {
    "model_name": "gpt-4_2",
    "llm_params": { "model": "ollama/gpt-4", "api_key": process.env.OLLAMA_API_KEY }
  },
  {
    "model_name": "gpt-4_3",
    "llm_params": { "model": "openrouter/gpt-4", "api_key": process.env.OPEN_ROUTER_API_KEY }
  }
];

const route = router(fallbackModels);

// Automatically tries different providers if one fails
const response = await route.completion({
  messages: [...]
});
```
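
If you prefer explicit control over the fallback order instead of relying on the router's automatic behavior, you can try each alias in sequence. A sketch using only the API shown above:

```javascript
// Explicit fallback: try each configured alias in order until one succeeds.
async function completeWithFallback(route, messages) {
  const aliases = ["gpt-4_1", "gpt-4_2", "gpt-4_3"];
  let lastError;
  for (const model of aliases) {
    try {
      return await route.completion({ model, messages });
    } catch (error) {
      lastError = error; // remember the failure and try the next provider
    }
  }
  throw lastError;
}
```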

### Cost Optimization

```javascript
const costOptimizedModels = [
  {
    "model_name": "text-davinci-001",
    "llm_params": { "model": "ollama/text-davinci-003", "api_key": process.env.OLLAMA_API_KEY }
  },
  {
    "model_name": "text-davinci-002",
    "llm_params": { "model": "openrouter/text-davinci-003", "api_key": process.env.OPENROUTER_API_KEY }
  },
  {
    "model_name": "text-davinci-003",
    "llm_params": { "model": "openai/gpt-3.5-turbo", "api_key": process.env.OPENAI_API_KEY }
  }
];

const route = router(costOptimizedModels, 'random');

// With `model` specified, routes to that alias; omit it to
// spread requests randomly across the cheaper providers
const response = await route.completion({
  model: "text-davinci-003",
  messages: [...]
});
```

### A/B Testing

```javascript
const abTestModels = [
  {
    "model_name": "experiment-a",
    "llm_params": { "model": "gpt-4", "api_key": process.env.OPENAI_API_KEY }
  },
  {
    "model_name": "experiment-b",
    "llm_params": { "model": "ollama/gpt-4", "api_key": process.env.OLLAMA_API_KEY }
  }
];

const route = router(abTestModels, 'random');

// Pin a variant with `model`, or omit it to let the
// random strategy split traffic between variants
const response = await route.completion({
  model: "experiment-a", // or "experiment-b"
  messages: [...]
});
```
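
For stable experiments you usually want each user pinned to one variant rather than re-randomized per request. A sketch of deterministic bucketing by user ID (the hashing helper is hypothetical, not part of llmjs2; `route` is the A/B router defined above):

```javascript
// Hypothetical helper: deterministically bucket a user into a variant
// so the same user always hits the same experiment arm.
function variantFor(userId) {
  let hash = 0;
  for (const ch of userId) hash = (hash * 31 + ch.charCodeAt(0)) >>> 0;
  return hash % 2 === 0 ? "experiment-a" : "experiment-b";
}

const response = await route.completion({
  model: variantFor("user-1234"),
  messages: [{ "role": "user", "content": "Hello" }]
});
```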

## Configuration Examples

### Production Setup

```javascript
const productionModels = [
  // Primary OpenAI models
  { "model_name": "gpt-4", "llm_params": { "model": "gpt-4", "api_key": process.env.OPENAI_API_KEY } },
  { "model_name": "gpt-3.5-turbo", "llm_params": { "model": "openai/gpt-3.5-turbo", "api_key": process.env.OPENAI_API_KEY } },

  // Fallback Ollama models
  { "model_name": "gpt-4", "llm_params": { "model": "ollama/gpt-4", "api_key": process.env.OLLAMA_API_KEY } },
  { "model_name": "gpt-3.5-turbo", "llm_params": { "model": "ollama/gpt-3.5-turbo", "api_key": process.env.OLLAMA_API_KEY } },

  // Cost-effective alternatives
  { "model_name": "gpt-3.5-turbo", "llm_params": { "model": "openrouter/openrouter/free", "api_key": process.env.OPENROUTER_API_KEY } }
];

export const productionRouter = router(productionModels);
export const stagingRouter = router(productionModels, 'sequential');
```

### Development Setup

```javascript
const devModels = [
  // Mock/echo models for testing
  { "model_name": "echo", "llm_params": { "model": "echo", "api_key": "dev" } },

  // Single provider for consistency
  { "model_name": "gpt-3.5-turbo", "llm_params": { "model": "ollama/gpt-3.5-turbo", "api_key": process.env.OLLAMA_API_KEY } }
];

export const devRouter = router(devModels, 'sequential');
```
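
A quick smoke test against the development router might look like this (assuming the `echo` model simply returns its input, which is implied but not documented here; the module path is hypothetical):

```javascript
import { devRouter } from './routers.js'; // hypothetical module path

// Exercise the mock model without hitting a real provider.
const res = await devRouter.completion({
  model: "echo",
  messages: [{ "role": "user", "content": "ping" }]
});
```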

This router system provides powerful routing capabilities while maintaining a simple, unified API for LLM completion across multiple providers and deployment strategies.