llmjs2 1.1.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli.js ADDED
@@ -0,0 +1,195 @@
+ #!/usr/bin/env node
+
+ const { app } = require('./server');
+ const { router } = require('./router');
+ const yaml = require('yaml');
+ const fs = require('fs');
+ const path = require('path');
+
+ class CLI {
+   constructor() {
+     this.args = process.argv.slice(2);
+     this.options = this.parseArgs();
+   }
+
+   parseArgs() {
+     const options = {
+       port: 3000,
+       host: 'localhost',
+       config: null,
+       help: false
+     };
+
+     for (let i = 0; i < this.args.length; i++) {
+       const arg = this.args[i];
+
+       switch (arg) {
+         case '-p':
+         case '--port':
+           options.port = parseInt(this.args[++i], 10);
+           break;
+         case '-H':
+         case '--host':
+           options.host = this.args[++i];
+           break;
+         case '-c':
+         case '--config':
+           options.config = this.args[++i];
+           break;
+         case '-h':
+         case '--help':
+           options.help = true;
+           break;
+         default:
+           if (arg.startsWith('-')) {
+             console.error(`Unknown option: ${arg}`);
+             this.showHelp();
+             process.exit(1);
+           }
+       }
+     }
+
+     return options;
+   }
+
+   showHelp() {
+     console.log(`
+ 🤖 llmjs2 - OpenAI-Compatible API Server
+
+ USAGE:
+   llmjs2 [options]
+
+ DESCRIPTION:
+   Starts an OpenAI-compatible API server with intelligent routing and guardrails.
+   Supports model load balancing, content filtering, and custom request processing.
+   The server listens for POST requests to /v1/chat/completions.
+
+ OPTIONS:
+   -c, --config <file>   YAML config file with models, guardrails, and routing
+   -p, --port <port>     Port to listen on (default: 3000)
+   -H, --host <host>     Host to bind to (default: localhost)
+   -h, --help            Show this help message
+
+ EXAMPLES:
+   llmjs2
+   llmjs2 --config config.yaml
+   llmjs2 --config config.yaml --port 8080 --host 0.0.0.0
+ `);
+   }
+
+   loadConfig() {
+     if (!this.options.config) {
+       return this.createDefaultConfig();
+     }
+
+     const configPath = path.resolve(this.options.config);
+     if (!fs.existsSync(configPath)) {
+       throw new Error(`Configuration file not found: ${configPath}`);
+     }
+
+     const configContent = fs.readFileSync(configPath, 'utf8');
+     const config = yaml.parse(configContent);
+
+     // Resolve environment variables in config
+     return this.resolveEnvVars(config);
+   }
+
+   createDefaultConfig() {
+     return {
+       model_list: [
+         {
+           model_name: 'default',
+           llm_params: {
+             model: process.env.OLLAMA_DEFAULT_MODEL || 'ollama/minimax-m2.5:cloud',
+             api_key: process.env.OLLAMA_API_KEY,
+             api_base: process.env.OLLAMA_BASE_URL
+           }
+         }
+       ],
+       guardrails: [],
+       router_settings: {
+         routing_strategy: 'default'
+       }
+     };
+   }
+
+   // Recursively replaces string values of the form "os.environ/VAR_NAME"
+   // with process.env.VAR_NAME (e.g. api_key: os.environ/OLLAMA_API_KEY)
+   resolveEnvVars(obj) {
+     if (typeof obj === 'string') {
+       if (obj.startsWith('os.environ/')) {
+         const envVar = obj.replace('os.environ/', '');
+         return process.env[envVar] || obj;
+       }
+       return obj;
+     }
+
+     if (Array.isArray(obj)) {
+       return obj.map(item => this.resolveEnvVars(item));
+     }
+
+     if (obj && typeof obj === 'object') {
+       const resolved = {};
+       for (const [key, value] of Object.entries(obj)) {
+         resolved[key] = this.resolveEnvVars(value);
+       }
+       return resolved;
+     }
+
+     return obj;
+   }
+
+   createRouter(config) {
+     const route = router(config.model_list, config.router_settings.routing_strategy);
+
+     if (config.guardrails && config.guardrails.length > 0) {
+       route.setGuardrails(config.guardrails);
+     }
+
+     return route;
+   }
+
+   run() {
+     if (this.options.help) {
+       this.showHelp();
+       return;
+     }
+
+     try {
+       const config = this.loadConfig();
+       const route = this.createRouter(config);
+
+       console.log('🤖 Starting llmjs2 server...');
+       console.log(`📋 Configuration: ${this.options.config || 'default'}`);
+       console.log(`🎯 Models: ${route.getAvailableModels().join(', ')}`);
+       console.log(`🔀 Strategy: ${config.router_settings.routing_strategy}`);
+       console.log(`🛡️ Guardrails: ${config.guardrails?.length || 0}`);
+
+       app.use(route);
+       app.listen(this.options.port, this.options.host);
+
+     } catch (error) {
+       console.error('❌ Failed to start server:', error.message);
+       process.exit(1);
+     }
+   }
+ }
+
+ // Load environment variables from .env file if it exists
+ function loadEnvFile() {
+   const envPath = path.join(process.cwd(), '.env');
+   if (fs.existsSync(envPath)) {
+     const envContent = fs.readFileSync(envPath, 'utf8');
+     const envVars = envContent.split('\n').filter(line => line.trim() && !line.startsWith('#'));
+
+     envVars.forEach(line => {
+       // Split on the first '=' only so values containing '=' stay intact
+       const [key, ...rest] = line.split('=');
+       if (key && rest.length) {
+         process.env[key.trim()] = rest.join('=').trim();
+       }
+     });
+   }
+ }
+
+ // Run CLI
+ loadEnvFile();
+ const cli = new CLI();
+ cli.run();
package/config.yaml ADDED
@@ -0,0 +1,149 @@
+ # Sample configuration for llmjs2 server
+ # This file demonstrates all available configuration options
+
+ # Model list defines available models and their providers
+ model_list:
+   # Ollama models
+   - model_name: glm-5.1:cloud
+     llm_params:
+       model: ollama/glm-5.1:cloud
+
+   - model_name: minimax-m2.5
+     llm_params:
+       model: ollama/minimax-m2.5:cloud
+
+   # OpenRouter models (multiple with same name for load balancing)
+   - model_name: free-model
+     llm_params:
+       model: openrouter/openrouter/free
+
+ # Guardrails for request/response processing
+ guardrails:
+   # Content filtering before LLM calls
+   - name: content_filter
+     mode: pre_call
+     code: |
+       (processId, input) => {
+         const { model, messages } = input;
+
+         console.log(`[${processId}] Filtering content for model: ${model}`);
+
+         // Filter inappropriate content
+         const filteredMessages = messages.map(msg => {
+           if (msg.role === 'user' && msg.content) {
+             // Basic profanity filter
+             const filtered = msg.content
+               .replace(/badword/gi, '****')
+               .replace(/inappropriate/gi, '****');
+
+             return { ...msg, content: filtered };
+           }
+           return msg;
+         });
+
+         return { model, messages: filteredMessages };
+       }
+
+   # Rate limiting
+   - name: rate_limiter
+     mode: pre_call
+     code: |
+       (processId, input) => {
+         // Simple in-memory rate limiting (use Redis in production)
+         const now = Date.now();
+         const windowMs = 60000; // 1 minute
+         const maxRequests = 3; // 3 requests per minute
+
+         if (!global.rateLimitStore) {
+           global.rateLimitStore = new Map();
+         }
+
+         const userId = 'default_user'; // In a real app, get this from auth
+         const userRequests = global.rateLimitStore.get(userId) || [];
+
+         // Drop requests that fall outside the current window
+         const recentRequests = userRequests.filter(time => now - time < windowMs);
+
+         if (recentRequests.length >= maxRequests) {
+           throw new Error(`Rate limit exceeded. Maximum ${maxRequests} requests per minute.`);
+         }
+
+         recentRequests.push(now);
+         global.rateLimitStore.set(userId, recentRequests);
+
+         console.log(`[${processId}] Rate limit check passed: ${recentRequests.length}/${maxRequests}`);
+         return input;
+       }
+
+   # Request logging
+   - name: request_logger
+     mode: pre_call
+     code: |
+       (processId, input) => {
+         const { model, messages } = input;
+
+         const logData = {
+           processId,
+           timestamp: new Date().toISOString(),
+           model,
+           messageCount: messages.length,
+           totalChars: messages.reduce((sum, m) => sum + (m.content?.length || 0), 0)
+         };
+
+         console.log(`[REQUEST] ${JSON.stringify(logData)}`);
+         return input;
+       }
+
+   # Response logging and filtering
+   - name: response_logger
+     mode: post_call
+     code: |
+       (processId, result) => {
+         const logData = {
+           processId,
+           timestamp: new Date().toISOString(),
+           resultType: typeof result,
+           resultLength: typeof result === 'string' ? result.length : 'object'
+         };
+
+         console.log(`[RESPONSE] ${JSON.stringify(logData)}`);
+
+         // Basic response filtering
+         if (typeof result === 'string') {
+           const filtered = result.replace(/sensitive/gi, '[FILTERED]');
+           return filtered;
+         }
+
+         return result;
+       }
+
+   # Performance monitoring
+   - name: performance_monitor
+     mode: post_call
+     code: |
+       (processId, result) => {
+         const endTime = Date.now();
+         // Note: in a real implementation you'd record the start time in a pre_call guardrail
+         console.log(`[${processId}] Processing completed in ${endTime - (global.startTimes?.[processId] || endTime)}ms`);
+
+         // Clean up
+         if (global.startTimes) {
+           delete global.startTimes[processId];
+         }
+
+         return result;
+       }
+
+ # Router configuration
+ router_settings:
+   routing_strategy: random # Options: default, random, sequential
+
+ # Optional: Custom server settings (can also be set via CLI or env vars)
+ # server_settings:
+ #   port: 3000
+ #   host: localhost
@@ -0,0 +1,296 @@
+ # Basic Usage Guide
+
+ This guide covers the core functionality of llmjs2 - how to use the completion API in different ways.
+
+ ## API Patterns
+
+ llmjs2 supports three API styles to fit different coding styles and use cases:
+
+ ### 1. Simple API (Auto-Detection)
+
+ The easiest way to get started. Just provide a prompt:
+
+ ```javascript
+ import { completion } from 'llmjs2';
+
+ // llmjs2 automatically chooses a provider and model
+ const response = await completion('Explain quantum physics in simple terms');
+ console.log(response);
+ ```
+
+ **How it works:**
+
+ - Detects available API keys in environment variables
+ - Chooses between Ollama and OpenRouter if both are available
+ - Uses default models if none is specified:
+   - Ollama: minimax-m2.5:cloud
+   - OpenRouter: openrouter/free
+ - Default models are configurable via environment variables, as in the sketch below
+
+ ### 2. Model-Specific API
32
+
33
+ Specify exactly which model you want to use:
34
+
35
+ ```javascript
36
+ import { completion } from 'llmjs2';
37
+
38
+ // Using Ollama
39
+ const ollamaResponse = await completion('ollama/minimax-m2.5:cloud', 'Hello from Ollama!');
40
+ console.log('Ollama:', ollamaResponse);
41
+
42
+ // Using OpenRouter
43
+ const openrouterResponse = await completion('openrouter/openrouter/free', 'Hello from OpenRouter!');
44
+ console.log('OpenRouter:', openrouterResponse);
45
+ ```
46
+
47
+ ### 3. Object-Based API (Most Powerful)
48
+
49
+ For advanced usage with conversations, system messages, and tools:
50
+
51
+ ```javascript
52
+ import { completion } from 'llmjs2';
53
+
54
+ const response = await completion({
55
+ model: 'ollama/minimax-m2.5:cloud',
56
+ messages: [
57
+ { role: 'system', content: 'You are a helpful coding assistant.' },
58
+ { role: 'user', content: 'Write a function to reverse a string in JavaScript.' }
59
+ ],
60
+ apiKey: 'your-api-key' // optional - uses env vars if not provided
61
+ });
62
+
63
+ console.log(response);
64
+ ```
65
+
66
+ ## Configuration
67
+
68
+ ### Environment Variables
69
+
70
+ Set these to configure llmjs2:
71
+
72
+ ```bash
73
+ # API Keys (required)
74
+ export OLLAMA_API_KEY=your_ollama_key
75
+ export OPEN_ROUTER_API_KEY=your_openrouter_key
76
+ export OPENAI_API_KEY=your_openai_key
77
+
78
+ # Default Models (optional)
79
+ export OLLAMA_DEFAULT_MODEL=minimax-m2.5:cloud
80
+ export OPEN_ROUTER_DEFAULT_MODEL=openrouter/free
81
+ export OPENAI_DEFAULT_MODEL=gpt-3.5-turbo
82
+
83
+ # Base URLs (optional - defaults shown below)
84
+ export OLLAMA_BASE_URL=https://ollama.com/api/chat
85
+ export OPEN_ROUTER_BASE_URL=https://openrouter.ai/api/v1/chat/completions
86
+ export OPENAI_BASE_URL=https://api.openai.com/v1
87
+ ```
88
+
89
+ if no base url is set, the default base url is:
90
+ - ollama:https://ollama.com/api/chat
91
+ - openrouter:https://openrouter.ai/api/v1/chat/completions
92
+ - openai:https://api.openai.com/v1
93
+
94
+ ## Working with Conversations
95
+
96
+ Use the object-based API for multi-turn conversations:
97
+
98
+ ```javascript
99
+ import { completion } from 'llmjs2';
100
+
101
+ async function chatConversation() {
102
+ const messages = [
103
+ { role: 'system', content: 'You are a helpful assistant.' },
104
+ { role: 'user', content: 'What is the capital of France?' }
105
+ ];
106
+
107
+ // First response
108
+ const response1 = await completion({
109
+ model: 'ollama/minimax-m2.5:cloud',
110
+ messages: messages
111
+ });
112
+
113
+ console.log('Assistant:', response1);
114
+
115
+ // Continue the conversation
116
+ messages.push({ role: 'assistant', content: response1 });
117
+ messages.push({ role: 'user', content: 'What is its population?' });
118
+
119
+ const response2 = await completion({
120
+ model: 'ollama/minimax-m2.5:cloud',
121
+ messages: messages
122
+ });
123
+
124
+ console.log('Assistant:', response2);
125
+ }
126
+
127
+ chatConversation();
128
+ ```
129
+
130
+ ## Function Calling (Tools)
131
+
132
+ Give your LLM access to external functions:
133
+
134
+ ```javascript
135
+ import { completion } from 'llmjs2';
136
+
137
+ // Define a weather tool
138
+ const weatherTool = {
139
+ type: 'function',
140
+ function: {
141
+ name: 'get_weather',
142
+ description: 'Get the current weather in a given location',
143
+ parameters: {
144
+ type: 'object',
145
+ properties: {
146
+ location: {
147
+ type: 'string',
148
+ description: 'The city and state, e.g. San Francisco, CA'
149
+ }
150
+ },
151
+ required: ['location']
152
+ }
153
+ }
154
+ };
155
+
156
+ async function weatherAssistant() {
157
+ const response = await completion({
158
+ model: 'openrouter/openrouter/free',
159
+ messages: [
160
+ { role: 'user', content: 'What is the weather like in Paris?' }
161
+ ],
162
+ tools: [weatherTool]
163
+ });
164
+
165
+ // Check if the model wants to call a tool
166
+ if (response.tool_calls) {
167
+ console.log('Tool calls:', response.tool_calls);
168
+ // Here you would execute the tool and continue the conversation
169
+ } else {
170
+ console.log('Response:', response);
171
+ }
172
+ }
173
+
174
+ weatherAssistant();
175
+ ```
176
+
177
+ ## Error Handling
178
+
179
+ Always wrap your completion calls in try-catch blocks:
180
+
181
+ ```javascript
182
+ import { completion } from 'llmjs2';
183
+
184
+ async function safeCompletion() {
185
+ try {
186
+ const response = await completion('Tell me a joke');
187
+ console.log('Success:', response);
188
+ } catch (error) {
189
+ console.error('Error:', error.message);
190
+
191
+ // Handle different error types
192
+ if (error.message.includes('API key')) {
193
+ console.log('Please set your API keys');
194
+ } else if (error.message.includes('timeout')) {
195
+ console.log('Request timed out, try again');
196
+ } else if (error.message.includes('Invalid model')) {
197
+ console.log('Check your model format: provider/model_name');
198
+ }
199
+ }
200
+ }
201
+
202
+ safeCompletion();
203
+ ```
204
+
205
+ ## Common Patterns
206
+
207
+ ### Content Summarization
208
+
209
+ ```javascript
210
+ async function summarizeText(text, maxWords = 50) {
211
+ const prompt = `Please summarize the following text in ${maxWords} words or less:\n\n${text}`;
212
+
213
+ return await completion({
214
+ model: 'ollama/minimax-m2.5:cloud',
215
+ messages: [{ role: 'user', content: prompt }]
216
+ });
217
+ }
218
+
219
+ // Usage
220
+ const summary = await summarizeText('Your long text here...', 30);
221
+ console.log(summary);
222
+ ```
223
+
224
+ ### Code Generation
225
+
226
+ ```javascript
227
+ async function generateCode(description, language = 'javascript') {
228
+ const prompt = `Write a ${language} function that ${description}. Include comments and error handling.`;
229
+
230
+ return await completion({
231
+ model: 'openrouter/openrouter/free',
232
+ messages: [
233
+ { role: 'system', content: 'You are an expert programmer. Write clean, well-documented code.' },
234
+ { role: 'user', content: prompt }
235
+ ]
236
+ });
237
+ }
238
+
239
+ // Usage
240
+ const code = await generateCode('calculates the fibonacci sequence', 'python');
241
+ console.log(code);
242
+ ```
243
+
244
+ ### Multi-Provider Fallback
245
+
246
+ ```javascript
247
+ async function smartCompletion(prompt) {
248
+ const providers = [
249
+ 'ollama/minimax-m2.5:cloud',
250
+ 'openrouter/openrouter/free'
251
+ ];
252
+
253
+ for (const provider of providers) {
254
+ try {
255
+ console.log(`Trying ${provider}...`);
256
+ const response = await completion(provider, prompt);
257
+ return response;
258
+ } catch (error) {
259
+ console.log(`${provider} failed: ${error.message}`);
260
+ continue;
261
+ }
262
+ }
263
+
264
+ throw new Error('All providers failed');
265
+ }
266
+
267
+ // Usage
268
+ const response = await smartCompletion('Hello world!');
269
+ console.log(response);
270
+ ```
271
+
272
+ ## Model Format Reference
273
+
274
+ Models must be specified in the format: `provider/model_name`
275
+
276
+ **Ollama Examples:**
277
+
278
+ - `ollama/minimax-m2.5:cloud`
279
+ - `ollama/qwen3.5:397b-cloud`
280
+ - `ollama/glm-5.1:cloud`
281
+
282
+ **OpenRouter Examples:**
283
+
284
+ - `openrouter/openrouter/free`
285
+ - `openrouter/openai/gpt-4`
286
+ - `openrouter/anthropic/claude-3-haiku`
287
+
288
+ ## Next Steps
289
+
290
+ Now that you understand the basic usage:
291
+
292
+ - **[Server Mode](SERVER_MODE.md)** - Run llmjs2 as an API server
293
+ - **[CLI Guide](CLI.md)** - Use the command-line interface
294
+ - **[Advanced Features](ADVANCED_FEATURES.md)** - Advanced usage patterns and integrations
295
+
296
+ For technical details, see the [TECHNICAL_SPECIFICATION.md](TECHNICAL_SPECIFICATION.md).