llmjs2 1.1.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli.js ADDED
@@ -0,0 +1,195 @@
+ #!/usr/bin/env node
+
+ const { app } = require('./server');
+ const { router } = require('./router');
+ const yaml = require('yaml');
+ const fs = require('fs');
+ const path = require('path');
+
+ class CLI {
+   constructor() {
+     this.args = process.argv.slice(2);
+     this.options = this.parseArgs();
+   }
+
+   parseArgs() {
+     const options = {
+       port: 3000,
+       host: 'localhost',
+       config: null,
+       help: false
+     };
+
+     for (let i = 0; i < this.args.length; i++) {
+       const arg = this.args[i];
+
+       switch (arg) {
+         case '-p':
+         case '--port':
+           options.port = parseInt(this.args[++i], 10);
+           break;
+         case '-H':
+         case '--host':
+           options.host = this.args[++i];
+           break;
+         case '-c':
+         case '--config':
+           options.config = this.args[++i];
+           break;
+         case '-h':
+         case '--help':
+           options.help = true;
+           break;
+         default:
+           if (arg.startsWith('-')) {
+             console.error(`Unknown option: ${arg}`);
+             this.showHelp();
+             process.exit(1);
+           }
+       }
+     }
+
+     return options;
+   }
+
+   showHelp() {
+     console.log(`
+ 🤖 llmjs2 - OpenAI-Compatible API Server
+
+ USAGE:
+   llmjs2 [options]
+
+ DESCRIPTION:
+   Starts an OpenAI-compatible API server with intelligent routing and guardrails.
+   Supports model load balancing, content filtering, and custom request processing.
+   The server listens for POST requests to /v1/chat/completions.
+
+ OPTIONS:
+   -c, --config <file>   YAML config file with models, guardrails, and routing
+   -p, --port <port>     Port to listen on (default: 3000)
+   -H, --host <host>     Host to bind to (default: localhost)
+   -h, --help            Show this help message
+
+ EXAMPLES:
+   llmjs2
+   llmjs2 --config config.yaml
+   llmjs2 --config config.yaml --port 8080 --host 0.0.0.0
+ `);
+   }
+
+   loadConfig() {
+     if (!this.options.config) {
+       return this.createDefaultConfig();
+     }
+
+     const configPath = path.resolve(this.options.config);
+     if (!fs.existsSync(configPath)) {
+       throw new Error(`Configuration file not found: ${configPath}`);
+     }
+
+     const configContent = fs.readFileSync(configPath, 'utf8');
+     const config = yaml.parse(configContent);
+
+     // Resolve environment variables in config
+     return this.resolveEnvVars(config);
+   }
+
+   createDefaultConfig() {
+     return {
+       model_list: [
+         {
+           model_name: 'default',
+           llm_params: {
+             model: process.env.OLLAMA_DEFAULT_MODEL || 'ollama/minimax-m2.5:cloud',
+             api_key: process.env.OLLAMA_API_KEY,
+             api_base: process.env.OLLAMA_BASE_URL
+           }
+         }
+       ],
+       guardrails: [],
+       router_settings: {
+         routing_strategy: 'default'
+       }
+     };
+   }
+
+   // Recursively replaces string values of the form "os.environ/VAR_NAME"
+   // with process.env.VAR_NAME (e.g. api_key: os.environ/OLLAMA_API_KEY)
+   resolveEnvVars(obj) {
+     if (typeof obj === 'string') {
+       if (obj.startsWith('os.environ/')) {
+         const envVar = obj.replace('os.environ/', '');
+         return process.env[envVar] || obj;
+       }
+       return obj;
+     }
+
+     if (Array.isArray(obj)) {
+       return obj.map(item => this.resolveEnvVars(item));
+     }
+
+     if (obj && typeof obj === 'object') {
+       const resolved = {};
+       for (const [key, value] of Object.entries(obj)) {
+         resolved[key] = this.resolveEnvVars(value);
+       }
+       return resolved;
+     }
+
+     return obj;
+   }
+
+   createRouter(config) {
+     const route = router(config.model_list, config.router_settings.routing_strategy);
+
+     if (config.guardrails && config.guardrails.length > 0) {
+       route.setGuardrails(config.guardrails);
+     }
+
+     return route;
+   }
+
+   run() {
+     if (this.options.help) {
+       this.showHelp();
+       return;
+     }
+
+     try {
+       const config = this.loadConfig();
+       const route = this.createRouter(config);
+
+       console.log('🤖 Starting llmjs2 server...');
+       console.log(`📋 Configuration: ${this.options.config || 'default'}`);
+       console.log(`🎯 Models: ${route.getAvailableModels().join(', ')}`);
+       console.log(`🔀 Strategy: ${config.router_settings.routing_strategy}`);
+       console.log(`🛡️ Guardrails: ${config.guardrails?.length || 0}`);
+
+       app.use(route);
+       app.listen(this.options.port, this.options.host);
+
+     } catch (error) {
+       console.error('❌ Failed to start server:', error.message);
+       process.exit(1);
+     }
+   }
+ }
+
+ // Load environment variables from .env file if it exists
+ function loadEnvFile() {
+   const envPath = path.join(process.cwd(), '.env');
+   if (fs.existsSync(envPath)) {
+     const envContent = fs.readFileSync(envPath, 'utf8');
+     const envVars = envContent.split('\n').filter(line => line.trim() && !line.startsWith('#'));
+
+     envVars.forEach(line => {
+       // Split on the first '=' only so values containing '=' stay intact
+       const [key, ...rest] = line.split('=');
+       if (key && rest.length) {
+         process.env[key.trim()] = rest.join('=').trim();
+       }
+     });
+   }
+ }
+
+ // Run CLI
+ loadEnvFile();
+ const cli = new CLI();
+ cli.run();
package/config.yaml ADDED
@@ -0,0 +1,149 @@
+ # Sample configuration for llmjs2 server
+ # This file demonstrates all available configuration options
+
+ # Model list defines available models and their providers
+ model_list:
+   # Ollama models
+   - model_name: glm-5.1:cloud
+     llm_params:
+       model: ollama/glm-5.1:cloud
+
+   - model_name: minimax-m2.5
+     llm_params:
+       model: ollama/minimax-m2.5:cloud
+
+   # OpenRouter models (multiple with same name for load balancing)
+   - model_name: free-model
+     llm_params:
+       model: openrouter/openrouter/free
+
+ # Guardrails for request/response processing
+ guardrails:
+   # Content filtering before LLM calls
+   - name: content_filter
+     mode: pre_call
+     code: |
+       (processId, input) => {
+         const { model, messages } = input;
+
+         console.log(`[${processId}] Filtering content for model: ${model}`);
+
+         // Filter inappropriate content
+         const filteredMessages = messages.map(msg => {
+           if (msg.role === 'user' && msg.content) {
+             // Basic profanity filter
+             const filtered = msg.content
+               .replace(/badword/gi, '****')
+               .replace(/inappropriate/gi, '****');
+
+             return { ...msg, content: filtered };
+           }
+           return msg;
+         });
+
+         return { model, messages: filteredMessages };
+       }
+
+   # Rate limiting
+   - name: rate_limiter
+     mode: pre_call
+     code: |
+       (processId, input) => {
+         // Simple in-memory rate limiting (use Redis in production)
+         const now = Date.now();
+         const windowMs = 60000; // 1 minute
+         const maxRequests = 3; // 3 requests per minute
+
+         if (!global.rateLimitStore) {
+           global.rateLimitStore = new Map();
+         }
+
+         const userId = 'default_user'; // In a real app, get this from auth
+         const userRequests = global.rateLimitStore.get(userId) || [];
+
+         // Drop requests that fall outside the current window
+         const recentRequests = userRequests.filter(time => now - time < windowMs);
+
+         if (recentRequests.length >= maxRequests) {
+           throw new Error(`Rate limit exceeded. Maximum ${maxRequests} requests per minute.`);
+         }
+
+         recentRequests.push(now);
+         global.rateLimitStore.set(userId, recentRequests);
+
+         console.log(`[${processId}] Rate limit check passed: ${recentRequests.length}/${maxRequests}`);
+         return input;
+       }
+
+   # Request logging
+   - name: request_logger
+     mode: pre_call
+     code: |
+       (processId, input) => {
+         const { model, messages } = input;
+
+         const logData = {
+           processId,
+           timestamp: new Date().toISOString(),
+           model,
+           messageCount: messages.length,
+           totalChars: messages.reduce((sum, m) => sum + (m.content?.length || 0), 0)
+         };
+
+         console.log(`[REQUEST] ${JSON.stringify(logData)}`);
+         return input;
+       }
+
+   # Response logging and filtering
+   - name: response_logger
+     mode: post_call
+     code: |
+       (processId, result) => {
+         const logData = {
+           processId,
+           timestamp: new Date().toISOString(),
+           resultType: typeof result,
+           resultLength: typeof result === 'string' ? result.length : 'object'
+         };
+
+         console.log(`[RESPONSE] ${JSON.stringify(logData)}`);
+
+         // Basic response filtering
+         if (typeof result === 'string') {
+           const filtered = result.replace(/sensitive/gi, '[FILTERED]');
+           return filtered;
+         }
+
+         return result;
+       }
+
+   # Performance monitoring
+   - name: performance_monitor
+     mode: post_call
+     code: |
+       (processId, result) => {
+         const endTime = Date.now();
+         // Note: in a real implementation you'd record the start time in a pre_call guardrail
+         console.log(`[${processId}] Processing completed in ${endTime - (global.startTimes?.[processId] || endTime)}ms`);
+
+         // Clean up
+         if (global.startTimes) {
+           delete global.startTimes[processId];
+         }
+
+         return result;
+       }
+
+ # Router configuration
+ router_settings:
+   routing_strategy: random # Options: default, random, sequential
+
+ # Optional: Custom server settings (can also be set via CLI or env vars)
+ # server_settings:
+ #   port: 3000
+ #   host: localhost
@@ -0,0 +1,296 @@
+ # Basic Usage Guide
+
+ This guide covers the core functionality of llmjs2 - how to use the completion API in different ways.
+
+ ## API Patterns
+
+ llmjs2 supports three API styles to fit different coding styles and use cases:
+
+ ### 1. Simple API (Auto-Detection)
+
+ The easiest way to get started. Just provide a prompt:
+
+ ```javascript
+ import { completion } from 'llmjs2';
+
+ // llmjs2 automatically chooses a provider and model
+ const response = await completion('Explain quantum physics in simple terms');
+ console.log(response);
+ ```
+
+ **How it works:**
+
+ - Detects available API keys in environment variables
+ - Chooses between Ollama and OpenRouter if both are available
+ - Uses default models if none is specified:
+   - Ollama: minimax-m2.5:cloud
+   - OpenRouter: openrouter/free
+ - Default models are configurable via environment variables, as in the sketch below
+
+ ### 2. Model-Specific API
32
+
33
+ Specify exactly which model you want to use:
34
+
35
+ ```javascript
36
+ import { completion } from 'llmjs2';
37
+
38
+ // Using Ollama
39
+ const ollamaResponse = await completion('ollama/minimax-m2.5:cloud', 'Hello from Ollama!');
40
+ console.log('Ollama:', ollamaResponse);
41
+
42
+ // Using OpenRouter
43
+ const openrouterResponse = await completion('openrouter/openrouter/free', 'Hello from OpenRouter!');
44
+ console.log('OpenRouter:', openrouterResponse);
45
+ ```
46
+
47
+ ### 3. Object-Based API (Most Powerful)
48
+
49
+ For advanced usage with conversations, system messages, and tools:
50
+
51
+ ```javascript
52
+ import { completion } from 'llmjs2';
53
+
54
+ const response = await completion({
55
+ model: 'ollama/minimax-m2.5:cloud',
56
+ messages: [
57
+ { role: 'system', content: 'You are a helpful coding assistant.' },
58
+ { role: 'user', content: 'Write a function to reverse a string in JavaScript.' }
59
+ ],
60
+ apiKey: 'your-api-key' // optional - uses env vars if not provided
61
+ });
62
+
63
+ console.log(response);
64
+ ```
65
+
66
+ ## Configuration
67
+
68
+ ### Environment Variables
69
+
70
+ Set these to configure llmjs2:
71
+
72
+ ```bash
73
+ # API Keys (required)
74
+ export OLLAMA_API_KEY=your_ollama_key
75
+ export OPEN_ROUTER_API_KEY=your_openrouter_key
76
+ export OPENAI_API_KEY=your_openai_key
77
+
78
+ # Default Models (optional)
79
+ export OLLAMA_DEFAULT_MODEL=minimax-m2.5:cloud
80
+ export OPEN_ROUTER_DEFAULT_MODEL=openrouter/free
81
+ export OPENAI_DEFAULT_MODEL=gpt-3.5-turbo
82
+
83
+ # Base URLs (optional - defaults shown below)
84
+ export OLLAMA_BASE_URL=https://ollama.com/api/chat
85
+ export OPEN_ROUTER_BASE_URL=https://openrouter.ai/api/v1/chat/completions
86
+ export OPENAI_BASE_URL=https://api.openai.com/v1
87
+ ```
88
+
89
+ if no base url is set, the default base url is:
90
+ - ollama:https://ollama.com/api/chat
91
+ - openrouter:https://openrouter.ai/api/v1/chat/completions
92
+ - openai:https://api.openai.com/v1
93
+
94
+ ## Working with Conversations
95
+
96
+ Use the object-based API for multi-turn conversations:
97
+
98
+ ```javascript
99
+ import { completion } from 'llmjs2';
100
+
101
+ async function chatConversation() {
102
+ const messages = [
103
+ { role: 'system', content: 'You are a helpful assistant.' },
104
+ { role: 'user', content: 'What is the capital of France?' }
105
+ ];
106
+
107
+ // First response
108
+ const response1 = await completion({
109
+ model: 'ollama/minimax-m2.5:cloud',
110
+ messages: messages
111
+ });
112
+
113
+ console.log('Assistant:', response1);
114
+
115
+ // Continue the conversation
116
+ messages.push({ role: 'assistant', content: response1 });
117
+ messages.push({ role: 'user', content: 'What is its population?' });
118
+
119
+ const response2 = await completion({
120
+ model: 'ollama/minimax-m2.5:cloud',
121
+ messages: messages
122
+ });
123
+
124
+ console.log('Assistant:', response2);
125
+ }
126
+
127
+ chatConversation();
128
+ ```
129
+
130
+ ## Function Calling (Tools)
131
+
132
+ Give your LLM access to external functions:
133
+
134
+ ```javascript
135
+ import { completion } from 'llmjs2';
136
+
137
+ // Define a weather tool
138
+ const weatherTool = {
139
+ type: 'function',
140
+ function: {
141
+ name: 'get_weather',
142
+ description: 'Get the current weather in a given location',
143
+ parameters: {
144
+ type: 'object',
145
+ properties: {
146
+ location: {
147
+ type: 'string',
148
+ description: 'The city and state, e.g. San Francisco, CA'
149
+ }
150
+ },
151
+ required: ['location']
152
+ }
153
+ }
154
+ };
155
+
156
+ async function weatherAssistant() {
157
+ const response = await completion({
158
+ model: 'openrouter/openrouter/free',
159
+ messages: [
160
+ { role: 'user', content: 'What is the weather like in Paris?' }
161
+ ],
162
+ tools: [weatherTool]
163
+ });
164
+
165
+ // Check if the model wants to call a tool
166
+ if (response.tool_calls) {
167
+ console.log('Tool calls:', response.tool_calls);
168
+ // Here you would execute the tool and continue the conversation
169
+ } else {
170
+ console.log('Response:', response);
171
+ }
172
+ }
173
+
174
+ weatherAssistant();
175
+ ```
176
+
177
+ ## Error Handling
178
+
179
+ Always wrap your completion calls in try-catch blocks:
180
+
181
+ ```javascript
182
+ import { completion } from 'llmjs2';
183
+
184
+ async function safeCompletion() {
185
+ try {
186
+ const response = await completion('Tell me a joke');
187
+ console.log('Success:', response);
188
+ } catch (error) {
189
+ console.error('Error:', error.message);
190
+
191
+ // Handle different error types
192
+ if (error.message.includes('API key')) {
193
+ console.log('Please set your API keys');
194
+ } else if (error.message.includes('timeout')) {
195
+ console.log('Request timed out, try again');
196
+ } else if (error.message.includes('Invalid model')) {
197
+ console.log('Check your model format: provider/model_name');
198
+ }
199
+ }
200
+ }
201
+
202
+ safeCompletion();
203
+ ```
204
+
205
+ ## Common Patterns
206
+
207
+ ### Content Summarization
208
+
209
+ ```javascript
210
+ async function summarizeText(text, maxWords = 50) {
211
+ const prompt = `Please summarize the following text in ${maxWords} words or less:\n\n${text}`;
212
+
213
+ return await completion({
214
+ model: 'ollama/minimax-m2.5:cloud',
215
+ messages: [{ role: 'user', content: prompt }]
216
+ });
217
+ }
218
+
219
+ // Usage
220
+ const summary = await summarizeText('Your long text here...', 30);
221
+ console.log(summary);
222
+ ```
223
+
224
+ ### Code Generation
225
+
226
+ ```javascript
227
+ async function generateCode(description, language = 'javascript') {
228
+ const prompt = `Write a ${language} function that ${description}. Include comments and error handling.`;
229
+
230
+ return await completion({
231
+ model: 'openrouter/openrouter/free',
232
+ messages: [
233
+ { role: 'system', content: 'You are an expert programmer. Write clean, well-documented code.' },
234
+ { role: 'user', content: prompt }
235
+ ]
236
+ });
237
+ }
238
+
239
+ // Usage
240
+ const code = await generateCode('calculates the fibonacci sequence', 'python');
241
+ console.log(code);
242
+ ```
243
+
244
+ ### Multi-Provider Fallback
245
+
246
+ ```javascript
247
+ async function smartCompletion(prompt) {
248
+ const providers = [
249
+ 'ollama/minimax-m2.5:cloud',
250
+ 'openrouter/openrouter/free'
251
+ ];
252
+
253
+ for (const provider of providers) {
254
+ try {
255
+ console.log(`Trying ${provider}...`);
256
+ const response = await completion(provider, prompt);
257
+ return response;
258
+ } catch (error) {
259
+ console.log(`${provider} failed: ${error.message}`);
260
+ continue;
261
+ }
262
+ }
263
+
264
+ throw new Error('All providers failed');
265
+ }
266
+
267
+ // Usage
268
+ const response = await smartCompletion('Hello world!');
269
+ console.log(response);
270
+ ```
271
+
272
+ ## Model Format Reference
273
+
274
+ Models must be specified in the format: `provider/model_name`
275
+
276
+ **Ollama Examples:**
277
+
278
+ - `ollama/minimax-m2.5:cloud`
279
+ - `ollama/qwen3.5:397b-cloud`
280
+ - `ollama/glm-5.1:cloud`
281
+
282
+ **OpenRouter Examples:**
283
+
284
+ - `openrouter/openrouter/free`
285
+ - `openrouter/openai/gpt-4`
286
+ - `openrouter/anthropic/claude-3-haiku`
287
+
288
+ ## Next Steps
289
+
290
+ Now that you understand the basic usage:
291
+
292
+ - **[Server Mode](SERVER_MODE.md)** - Run llmjs2 as an API server
293
+ - **[CLI Guide](CLI.md)** - Use the command-line interface
294
+ - **[Advanced Features](ADVANCED_FEATURES.md)** - Advanced usage patterns and integrations
295
+
296
+ For technical details, see the [TECHNICAL_SPECIFICATION.md](TECHNICAL_SPECIFICATION.md).