llmjs2 1.3.8 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -476
- package/chain/AGENT_STEP_README.md +102 -0
- package/chain/README.md +257 -0
- package/chain/WORKFLOW_README.md +85 -0
- package/chain/agent-step-example.js +232 -0
- package/chain/docs/AGENT.md +126 -0
- package/chain/docs/GRAPH.md +490 -0
- package/chain/examples.js +314 -0
- package/chain/index.js +31 -0
- package/chain/lib/agent.js +338 -0
- package/chain/lib/flow/agent-step.js +119 -0
- package/chain/lib/flow/edge.js +24 -0
- package/chain/lib/flow/flow.js +76 -0
- package/chain/lib/flow/graph.js +331 -0
- package/chain/lib/flow/index.js +7 -0
- package/chain/lib/flow/step.js +63 -0
- package/chain/lib/memory/in-memory.js +117 -0
- package/chain/lib/memory/index.js +36 -0
- package/chain/lib/memory/lance-memory.js +225 -0
- package/chain/lib/memory/sqlite-memory.js +309 -0
- package/chain/simple-agent-step-example.js +168 -0
- package/chain/workflow-example-usage.js +70 -0
- package/chain/workflow-example.json +59 -0
- package/core/README.md +485 -0
- package/core/cli.js +275 -0
- package/core/docs/BASIC_USAGE.md +62 -0
- package/core/docs/CLI.md +104 -0
- package/{docs → core/docs}/GET_STARTED.md +129 -129
- package/{docs → core/docs}/GUARDRAILS_GUIDE.md +734 -734
- package/{docs → core/docs}/README.md +47 -47
- package/core/docs/ROUTER_GUIDE.md +199 -0
- package/{docs → core/docs}/SERVER_MODE.md +358 -350
- package/core/index.js +115 -0
- package/{providers → core/providers}/ollama.js +14 -6
- package/{providers → core/providers}/openai.js +14 -6
- package/{providers → core/providers}/openrouter.js +14 -6
- package/core/router.js +252 -0
- package/{server.js → core/server.js} +15 -5
- package/package.json +43 -27
- package/cli.js +0 -195
- package/docs/BASIC_USAGE.md +0 -296
- package/docs/CLI.md +0 -455
- package/docs/ROUTER_GUIDE.md +0 -402
- package/index.js +0 -265
- package/router.js +0 -273
- package/test-completion.js +0 -99
- package/test.js +0 -246
- /package/{config.yaml → core/config.yaml} +0 -0
- /package/{logger.js → core/logger.js} +0 -0

@@ -1,350 +1,358 @@
# Server Mode Guide

Run llmjs2 as an OpenAI-compatible API server with intelligent routing and load balancing, so existing OpenAI clients and applications can connect to it directly.

## Quick Start Server

### Method 1: Simple JavaScript Server

Create a server file:

```javascript
// server.js
import { app } from 'llmjs2';

// Start the server
app.listen(3000, () => {
  console.log('🚀 llmjs2 server running on http://localhost:3000');
});
```

Run it:

```bash
node server.js
```
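
Once the server is up, you can smoke-test it from another terminal. A minimal sketch, assuming Node 18+ so the built-in `fetch` is available (the request shape is covered under API Endpoints below):

```javascript
// smoke-test.js (ESM): node smoke-test.js
const res = await fetch('http://localhost:3000/v1/chat/completions', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    model: 'ollama/minimax-m2.5:cloud',
    messages: [{ role: 'user', content: 'ping' }]
  })
});
console.log(res.status, await res.json());
```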

## API Endpoints

### Chat Completions

**Endpoint:** `POST /v1/chat/completions`

**Content-Type:** `application/json`

**Request Format:**

```json
{
  "model": "ollama/minimax-m2.5:cloud",
  "messages": [
    {
      "role": "user",
      "content": "Hello! How are you?"
    }
  ],
  "tools": [] // optional
}
```

**Response Format:**

The server returns an OpenAI-style chat completion response. A legacy `messages` array is also included for backward compatibility:

```json
{
  "id": "chatcmpl-123456",
  "object": "chat.completion",
  "created": 1640995200,
  "model": "ollama/minimax-m2.5:cloud",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "Hello! I'm doing well, thank you for asking!"
      },
      "finish_reason": "stop"
    }
  ],
  "messages": [
    {
      "role": "user",
      "content": "Hello! How are you?"
    },
    {
      "role": "assistant",
      "content": "Hello! I'm doing well, thank you for asking!"
    }
  ]
}
```
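
Since both fields carry the assistant turn, a client can read whichever it prefers. A small helper sketch based on the shape above (the helper itself is illustrative, not part of llmjs2):

```javascript
// Extract the assistant reply from a parsed response body.
// Prefers the OpenAI-style choices array, falls back to the legacy messages array.
function assistantReply(data) {
  return data.choices?.[0]?.message?.content
      ?? data.messages?.at(-1)?.content;
}
```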

## Using with OpenAI Clients

### Direct HTTP Requests

You can consume the response like a normal OpenAI chat completion:

```python
import requests

response = requests.post(
    "http://localhost:3000/v1/chat/completions",
    json={
        "messages": [{"role": "user", "content": "Hello!"}]
    }
)

data = response.json()
assistant_message = data["choices"][0]["message"]
print(f"Model used: {data['model']}")
print(f"Assistant: {assistant_message['content']}")
```

### Node.js with fetch

```javascript
const response = await fetch('http://localhost:3000/v1/chat/completions', {
  method: 'POST',
  headers: {
    'Content-Type': 'application/json'
  },
  body: JSON.stringify({
    messages: [{ role: 'user', content: 'Hello!' }]
  })
});

const data = await response.json();
const assistantMessage = data.choices[0].message;
console.log(`Model used: ${data.model}`);
console.log(`Assistant: ${assistantMessage.content}`);
```

### cURL

```bash
curl -X POST http://localhost:3000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer your-api-key" \
  -d '{
    "model": "ollama/minimax-m2.5:cloud",
    "messages": [
      {
        "role": "user",
        "content": "Hello! How are you?"
      }
    ]
  }'
```
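
Because the endpoint mirrors OpenAI's, the official `openai` npm client can also be pointed at the server by overriding its base URL. This is a sketch rather than documented llmjs2 usage; the placeholder API key is an assumption, since the real provider keys live in the server's environment:

```javascript
import OpenAI from 'openai'; // npm install openai

const client = new OpenAI({
  baseURL: 'http://localhost:3000/v1',
  apiKey: 'placeholder', // assumption: llmjs2 reads provider keys server-side
});

const completion = await client.chat.completions.create({
  model: 'ollama/minimax-m2.5:cloud',
  messages: [{ role: 'user', content: 'Hello!' }],
});
console.log(completion.choices[0].message.content);
```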

## Router Integration

Add intelligent routing and load balancing to your server:

### Basic Router Setup

```javascript
import { router, app } from 'llmjs2';

const costOptimizedModels = [
  {
    "model_name": "text-davinci-001",
    "llm_params": {
      "model": "ollama/text-davinci-003",
      "api_key": process.env.OLLAMA_API_KEY
    }
  },
  {
    "model_name": "text-davinci-002",
    "llm_params": {
      "model": "openrouter/text-davinci-003",
      "api_key": process.env.OPEN_ROUTER_API_KEY
    }
  },
  {
    "model_name": "text-davinci-003",
    "llm_params": {
      "model": "openai/gpt-3.5-turbo",
      "api_key": process.env.OPENAI_API_KEY
    }
  }
];

// Create router with random strategy for load balancing
const route = router(costOptimizedModels, 'random');

// Apply router to server
app.use(route);

// Start the server
app.listen(3000, () => {
  console.log('🚀 llmjs2 server with routing running on http://localhost:3000');
});
```

### Router Strategies

- **`'random'`**: Randomly selects from the available models
- **`'sequential'`**: Cycles through models in order
- **`'default'`** (or no strategy argument): Load balances across models with the same `model_name`, as sketched below
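
For the default strategy, omit the second argument entirely. A minimal sketch, assuming (per the list above) that entries sharing a `model_name` are what get balanced:

```javascript
import { router, app } from 'llmjs2';

// Both entries answer to "gpt-4", so requests for it are spread across providers
const balancedModels = [
  { "model_name": "gpt-4", "llm_params": { "model": "openai/gpt-4", "api_key": process.env.OPENAI_API_KEY } },
  { "model_name": "gpt-4", "llm_params": { "model": "openrouter/gpt-4", "api_key": process.env.OPEN_ROUTER_API_KEY } }
];

const route = router(balancedModels); // no strategy argument: default load balancing
app.use(route);
```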

### API Usage with Routing

```bash
# Automatic routing (uses router strategy)
curl -X POST http://localhost:3000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "messages": [{"role": "user", "content": "Hello!"}]
  }'

# Direct model routing (bypasses router)
curl -X POST http://localhost:3000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "openai/gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "Hello!"}]
  }'
```

### Advanced Routing Examples

**Multi-Provider Fallback:**

```javascript
const fallbackModels = [
  { "model_name": "gpt-4", "llm_params": { "model": "openai/gpt-4", "api_key": process.env.OPENAI_API_KEY } },
  { "model_name": "gpt-4", "llm_params": { "model": "ollama/gpt-4", "api_key": process.env.OLLAMA_API_KEY } },
  { "model_name": "gpt-4", "llm_params": { "model": "openrouter/gpt-4", "api_key": process.env.OPEN_ROUTER_API_KEY } }
];

const route = router(fallbackModels, 'random');
app.use(route);
```

**Cost Optimization:**

```javascript
const costModels = [
  { "model_name": "completion", "llm_params": { "model": "ollama/llama2", "api_key": process.env.OLLAMA_API_KEY } },
  { "model_name": "completion", "llm_params": { "model": "openrouter/free", "api_key": process.env.OPEN_ROUTER_API_KEY } },
  { "model_name": "completion", "llm_params": { "model": "openai/gpt-3.5-turbo", "api_key": process.env.OPENAI_API_KEY } }
];

const route = router(costModels, 'sequential'); // Try cheaper models first
app.use(route);
```

## Function Calling (Tools) Support

The server supports OpenAI-compatible function calling:

```bash
curl -X POST http://localhost:3000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer your-api-key" \
  -d '{
    "model": "openrouter/openrouter/free",
    "messages": [
      {
        "role": "user",
        "content": "What is the weather like in Paris?"
      }
    ],
    "tools": [
      {
        "type": "function",
        "function": {
          "name": "get_weather",
          "description": "Get the current weather in a given location",
          "parameters": {
            "type": "object",
            "properties": {
              "location": {
                "type": "string",
                "description": "The city and state, e.g. San Francisco, CA"
              }
            },
            "required": ["location"]
          }
        }
      }
    ]
  }'
```
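
When the model opts to call a tool, an OpenAI-compatible reply carries the call in `tool_calls` instead of `content`. A client-side sketch, assuming the standard OpenAI `tool_calls` shape (this guide does not spell it out):

```javascript
// Issue the same request as the cURL example above, then inspect the reply
const res = await fetch('http://localhost:3000/v1/chat/completions', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    model: 'openrouter/openrouter/free',
    messages: [{ role: 'user', content: 'What is the weather like in Paris?' }],
    tools: [{
      type: 'function',
      function: {
        name: 'get_weather',
        description: 'Get the current weather in a given location',
        parameters: {
          type: 'object',
          properties: { location: { type: 'string' } },
          required: ['location']
        }
      }
    }]
  })
});
const data = await res.json();

const message = data.choices[0].message;
if (message.tool_calls?.length) {
  const call = message.tool_calls[0];
  const args = JSON.parse(call.function.arguments); // e.g. { "location": "Paris" }
  console.log(`Model requested ${call.function.name} with`, args);
} else {
  console.log(message.content); // plain answer, no tool call needed
}
```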

### Error Handling

The server returns proper HTTP status codes and JSON error responses:

```json
{
  "error": {
    "message": "model is required",
    "type": "invalid_request_error"
  }
}
```

Common status codes:

- `400` - Bad Request (missing parameters)
- `404` - Not Found (invalid endpoint)
- `500` - Internal Server Error (API failures)
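
A client can surface these uniformly by checking the status before trusting the body. A sketch assuming error responses always follow the shape above:

```javascript
const res = await fetch('http://localhost:3000/v1/chat/completions', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ messages: [{ role: 'user', content: 'Hello!' }] })
});

if (!res.ok) {
  // Error bodies follow the { "error": { message, type } } shape shown above
  const { error } = await res.json();
  throw new Error(`HTTP ${res.status} ${error.type}: ${error.message}`);
}
const data = await res.json();
console.log(data.choices[0].message.content);
```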

### Environment Variables for Production

```bash
# Server configuration
PORT=3000
HOST=0.0.0.0

# API Keys
OLLAMA_API_KEY=your_production_key
OPEN_ROUTER_API_KEY=your_production_key

# Default models
OLLAMA_DEFAULT_MODEL=minimax-m2.5:cloud
OPEN_ROUTER_DEFAULT_MODEL=openrouter/free
```
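
A start-up script can honor `PORT` and `HOST` explicitly. A sketch assuming `app.listen` accepts the usual Express-style `(port, host, callback)` arguments; this guide only shows the single-port form:

```javascript
import { app } from 'llmjs2';

// Fall back to the values listed above when the variables are unset
const port = Number(process.env.PORT ?? 3000);
const host = process.env.HOST ?? '0.0.0.0';

app.listen(port, host, () => {
  console.log(`llmjs2 server listening on http://${host}:${port}`);
});
```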

## Monitoring and Logging

The server includes comprehensive logging:

```
[2024-01-15T10:30:45.123Z] POST /v1/chat/completions
Headers: {"content-type":"application/json",...}
Body parsing completed successfully
Starting completion with model: ollama/minimax-m2.5:cloud
```

### API Request Issues

**400 Bad Request:**

- Check that `model` and `messages` are provided
- Ensure messages have `role` and `content` properties

**500 Internal Server Error:**

- Check that your API keys are valid
- Verify your internet connection
- Check the provider's API status

### CORS Issues

If you're getting CORS errors in the browser:

```javascript
// The server includes CORS headers by default.
// If you need custom CORS, modify the server code:
res.writeHead(statusCode, {
  'Content-Type': 'application/json',
  'Access-Control-Allow-Origin': '*', // Change this for production
  // ... other headers
});
```

## Next Steps

- **[CLI Guide](CLI.md)** - Use the command-line interface
- **[Basic Usage](BASIC_USAGE.md)** - Learn different API patterns
- **[Technical Specification](TECHNICAL_SPECIFICATION.md)** - Detailed technical information

Server mode makes llmjs2 a drop-in backend for any OpenAI-compatible client or application!