genai-lite 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +51 -1764
- package/dist/config/llm-presets.json +254 -0
- package/dist/llm/config.js +207 -3
- package/dist/types/image.d.ts +1 -1
- package/dist/types/image.js +1 -1
- package/package.json +1 -1
- package/src/config/llm-presets.json +254 -0
package/README.md
CHANGED
@@ -21,6 +21,14 @@ A lightweight, portable Node.js/TypeScript library providing a unified interface
 npm install genai-lite
 ```
 
+Set API keys as environment variables:
+
+```bash
+export OPENAI_API_KEY=sk-...
+export ANTHROPIC_API_KEY=sk-ant-...
+export GEMINI_API_KEY=AIza...
+```
+
 ## Quick Start
 
 ### Cloud Providers (OpenAI, Anthropic, Gemini, Mistral)
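The Quick Start hunk below passes `fromEnvironment` straight to `LLMService`. As a rough sketch only (calling the provider directly and the exact environment-variable lookup are assumptions, not something this diff shows), the same provider can be invoked by hand to confirm the variables above were picked up, since an `ApiKeyProvider` is just an async `providerId -> key` function per the API Key Management section later in this diff:

```typescript
import { fromEnvironment } from 'genai-lite';

async function checkOpenAIKey(): Promise<void> {
  // Hypothetical sanity check: 'openai' is the provider ID used elsewhere in this README,
  // and fromEnvironment is expected to resolve it from OPENAI_API_KEY.
  const key = await fromEnvironment('openai');
  if (!key) console.warn('OPENAI_API_KEY is not set');
}
```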
@@ -28,7 +36,6 @@ npm install genai-lite
 ```typescript
 import { LLMService, fromEnvironment } from 'genai-lite';
 
-// Create service with environment variable API key provider
 const llmService = new LLMService(fromEnvironment);
 
 const response = await llmService.sendMessage({
@@ -42,8 +49,6 @@ const response = await llmService.sendMessage({
 
 if (response.object === 'chat.completion') {
   console.log(response.choices[0].message.content);
-} else {
-  console.error('Error:', response.error.message);
 }
 ```
 
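The hunk above trims the error branch from the quick-start snippet. For readers of the shortened README, a minimal sketch of handling both sides of the response union, using only fields that appear elsewhere in this diff (`response.object`, `response.error.type`, `response.error.message`), might look like:

```typescript
if (response.object === 'chat.completion') {
  console.log(response.choices[0].message.content);
} else if (response.object === 'error') {
  // Failure case: surface the structured error that sendMessage returned.
  console.error(`${response.error.type}: ${response.error.message}`);
}
```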
@@ -69,18 +74,11 @@ if (response.object === 'chat.completion') {
 }
 ```
 
-
-
-## Image Generation
-
-genai-lite provides first-class support for AI image generation alongside its LLM capabilities. Generate images using cloud providers (OpenAI) or run diffusion models locally via genai-electron.
-
-### Quick Start - OpenAI Images
+### Image Generation
 
 ```typescript
 import { ImageService, fromEnvironment } from 'genai-lite';
 
-// Create service with API key provider
 const imageService = new ImageService(fromEnvironment);
 
 const result = await imageService.generateImage({
@@ -95,375 +93,58 @@ const result = await imageService.generateImage({
 });
 
 if (result.object === 'image.result') {
-
-  const fs = require('fs');
-  fs.writeFileSync('output.png', result.data[0].data);
-  console.log('Image generated successfully!');
-} else {
-  console.error('Error:', result.error.message);
-}
-```
-
-### Quick Start - Local Diffusion
-
-```typescript
-import { ImageService } from 'genai-lite';
-
-// Start genai-electron diffusion server first on port 8081
-const imageService = new ImageService(async () => 'not-needed');
-
-const result = await imageService.generateImage({
-  providerId: 'genai-electron-images',
-  modelId: 'stable-diffusion',
-  prompt: 'A majestic dragon soaring through clouds, highly detailed',
-  settings: {
-    width: 1024,
-    height: 1024,
-    diffusion: {
-      negativePrompt: 'blurry, low quality, distorted',
-      steps: 30,
-      cfgScale: 7.5,
-      sampler: 'dpm++2m',
-      seed: 42 // Optional: for reproducible results
-    }
-  }
-});
-
-if (result.object === 'image.result') {
-  console.log('Generated image with seed:', result.data[0].seed);
-  // Save the image
-  require('fs').writeFileSync('dragon.png', result.data[0].data);
-}
-```
-
-### Supported Image Providers
-
-#### OpenAI Images
-
-**Models:**
-- `gpt-image-1` - Latest OpenAI image model with advanced features (32K character prompts)
-- `gpt-image-1-mini` - Fast and efficient, default model (32K character prompts)
-- `dall-e-3` - High-quality image generation (4K character prompts)
-- `dall-e-2` - Cost-effective generation (1K character prompts)
-
-**Configuration:**
-```bash
-export OPENAI_API_KEY=sk-...
-# Optional: override base URL
-export OPENAI_API_BASE_URL=https://api.openai.com/v1
-```
-
-**Capabilities:**
-- Multiple images per request (n parameter, except dall-e-3 which only supports n=1)
-- Quality settings: `auto`, `high`, `medium`, `low`, `hd`, `standard`
-- Style options: `vivid` (hyper-real), `natural` (photographic)
-- Multiple formats: PNG, JPEG, WebP
-- Hosted URLs or base64 response
-
-#### Local Diffusion (genai-electron)
-
-**Models:**
-- `stable-diffusion` - Generic model ID for whatever model is loaded in genai-electron
-
-**Configuration:**
-```bash
-# Optional: override base URL (default: http://localhost:8081)
-export GENAI_ELECTRON_IMAGE_BASE_URL=http://localhost:8081
-```
-
-**Capabilities:**
-- Negative prompts for better control
-- Multiple samplers: `euler_a`, `euler`, `heun`, `dpm2`, `dpm++2s_a`, `dpm++2m`, `dpm++2mv2`, `lcm`
-- Adjustable steps (1-150), CFG scale (1.0-30.0)
-- Custom seeds for reproducibility
-- Real-time progress callbacks
-- Batch generation support
-
-### Advanced Usage
-
-#### Using Image Presets
-
-genai-lite includes 13 built-in presets for common use cases:
-
-```typescript
-const imageService = new ImageService(fromEnvironment);
-
-// List available presets
-const presets = imageService.getPresets();
-console.log(presets.map(p => p.id));
-
-// Use a preset
-const result = await imageService.generateImage({
-  presetId: 'openai-dalle-3-hd',
-  prompt: 'A futuristic city at night'
-});
-
-// Override preset settings
-const result = await imageService.generateImage({
-  presetId: 'genai-electron-sdxl-quality',
-  prompt: 'A portrait of a wise old wizard',
-  settings: {
-    width: 768, // Override preset's 1024x1024
-    height: 1024,
-    diffusion: {
-      steps: 40 // Override preset's 30 steps
-    }
-  }
-});
-```
-
-**Available Presets:**
-- **OpenAI:** `openai-gpt-image-1-mini-default`, `openai-gpt-image-1-quality`, `openai-dalle-3-hd`, `openai-dalle-3-natural`, `openai-dalle-2-default`, `openai-dalle-2-fast`
-- **Local Diffusion:** `genai-electron-sdxl-quality`, `genai-electron-sdxl-balanced`, `genai-electron-sdxl-fast`, `genai-electron-sdxl-portrait`, `genai-electron-sdxl-turbo`, `genai-electron-sdxl-lightning`, `genai-electron-sdxl-lightning-medium`
-
-#### Progress Callbacks (Local Diffusion)
-
-Monitor generation progress in real-time with local diffusion models:
-
-```typescript
-const result = await imageService.generateImage({
-  providerId: 'genai-electron-images',
-  modelId: 'stable-diffusion',
-  prompt: 'A detailed landscape painting',
-  settings: {
-    width: 1024,
-    height: 1024,
-    diffusion: {
-      steps: 30,
-      onProgress: (progress) => {
-        console.log(`Stage: ${progress.stage}`);
-        console.log(`Step ${progress.currentStep}/${progress.totalSteps}`);
-        console.log(`Progress: ${progress.percentage?.toFixed(1)}%`);
-
-        // Update your UI progress bar
-        // updateProgressBar(progress.percentage);
-      }
-    }
-  }
-});
-```
-
-**Progress stages:**
-- `loading` - Model is being loaded
-- `diffusion` - Actively generating the image
-- `decoding` - Converting latents to final image
-
-#### Generating Multiple Images
-
-Request multiple variations in a single call:
-
-```typescript
-// OpenAI - multiple images
-const result = await imageService.generateImage({
-  providerId: 'openai-images',
-  modelId: 'gpt-image-1-mini',
-  prompt: 'A cute robot assistant',
-  count: 4, // Generate 4 variations
-  settings: {
-    width: 512,
-    height: 512
-  }
-});
-
-// genai-electron - batch generation
-const result = await imageService.generateImage({
-  providerId: 'genai-electron-images',
-  modelId: 'stable-diffusion',
-  prompt: 'Fantasy character concept art',
-  count: 3,
-  settings: {
-    diffusion: {
-      steps: 20,
-      // Each image gets a different seed automatically
-    }
-  }
-});
-
-// Process all generated images
-result.data.forEach((image, index) => {
-  require('fs').writeFileSync(`output-${index}.png`, image.data);
-  console.log(`Image ${index} seed:`, image.seed);
-});
-```
-
-#### Provider-Specific Settings
-
-**OpenAI-specific settings:**
-
-```typescript
-const result = await imageService.generateImage({
-  providerId: 'openai-images',
-  modelId: 'gpt-image-1',
-  prompt: 'A professional product photo',
-  settings: {
-    width: 1024,
-    height: 1024,
-    quality: 'high',
-    style: 'natural',
-    openai: {
-      outputFormat: 'png', // 'png', 'jpeg', 'webp'
-      background: 'transparent', // 'auto', 'transparent', 'white', 'black'
-      moderation: 'auto', // 'auto', 'high', 'low'
-      compression: 0.8 // 0.0-1.0 for JPEG/WebP
-    }
-  }
-});
-```
-
-**Diffusion-specific settings:**
-
-```typescript
-const result = await imageService.generateImage({
-  providerId: 'genai-electron-images',
-  modelId: 'stable-diffusion',
-  prompt: 'A mystical forest with glowing mushrooms',
-  settings: {
-    width: 1024,
-    height: 1024,
-    diffusion: {
-      negativePrompt: 'ugly, blurry, low quality, oversaturated',
-      steps: 30, // More steps = higher quality (1-150)
-      cfgScale: 7.5, // Prompt adherence (1.0-30.0)
-      sampler: 'dpm++2m', // Sampling algorithm
-      seed: 12345 // For reproducibility
-    }
-  }
-});
-```
-
-**Available samplers:**
-- `euler_a` - Fast, good for most use cases
-- `euler` - Similar to euler_a but deterministic
-- `dpm++2m` - High quality, recommended for final images
-- `dpm++2s_a` - Good balance of speed and quality
-- `heun` - High quality but slower
-- `lcm` - Extremely fast (for LCM models)
-
-#### Error Handling
-
-```typescript
-const result = await imageService.generateImage({
-  providerId: 'openai-images',
-  modelId: 'gpt-image-1-mini',
-  prompt: 'A beautiful sunset'
-});
-
-if (result.object === 'error') {
-  switch (result.error.type) {
-    case 'authentication_error':
-      console.error('Invalid API key');
-      break;
-    case 'rate_limit_error':
-      console.error('Rate limit exceeded');
-      break;
-    case 'validation_error':
-      console.error('Invalid request:', result.error.message);
-      break;
-    case 'network_error':
-      console.error('Server not reachable:', result.error.message);
-      break;
-    default:
-      console.error('Error:', result.error.message);
-  }
+  require('fs').writeFileSync('output.png', result.data[0].data);
 }
 ```
 
-
+## Documentation
 
-
+Comprehensive documentation is available in the **[`genai-lite-docs`](./genai-lite-docs/index.md)** folder.
 
-
-
+### Getting Started
+- **[Documentation Hub](./genai-lite-docs/index.md)** - Navigation and overview
+- **[Core Concepts](./genai-lite-docs/core-concepts.md)** - API keys, presets, settings, errors
 
-
-
-
-
-    description: 'Optimized for portrait photography',
-    providerId: 'genai-electron-images',
-    modelId: 'stable-diffusion',
-    settings: {
-      width: 768,
-      height: 1024,
-      diffusion: {
-        steps: 35,
-        cfgScale: 8.0,
-        sampler: 'dpm++2m',
-        negativePrompt: 'deformed, ugly, bad anatomy'
-      }
-    }
-  }
-];
+### API Reference
+- **[LLM Service](./genai-lite-docs/llm-service.md)** - Text generation and chat
+- **[Image Service](./genai-lite-docs/image-service.md)** - Image generation (cloud and local)
+- **[llama.cpp Integration](./genai-lite-docs/llamacpp-integration.md)** - Local LLM inference
 
-
-
-
-});
-```
+### Utilities & Advanced
+- **[Prompting Utilities](./genai-lite-docs/prompting-utilities.md)** - Template engine, token counting, content parsing
+- **[TypeScript Reference](./genai-lite-docs/typescript-reference.md)** - Type definitions
 
-###
+### Provider Reference
+- **[Providers & Models](./genai-lite-docs/providers-and-models.md)** - Supported providers and models
 
-
-
-
-
-  ImageGenerationResponse,
-  ImageFailureResponse,
-  ImageGenerationSettings,
-  DiffusionSettings,
-  OpenAISpecificSettings,
-  ImagePreset,
-  ImageProviderInfo,
-  ImageModelInfo,
-  GeneratedImage,
-  ImageProgressCallback
-} from 'genai-lite';
-```
+### Examples & Help
+- **[Example: Chat Demo](./genai-lite-docs/example-chat-demo.md)** - Reference implementation for chat applications
+- **[Example: Image Demo](./genai-lite-docs/example-image-demo.md)** - Reference implementation for image generation applications
+- **[Troubleshooting](./genai-lite-docs/troubleshooting.md)** - Common issues and solutions
 
-##
+## Supported Providers
 
-### LLM
-
--
--
--
--
+### LLM Providers
+- **OpenAI** - GPT-5 (5.2, 5.1, mini, nano), GPT-4.1, o4-mini
+- **Anthropic** - Claude 4.5 (Opus, Sonnet, Haiku), Claude 4, Claude 3.7, Claude 3.5
+- **Google Gemini** - Gemini 3 (Pro, Flash preview), Gemini 2.5, Gemini 2.0
+- **Mistral** - Codestral, Devstral
+- **llama.cpp** - Run any GGUF model locally (no API keys required)
 
-### Image
-
--
-- Size presets and batch generation
-- Full-screen image lightbox with keyboard navigation
-- Progress monitoring for diffusion models
-- Comprehensive settings for all DALL-E models
+### Image Providers
+- **OpenAI Images** - gpt-image-1, dall-e-3, dall-e-2
+- **genai-electron** - Local Stable Diffusion models
 
-
+See **[Providers & Models](./genai-lite-docs/providers-and-models.md)** for complete model listings and capabilities.
 
 ## API Key Management
 
-genai-lite uses a flexible API key provider pattern.
-
-### Environment Variables (Built-in)
-
-```typescript
-import { fromEnvironment } from 'genai-lite';
-
-// Expects environment variables like:
-// OPENAI_API_KEY=sk-...
-// ANTHROPIC_API_KEY=sk-ant-...
-// GEMINI_API_KEY=...
-
-const llmService = new LLMService(fromEnvironment);
-```
-
-### Custom API Key Provider
+genai-lite uses a flexible API key provider pattern. Use the built-in `fromEnvironment` provider or create your own:
 
 ```typescript
 import { ApiKeyProvider, LLMService } from 'genai-lite';
 
-// Create your own provider
 const myKeyProvider: ApiKeyProvider = async (providerId: string) => {
-  // Fetch from your secure storage, vault, etc.
   const key = await mySecureStorage.getKey(providerId);
   return key || null;
 };
@@ -471,1424 +152,30 @@ const myKeyProvider: ApiKeyProvider = async (providerId: string) => {
 const llmService = new LLMService(myKeyProvider);
 ```
 
-
-
-**Note:** Model IDs include version dates for precise model selection. Always use the exact model ID as shown below.
+See **[Core Concepts](./genai-lite-docs/core-concepts.md#api-key-management)** for detailed examples including Electron integration.
 
-
-- **Claude 4** (Latest generation):
-  - `claude-sonnet-4-20250514` - Balanced performance model
-  - `claude-opus-4-20250514` - Most powerful for complex tasks
-- **Claude 3.7**: `claude-3-7-sonnet-20250219` - Advanced reasoning
-- **Claude 3.5**:
-  - `claude-3-5-sonnet-20241022` - Best balance of speed and intelligence
-  - `claude-3-5-haiku-20241022` - Fast and cost-effective
-
-### Google Gemini
-- **Gemini 2.5** (Latest generation):
-  - `gemini-2.5-pro` - Most advanced multimodal capabilities
-  - `gemini-2.5-flash` - Fast with large context window
-  - `gemini-2.5-flash-lite-preview-06-17` - Most cost-effective
-- **Gemini 2.0**:
-  - `gemini-2.0-flash` - High performance multimodal
-  - `gemini-2.0-flash-lite` - Lightweight version
-
-### OpenAI
-- **o4 series**: `o4-mini` - Advanced reasoning model
-- **GPT-4.1 series**:
-  - `gpt-4.1` - Latest GPT-4 with enhanced capabilities
-  - `gpt-4.1-mini` - Cost-effective for most tasks
-  - `gpt-4.1-nano` - Ultra-efficient version
-
-### Mistral
-> **Note:** The official Mistral adapter is under development. Requests made to Mistral models will currently be handled by a mock adapter for API compatibility testing.
-
-- `codestral-2501` - Specialized for code generation
-- `devstral-small-2505` - Compact development-focused model
-
-### llama.cpp (Local Models)
-
-Run models locally via [llama.cpp](https://github.com/ggml-org/llama.cpp) server. Use the generic `'llamacpp'` model ID—the actual model is determined by what you loaded in the llama.cpp server.
-
-**Automatic Capability Detection:** genai-lite automatically detects capabilities (reasoning support, context windows, token limits) for known open-weights models (Qwen3, etc.) by matching the GGUF filename from the server. No configuration needed.
+## Example Applications
 
-
-- `llamacpp` - Generic ID for whatever model the llama.cpp server has loaded
+The library includes two complete demo applications showcasing all features:
 
-**
+- **[chat-demo](examples/chat-demo)** - Interactive chat application with all LLM providers, template rendering, and advanced features
+- **[image-gen-demo](examples/image-gen-demo)** - Interactive image generation UI with OpenAI and local diffusion support
 
-
-```bash
-llama-server -m /path/to/model.gguf --port 8080
-```
+Both demos are production-ready React + Express applications that serve as reference implementations and testing environments. See **[Example: Chat Demo](./genai-lite-docs/example-chat-demo.md)** and **[Example: Image Demo](./genai-lite-docs/example-image-demo.md)** for detailed documentation.
 
-
-```typescript
-import { LLMService } from 'genai-lite';
+## Contributing
 
-
-const service = new LLMService(async () => 'not-needed');
+Contributions are welcome! Please feel free to submit a Pull Request. For major changes, please open an issue first to discuss what you would like to change.
 
-
-  providerId: 'llamacpp',
-  modelId: 'llamacpp', // Generic ID for loaded model
-  messages: [{ role: 'user', content: 'Hello!' }]
-});
-```
+### Development
 
-3. Configure server URL via environment variable:
 ```bash
-export LLAMACPP_API_BASE_URL=http://localhost:8080
-```
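The removed llama.cpp snippet above is only partially visible in this diff: the `service.sendMessage({` opener falls on lines the viewer elided, so the fragment jumps from creating the service to its `providerId`/`modelId` fields. A consolidated sketch of what those removed lines describe, with the elided opener filled in as an assumption, might look like:

```typescript
import { LLMService } from 'genai-lite';

// llama.cpp needs no API key, so any placeholder provider works here.
const service = new LLMService(async () => 'not-needed');

// Assumes a llama-server instance reachable at LLAMACPP_API_BASE_URL
// (default http://localhost:8080 per the removed text above).
const response = await service.sendMessage({
  providerId: 'llamacpp',
  modelId: 'llamacpp', // generic ID for whatever model the server has loaded
  messages: [{ role: 'user', content: 'Hello!' }]
});
```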
|
|
543
|
-
|
|
544
|
-
**Advanced features** - Access non-LLM endpoints:
|
|
545
|
-
|
|
546
|
-
```typescript
|
|
547
|
-
import { LlamaCppServerClient } from 'genai-lite';
|
|
548
|
-
|
|
549
|
-
const client = new LlamaCppServerClient('http://localhost:8080');
|
|
550
|
-
|
|
551
|
-
// Check server health
|
|
552
|
-
const health = await client.getHealth();
|
|
553
|
-
|
|
554
|
-
// Tokenize text
|
|
555
|
-
const { tokens } = await client.tokenize('Hello world');
|
|
556
|
-
|
|
557
|
-
// Generate embeddings
|
|
558
|
-
const { embedding } = await client.createEmbedding('Some text');
|
|
559
|
-
|
|
560
|
-
// Code completion
|
|
561
|
-
const result = await client.infill('def hello():\n', '\nprint("done")');
|
|
562
|
-
```
|
|
563
|
-
|
|
564
|
-
See the [llama.cpp Integration](#llamacpp-integration) section for details.
|
|
565
|
-
|
|
566
|
-
### Models with Reasoning Support
|
|
567
|
-
|
|
568
|
-
Some models include advanced reasoning/thinking capabilities that enhance their problem-solving abilities:
|
|
569
|
-
|
|
570
|
-
- **Anthropic**: Claude Sonnet 4, Claude Opus 4, Claude 3.7 Sonnet
|
|
571
|
-
- **Google Gemini**: Gemini 2.5 Pro (always on), Gemini 2.5 Flash, Gemini 2.5 Flash-Lite Preview
|
|
572
|
-
- **OpenAI**: o4-mini (always on)
|
|
573
|
-
- **llama.cpp**: Qwen3, DeepSeek-R1, GPT-OSS (requires `--reasoning-format deepseek` server flag)
|
|
574
|
-
|
|
575
|
-
See the [Reasoning Mode](#reasoning-mode) section for usage details.
|
|
576
|
-
|
|
577
|
-
## Advanced Usage
|
|
578
|
-
|
|
579
|
-
### Custom Settings
|
|
580
|
-
|
|
581
|
-
```typescript
|
|
582
|
-
const response = await llmService.sendMessage({
|
|
583
|
-
providerId: 'anthropic',
|
|
584
|
-
modelId: 'claude-3-5-haiku-20241022',
|
|
585
|
-
messages: [{ role: 'user', content: 'Write a haiku' }],
|
|
586
|
-
settings: {
|
|
587
|
-
temperature: 0.7,
|
|
588
|
-
maxTokens: 100,
|
|
589
|
-
topP: 0.9,
|
|
590
|
-
stopSequences: ['\n\n']
|
|
591
|
-
}
|
|
592
|
-
});
|
|
593
|
-
```
|
|
594
|
-
|
|
595
|
-
### Reasoning Mode
|
|
596
|
-
|
|
597
|
-
Enable advanced reasoning capabilities for supported models to get step-by-step thinking and improved problem-solving:
|
|
598
|
-
|
|
599
|
-
```typescript
|
|
600
|
-
// Enable reasoning with automatic token budget
|
|
601
|
-
const response = await llmService.sendMessage({
|
|
602
|
-
providerId: 'gemini',
|
|
603
|
-
modelId: 'gemini-2.5-flash',
|
|
604
|
-
messages: [{ role: 'user', content: 'Solve this step by step: If a train travels 120km in 2 hours, what is its speed in m/s?' }],
|
|
605
|
-
settings: {
|
|
606
|
-
reasoning: {
|
|
607
|
-
enabled: true // Let the model decide how much thinking to do
|
|
608
|
-
}
|
|
609
|
-
}
|
|
610
|
-
});
|
|
611
|
-
|
|
612
|
-
// Use effort levels for quick control
|
|
613
|
-
const response = await llmService.sendMessage({
|
|
614
|
-
providerId: 'anthropic',
|
|
615
|
-
modelId: 'claude-3-7-sonnet-20250219',
|
|
616
|
-
messages: [{ role: 'user', content: 'Analyze this complex problem...' }],
|
|
617
|
-
settings: {
|
|
618
|
-
reasoning: {
|
|
619
|
-
enabled: true,
|
|
620
|
-
effort: 'high' // 'low', 'medium', or 'high'
|
|
621
|
-
}
|
|
622
|
-
}
|
|
623
|
-
});
|
|
624
|
-
|
|
625
|
-
// Set specific token budget for reasoning
|
|
626
|
-
const response = await llmService.sendMessage({
|
|
627
|
-
providerId: 'gemini',
|
|
628
|
-
modelId: 'gemini-2.5-flash-lite-preview-06-17',
|
|
629
|
-
messages: [{ role: 'user', content: 'What is the square root of 144?' }],
|
|
630
|
-
settings: {
|
|
631
|
-
reasoning: {
|
|
632
|
-
enabled: true,
|
|
633
|
-
maxTokens: 5000 // Specific token budget for reasoning
|
|
634
|
-
}
|
|
635
|
-
}
|
|
636
|
-
});
|
|
637
|
-
|
|
638
|
-
// Access reasoning output (if available)
|
|
639
|
-
if (response.object === 'chat.completion' && response.choices[0].reasoning) {
|
|
640
|
-
console.log('Model reasoning:', response.choices[0].reasoning);
|
|
641
|
-
console.log('Final answer:', response.choices[0].message.content);
|
|
642
|
-
}
|
|
643
|
-
```
|
|
644
|
-
|
|
645
|
-
**Reasoning Options:**
|
|
646
|
-
- `enabled`: Turn reasoning on/off (some models like o4-mini and Gemini 2.5 Pro have it always on)
|
|
647
|
-
- `effort`: Quick presets - 'low' (20% budget), 'medium' (50%), 'high' (80%)
|
|
648
|
-
- `maxTokens`: Specific token budget for reasoning
|
|
649
|
-
- `exclude`: Set to `true` to enable reasoning but exclude it from the response
|
|
650
|
-
|
|
651
|
-
**Important Notes:**
|
|
652
|
-
- Reasoning tokens are billed separately and may cost more
|
|
653
|
-
- Some models (o4-mini, Gemini 2.5 Pro) cannot disable reasoning
|
|
654
|
-
- Not all models support reasoning - check the [supported models](#models-with-reasoning-support) list
|
|
655
|
-
- The `reasoning` field in the response contains the model's thought process (when available)
|
|
656
|
-
|
|
657
|
-
### Thinking Extraction and Enforcement
|
|
658
|
-
|
|
659
|
-
For models without native reasoning, you can prompt them to output reasoning in XML tags like `<thinking>`. The library then extracts these tags and moves the content to the standardized `reasoning` field, providing a consistent interface across all models.
|
|
660
|
-
|
|
661
|
-
**Key point:** The library doesn't make models think automatically—you must explicitly instruct non-reasoning models to use thinking tags in your prompt. The library then enforces that these tags are present (for non-reasoning models) or accepts native reasoning (for reasoning models).
|
|
662
|
-
|
|
663
|
-
```typescript
|
|
664
|
-
// Prompt the model to think step-by-step in a <thinking> tag
|
|
665
|
-
const response = await llmService.sendMessage({
|
|
666
|
-
providerId: 'openai',
|
|
667
|
-
modelId: 'gpt-4.1',
|
|
668
|
-
messages: [{
|
|
669
|
-
role: 'system',
|
|
670
|
-
content: 'When solving problems, first write your reasoning inside <thinking> tags, then provide the answer.'
|
|
671
|
-
}, {
|
|
672
|
-
role: 'user',
|
|
673
|
-
content: 'Please think through this problem step by step before answering: What is 15% of 240?'
|
|
674
|
-
}],
|
|
675
|
-
settings: {
|
|
676
|
-
thinkingTagFallback: { enabled: true } // Must explicitly enable
|
|
677
|
-
}
|
|
678
|
-
});
|
|
679
|
-
|
|
680
|
-
// If the model responds with:
|
|
681
|
-
// "<thinking>15% means 15/100 = 0.15. So 15% of 240 = 0.15 × 240 = 36.</thinking>The answer is 36."
|
|
682
|
-
//
|
|
683
|
-
// The response will have:
|
|
684
|
-
// - response.choices[0].message.content = "The answer is 36."
|
|
685
|
-
// - response.choices[0].reasoning = "15% means 15/100 = 0.15. So 15% of 240 = 0.15 × 240 = 36."
|
|
686
|
-
|
|
687
|
-
// If the model doesn't include the <thinking> tag, you'll get an error (with default 'auto' mode)
|
|
688
|
-
```
|
|
689
|
-
|
|
690
|
-
**Configuration Options:**
|
|
691
|
-
|
|
692
|
-
```typescript
|
|
693
|
-
const response = await llmService.sendMessage({
|
|
694
|
-
providerId: 'anthropic',
|
|
695
|
-
modelId: 'claude-3-5-haiku-20241022',
|
|
696
|
-
messages: [{ role: 'user', content: 'Solve this step by step...' }],
|
|
697
|
-
settings: {
|
|
698
|
-
thinkingTagFallback: {
|
|
699
|
-
enabled: true, // Must explicitly enable (default: false)
|
|
700
|
-
tagName: 'scratchpad', // Custom tag name (default: 'thinking')
|
|
701
|
-
enforce: true // Smart enforcement (see below)
|
|
702
|
-
}
|
|
703
|
-
}
|
|
704
|
-
});
|
|
705
|
-
```
|
|
706
|
-
|
|
707
|
-
**The `enforce` Property:**
|
|
708
|
-
|
|
709
|
-
The `enforce` boolean controls whether thinking tags are required when native reasoning is not active:
|
|
710
|
-
|
|
711
|
-
- `enforce: true` - Error if tags missing AND native reasoning not active (smart enforcement)
|
|
712
|
-
- `enforce: false` (default) - Extract tags if present, never error
|
|
713
|
-
|
|
714
|
-
The enforcement is **always smart** - it automatically checks if native reasoning is active and only enforces when the model needs tags as a fallback.
|
|
715
|
-
|
|
716
|
-
**How Smart Enforcement Works:**
|
|
717
|
-
|
|
718
|
-
```typescript
|
|
719
|
-
// With non-native reasoning models (e.g., GPT-4)
|
|
720
|
-
const response = await llmService.sendMessage({
|
|
721
|
-
providerId: 'openai',
|
|
722
|
-
modelId: 'gpt-4.1',
|
|
723
|
-
messages: [{
|
|
724
|
-
role: 'system',
|
|
725
|
-
content: 'Always think in <thinking> tags before answering.'
|
|
726
|
-
}, {
|
|
727
|
-
role: 'user',
|
|
728
|
-
content: 'What is 15% of 240?'
|
|
729
|
-
}],
|
|
730
|
-
settings: {
|
|
731
|
-
thinkingTagFallback: { enabled: true, enforce: true }
|
|
732
|
-
}
|
|
733
|
-
});
|
|
734
|
-
// Result: ERROR if <thinking> tag is missing (native reasoning not active)
|
|
735
|
-
// The response is still accessible via errorResponse.partialResponse
|
|
736
|
-
|
|
737
|
-
// With native reasoning models (e.g., Claude with reasoning enabled)
|
|
738
|
-
const response = await llmService.sendMessage({
|
|
739
|
-
providerId: 'anthropic',
|
|
740
|
-
modelId: 'claude-3-7-sonnet-20250219',
|
|
741
|
-
messages: [/* same prompt */],
|
|
742
|
-
settings: {
|
|
743
|
-
reasoning: { enabled: true },
|
|
744
|
-
thinkingTagFallback: { enabled: true, enforce: true }
|
|
745
|
-
}
|
|
746
|
-
});
|
|
747
|
-
// Result: SUCCESS even if <thinking> tag is missing (native reasoning is active)
|
|
748
|
-
```
|
|
749
|
-
|
|
750
|
-
This intelligent enforcement ensures that:
|
|
751
|
-
- Non-native models are held to strict requirements when instructed to use thinking tags
|
|
752
|
-
- Native reasoning models aren't penalized for using their built-in reasoning instead of tags
|
|
753
|
-
- The same prompt can work across different model types
|
|
754
|
-
|
|
755
|
-
### Provider Information
|
|
756
|
-
|
|
757
|
-
```typescript
|
|
758
|
-
// Get list of supported providers
|
|
759
|
-
const providers = await llmService.getProviders();
|
|
760
|
-
|
|
761
|
-
// Get models for a specific provider
|
|
762
|
-
const models = await llmService.getModels('anthropic');
|
|
763
|
-
|
|
764
|
-
// Get configured model presets
|
|
765
|
-
const presets = llmService.getPresets();
|
|
766
|
-
```
|
|
767
|
-
|
|
768
|
-
### Model Presets
|
|
769
|
-
|
|
770
|
-
genai-lite includes a comprehensive set of model presets for common use cases. You can use these defaults, extend them with your own, or replace them entirely.
|
|
771
|
-
|
|
772
|
-
#### Using Default Presets
|
|
773
|
-
|
|
774
|
-
The library ships with over 20 pre-configured presets (defined in `src/config/presets.json`), including specialized "thinking" presets for models with reasoning capabilities:
|
|
775
|
-
|
|
776
|
-
```typescript
|
|
777
|
-
const llmService = new LLMService(fromEnvironment);
|
|
778
|
-
|
|
779
|
-
// Get all default presets
|
|
780
|
-
const presets = llmService.getPresets();
|
|
781
|
-
// Returns presets like:
|
|
782
|
-
// - anthropic-claude-sonnet-4-20250514-default
|
|
783
|
-
// - anthropic-claude-sonnet-4-20250514-thinking (reasoning enabled)
|
|
784
|
-
// - openai-gpt-4.1-default
|
|
785
|
-
// - google-gemini-2.5-flash-thinking (reasoning enabled)
|
|
786
|
-
// ... and many more
|
|
787
|
-
```
|
|
788
|
-
|
|
789
|
-
The thinking presets automatically enable reasoning mode for supported models, making it easy to leverage advanced problem-solving capabilities without manual configuration.
|
|
790
|
-
|
|
791
|
-
#### Extending Default Presets
|
|
792
|
-
|
|
793
|
-
```typescript
|
|
794
|
-
import { LLMService, fromEnvironment, ModelPreset } from 'genai-lite';
|
|
795
|
-
|
|
796
|
-
const customPresets: ModelPreset[] = [
|
|
797
|
-
{
|
|
798
|
-
id: 'my-creative-preset',
|
|
799
|
-
displayName: 'Creative Writing Assistant',
|
|
800
|
-
providerId: 'openai',
|
|
801
|
-
modelId: 'gpt-4.1',
|
|
802
|
-
settings: {
|
|
803
|
-
temperature: 0.9,
|
|
804
|
-
maxTokens: 2000,
|
|
805
|
-
topP: 0.95
|
|
806
|
-
}
|
|
807
|
-
}
|
|
808
|
-
];
|
|
809
|
-
|
|
810
|
-
const llmService = new LLMService(fromEnvironment, {
|
|
811
|
-
presets: customPresets,
|
|
812
|
-
presetMode: 'extend' // Default behavior - adds to existing presets
|
|
813
|
-
});
|
|
814
|
-
```
|
|
815
|
-
|
|
816
|
-
#### Replacing Default Presets
|
|
817
|
-
|
|
818
|
-
For applications that need full control over available presets:
|
|
819
|
-
|
|
820
|
-
```typescript
|
|
821
|
-
const applicationPresets: ModelPreset[] = [
|
|
822
|
-
{
|
|
823
|
-
id: 'app-gpt4-default',
|
|
824
|
-
displayName: 'GPT-4 Standard',
|
|
825
|
-
providerId: 'openai',
|
|
826
|
-
modelId: 'gpt-4.1',
|
|
827
|
-
settings: { temperature: 0.7 }
|
|
828
|
-
},
|
|
829
|
-
{
|
|
830
|
-
id: 'app-claude-creative',
|
|
831
|
-
displayName: 'Claude Creative',
|
|
832
|
-
providerId: 'anthropic',
|
|
833
|
-
modelId: 'claude-3-5-sonnet-20241022',
|
|
834
|
-
settings: { temperature: 0.8, maxTokens: 4000 }
|
|
835
|
-
}
|
|
836
|
-
];
|
|
837
|
-
|
|
838
|
-
const llmService = new LLMService(fromEnvironment, {
|
|
839
|
-
presets: applicationPresets,
|
|
840
|
-
presetMode: 'replace' // Use ONLY these presets, ignore defaults
|
|
841
|
-
});
|
|
842
|
-
```
|
|
843
|
-
|
|
844
|
-
### Using Presets with Messages
|
|
845
|
-
|
|
846
|
-
You can use presets directly in `sendMessage` calls:
|
|
847
|
-
|
|
848
|
-
```typescript
|
|
849
|
-
// Send a message using a preset
|
|
850
|
-
const response = await llmService.sendMessage({
|
|
851
|
-
presetId: 'anthropic-claude-3-7-sonnet-20250219-thinking',
|
|
852
|
-
messages: [{ role: 'user', content: 'Solve this complex problem...' }]
|
|
853
|
-
});
|
|
854
|
-
|
|
855
|
-
// Override preset settings
|
|
856
|
-
const response = await llmService.sendMessage({
|
|
857
|
-
presetId: 'openai-gpt-4.1-default',
|
|
858
|
-
messages: [{ role: 'user', content: 'Write a story' }],
|
|
859
|
-
settings: {
|
|
860
|
-
temperature: 0.9, // Override preset's temperature
|
|
861
|
-
maxTokens: 3000
|
|
862
|
-
}
|
|
863
|
-
});
|
|
864
|
-
```
|
|
865
|
-
|
|
866
|
-
### Creating Messages from Templates
|
|
867
|
-
|
|
868
|
-
The library provides a powerful `createMessages` method that combines template rendering, model context injection, and role tag parsing into a single, intuitive API:
|
|
869
|
-
|
|
870
|
-
```typescript
|
|
871
|
-
// Basic example: Create model-aware messages
|
|
872
|
-
const { messages, modelContext } = await llmService.createMessages({
|
|
873
|
-
template: `
|
|
874
|
-
<SYSTEM>You are a helpful assistant.</SYSTEM>
|
|
875
|
-
<USER>{{ question }}</USER>
|
|
876
|
-
`,
|
|
877
|
-
variables: {
|
|
878
|
-
question: 'What is the optimal algorithm for finding the shortest path in a weighted graph?'
|
|
879
|
-
},
|
|
880
|
-
presetId: 'anthropic-claude-3-7-sonnet-20250219-thinking'
|
|
881
|
-
});
|
|
882
|
-
|
|
883
|
-
// The messages are ready to send
|
|
884
|
-
const response = await llmService.sendMessage({
|
|
885
|
-
presetId: 'anthropic-claude-3-7-sonnet-20250219-thinking',
|
|
886
|
-
messages: messages
|
|
887
|
-
});
|
|
888
|
-
|
|
889
|
-
// Advanced example: Conditional context and multi-turn conversation
|
|
890
|
-
const { messages } = await llmService.createMessages({
|
|
891
|
-
template: `
|
|
892
|
-
<SYSTEM>You are an expert code reviewer.</SYSTEM>
|
|
893
|
-
{{ hasContext ? '<USER>Context: {{context}}</USER>' : '' }}
|
|
894
|
-
<USER>Review this code:
|
|
895
|
-
```{{language}}
|
|
896
|
-
{{code}}
|
|
897
|
-
```</USER>
|
|
898
|
-
{{ hasExamples ? examples : '' }}
|
|
899
|
-
<USER>Focus on {{ focusAreas.join(', ') }}.</USER>
|
|
900
|
-
`,
|
|
901
|
-
variables: {
|
|
902
|
-
hasContext: true,
|
|
903
|
-
context: 'This is part of a high-performance web server',
|
|
904
|
-
language: 'typescript',
|
|
905
|
-
code: 'async function handleRequest(req: Request) { ... }',
|
|
906
|
-
hasExamples: true,
|
|
907
|
-
examples: '<ASSISTANT>I\'ll review your code focusing on the areas you mentioned.</ASSISTANT>',
|
|
908
|
-
focusAreas: ['error handling', 'performance', 'type safety']
|
|
909
|
-
},
|
|
910
|
-
providerId: 'openai',
|
|
911
|
-
modelId: 'gpt-4.1'
|
|
912
|
-
});
|
|
913
|
-
```
|
|
914
|
-
|
|
915
|
-
The method provides:
|
|
916
|
-
- **Unified API**: Single method for all prompt creation needs
|
|
917
|
-
- **Model Context Injection**: Automatically injects model-specific variables
|
|
918
|
-
- **Template Rendering**: Full support for conditionals and variable substitution
|
|
919
|
-
- **Role Tag Parsing**: Converts `<SYSTEM>`, `<USER>`, and `<ASSISTANT>` tags to messages
|
|
920
|
-
|
|
921
|
-
**Available model context variables:**
|
|
922
|
-
|
|
923
|
-
- `native_reasoning_active`: Whether native reasoning is **currently active** for this request
|
|
924
|
-
- `true`: The model is using built-in reasoning (e.g., Claude 4, o4-mini, Gemini 2.5 Pro with reasoning enabled)
|
|
925
|
-
- `false`: No native reasoning is active (either because the model doesn't support it, or it's been disabled)
|
|
926
|
-
- `native_reasoning_capable`: Whether the model **has the capability** to use native reasoning
|
|
927
|
-
- `true`: Model supports native reasoning (may or may not be enabled)
|
|
928
|
-
- `false`: Model does not support native reasoning
|
|
929
|
-
- `model_id`: The resolved model ID
|
|
930
|
-
- `provider_id`: The resolved provider ID
|
|
931
|
-
- `reasoning_effort`: The reasoning effort level if specified
|
|
932
|
-
- `reasoning_max_tokens`: The reasoning token budget if specified
|
|
933
|
-
|
|
934
|
-
**Best Practice for Templates:**
|
|
935
|
-
When adding thinking tag instructions to your templates, **always use `requires_tags_for_thinking`** (the NOT operator). This ensures:
|
|
936
|
-
- Models with active native reasoning get clean, direct prompts
|
|
937
|
-
- Models without native reasoning get explicit instructions to use `<thinking>` tags
|
|
938
|
-
|
|
939
|
-
Example: `{{ requires_tags_for_thinking ? ' Write your reasoning in <thinking> tags first.' : '' }}`
|
|
940
|
-
|
|
941
|
-
#### Advanced Features
|
|
942
|
-
|
|
943
|
-
**Dynamic Role Injection:**
|
|
944
|
-
Variables can dynamically inject entire role blocks, enabling flexible conversation flows:
|
|
945
|
-
|
|
946
|
-
```typescript
|
|
947
|
-
const { messages } = await llmService.createMessages({
|
|
948
|
-
template: `
|
|
949
|
-
{{ includeSystemPrompt ? '<SYSTEM>{{systemPrompt}}</SYSTEM>' : '' }}
|
|
950
|
-
{{ examples ? examples : '' }}
|
|
951
|
-
<USER>{{userQuery}}</USER>
|
|
952
|
-
`,
|
|
953
|
-
variables: {
|
|
954
|
-
includeSystemPrompt: true,
|
|
955
|
-
systemPrompt: 'You are an expert code reviewer.',
|
|
956
|
-
examples: `
|
|
957
|
-
<USER>Review this code: const x = 1</USER>
|
|
958
|
-
<ASSISTANT>The variable name 'x' is not descriptive...</ASSISTANT>
|
|
959
|
-
`,
|
|
960
|
-
userQuery: 'Review this: const data = fetchData()'
|
|
961
|
-
},
|
|
962
|
-
presetId: 'anthropic-claude-3-5-sonnet-20241022'
|
|
963
|
-
});
|
|
964
|
-
```
|
|
965
|
-
|
|
966
|
-
**Combining with Thinking Extraction:**
|
|
967
|
-
When using models without native reasoning support, combine createMessages with thinking extraction:
|
|
968
|
-
|
|
969
|
-
```typescript
|
|
970
|
-
// Prompt any model to think before answering
|
|
971
|
-
const { messages } = await llmService.createMessages({
|
|
972
|
-
template: `
|
|
973
|
-
<SYSTEM>
|
|
974
|
-
When solving problems, first write your step-by-step reasoning inside <thinking> tags,
|
|
975
|
-
then provide your final answer.
|
|
976
|
-
</SYSTEM>
|
|
977
|
-
<USER>{{ question }}</USER>
|
|
978
|
-
`,
|
|
979
|
-
variables: { question: 'If a train travels 120km in 2 hours, what is its speed in m/s?' },
|
|
980
|
-
providerId: 'openai',
|
|
981
|
-
modelId: 'gpt-4.1'
|
|
982
|
-
});
|
|
983
|
-
|
|
984
|
-
// Send with automatic thinking extraction
|
|
985
|
-
const response = await llmService.sendMessage({
|
|
986
|
-
providerId: 'openai',
|
|
987
|
-
modelId: 'gpt-4.1',
|
|
988
|
-
messages,
|
|
989
|
-
settings: {
|
|
990
|
-
thinkingTagFallback: { enabled: true } // Default, but shown for clarity
|
|
991
|
-
}
|
|
992
|
-
});
|
|
993
|
-
|
|
994
|
-
// Access both reasoning and answer
|
|
995
|
-
if (response.object === 'chat.completion') {
|
|
996
|
-
console.log('Reasoning:', response.choices[0].reasoning);
|
|
997
|
-
console.log('Answer:', response.choices[0].message.content);
|
|
998
|
-
}
|
|
999
|
-
```
|
|
1000
|
-
|
|
1001
|
-
### Self-Contained Templates with Metadata
|
|
1002
|
-
|
|
1003
|
-
Templates can now include their own settings using a `<META>` block, making them truly self-contained and reusable:
|
|
1004
|
-
|
|
1005
|
-
```typescript
|
|
1006
|
-
// Define a template with embedded settings
|
|
1007
|
-
const creativeWritingTemplate = `
|
|
1008
|
-
<META>
|
|
1009
|
-
{
|
|
1010
|
-
"settings": {
|
|
1011
|
-
"temperature": 0.9,
|
|
1012
|
-
"maxTokens": 3000,
|
|
1013
|
-
"thinkingTagFallback": { "enabled": true, "tagName": "reasoning" }
|
|
1014
|
-
}
|
|
1015
|
-
}
|
|
1016
|
-
</META>
|
|
1017
|
-
<SYSTEM>
|
|
1018
|
-
You are a creative writer. Use <reasoning> tags to outline your story structure
|
|
1019
|
-
before writing the actual story.
|
|
1020
|
-
</SYSTEM>
|
|
1021
|
-
<USER>Write a short story about {{ topic }}</USER>
|
|
1022
|
-
`;
|
|
1023
|
-
|
|
1024
|
-
// Use the template - settings are automatically extracted
|
|
1025
|
-
const { messages, settings } = await llmService.createMessages({
|
|
1026
|
-
template: creativeWritingTemplate,
|
|
1027
|
-
variables: { topic: 'a robot discovering music' },
|
|
1028
|
-
providerId: 'openai',
|
|
1029
|
-
modelId: 'gpt-4.1'
|
|
1030
|
-
});
|
|
1031
|
-
|
|
1032
|
-
// Send the message with the template's settings
|
|
1033
|
-
const response = await llmService.sendMessage({
|
|
1034
|
-
providerId: 'openai',
|
|
1035
|
-
modelId: 'gpt-4.1',
|
|
1036
|
-
messages,
|
|
1037
|
-
settings // Uses temperature: 0.9, maxTokens: 3000, etc.
|
|
1038
|
-
});
|
|
1039
|
-
```
|
|
1040
|
-
|
|
1041
|
-
**Benefits of Self-Contained Templates:**
|
|
1042
|
-
- **Portability**: Templates carry their optimal settings with them
|
|
1043
|
-
- **Consistency**: Same template always uses the same settings
|
|
1044
|
-
- **Less Error-Prone**: No need to remember settings for each template
|
|
1045
|
-
- **Shareable**: Easy to share templates with all necessary configuration
|
|
1046
|
-
|
|
1047
|
-
**Settings Hierarchy:**
|
|
1048
|
-
When multiple settings sources exist, they are merged in this order (later overrides earlier):
|
|
1049
|
-
1. Model defaults (lowest priority)
|
|
1050
|
-
2. Preset settings
|
|
1051
|
-
3. Template `<META>` settings
|
|
1052
|
-
4. Runtime settings in `sendMessage()` (highest priority)
|
|
1053
|
-
|
|
1054
|
-
```typescript
|
|
1055
|
-
// Example of settings hierarchy
|
|
1056
|
-
const { messages, settings: templateSettings } = await llmService.createMessages({
|
|
1057
|
-
template: `<META>{"settings": {"temperature": 0.8}}</META><USER>Hello</USER>`,
|
|
1058
|
-
presetId: 'some-preset' // Preset might have temperature: 0.7
|
|
1059
|
-
});
|
|
1060
|
-
|
|
1061
|
-
// Final temperature will be 0.9 (runtime overrides all)
|
|
1062
|
-
const response = await llmService.sendMessage({
|
|
1063
|
-
presetId: 'some-preset',
|
|
1064
|
-
messages,
|
|
1065
|
-
settings: {
|
|
1066
|
-
...templateSettings,
|
|
1067
|
-
temperature: 0.9 // Runtime override
|
|
1068
|
-
}
|
|
1069
|
-
});
|
|
1070
|
-
```
|
|
1071
|
-
|
|
1072
|
-
**Validation:**
|
|
1073
|
-
Invalid settings in the `<META>` block are logged as warnings and ignored:
|
|
1074
|
-
|
|
1075
|
-
```typescript
|
|
1076
|
-
const template = `
|
|
1077
|
-
<META>
|
|
1078
|
-
{
|
|
1079
|
-
"settings": {
|
|
1080
|
-
"temperature": 3.0, // Invalid: will be ignored with warning
|
|
1081
|
-
"maxTokens": 2000, // Valid: will be used
|
|
1082
|
-
"unknownSetting": "foo" // Unknown: will be ignored with warning
|
|
1083
|
-
}
|
|
1084
|
-
}
|
|
1085
|
-
</META>
|
|
1086
|
-
<USER>Test</USER>
|
|
1087
|
-
`;
|
|
1088
|
-
```
|
|
1089
|
-
|
|
1090
|
-
### Error Handling
|
|
1091
|
-
|
|
1092
|
-
```typescript
|
|
1093
|
-
const response = await llmService.sendMessage({
|
|
1094
|
-
providerId: 'openai',
|
|
1095
|
-
modelId: 'gpt-4.1-mini',
|
|
1096
|
-
messages: [{ role: 'user', content: 'Hello' }]
|
|
1097
|
-
});
|
|
1098
|
-
|
|
1099
|
-
if (response.object === 'error') {
|
|
1100
|
-
switch (response.error.type) {
|
|
1101
|
-
case 'authentication_error':
|
|
1102
|
-
console.error('Invalid API key');
|
|
1103
|
-
break;
|
|
1104
|
-
case 'rate_limit_error':
|
|
1105
|
-
console.error('Rate limit exceeded');
|
|
1106
|
-
break;
|
|
1107
|
-
case 'validation_error':
|
|
1108
|
-
console.error('Invalid request:', response.error.message);
|
|
1109
|
-
// For validation errors, the response may still be available
|
|
1110
|
-
if (response.partialResponse) {
|
|
1111
|
-
console.log('Partial response:', response.partialResponse.choices[0].message.content);
|
|
1112
|
-
}
|
|
1113
|
-
break;
|
|
1114
|
-
default:
|
|
1115
|
-
console.error('Error:', response.error.message);
|
|
1116
|
-
}
|
|
1117
|
-
}
|
|
1118
|
-
```
|
|
1119
|
-
|
|
1120
|
-
## llama.cpp Integration
|
|
1121
|
-
|
|
1122
|
-
`genai-lite` provides comprehensive support for running local LLMs via [llama.cpp](https://github.com/ggml-org/llama.cpp) server, enabling completely offline AI capabilities with the same unified interface.
|
|
1123
|
-
|
|
1124
|
-
### Why llama.cpp?
|
|
1125
|
-
|
|
1126
|
-
- **Privacy**: All model inference runs locally on your hardware
|
|
1127
|
-
- **Cost**: No API costs after initial model download
|
|
1128
|
-
- **Control**: Use any GGUF model from Hugging Face
|
|
1129
|
-
- **Performance**: Optimized C++ implementation with hardware acceleration
|
|
1130
|
-
|
|
1131
|
-
### Setup
|
|
1132
|
-
|
|
1133
|
-
#### 1. Install llama.cpp
|
|
1134
|
-
|
|
1135
|
-
```bash
|
|
1136
|
-
# Clone and build llama.cpp
|
|
1137
|
-
git clone https://github.com/ggml-org/llama.cpp
|
|
1138
|
-
cd llama.cpp
|
|
1139
|
-
make
|
|
1140
|
-
|
|
1141
|
-
# Or download pre-built binaries from releases
|
|
1142
|
-
```
|
|
1143
|
-
|
|
1144
|
-
#### 2. Download a Model
|
|
1145
|
-
|
|
1146
|
-
Get GGUF models from Hugging Face, for example:
|
|
1147
|
-
- [Meta-Llama-3.1-8B-Instruct-GGUF](https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF)
|
|
1148
|
-
- [Mistral-7B-Instruct-v0.3-GGUF](https://huggingface.co/bartowski/Mistral-7B-Instruct-v0.3-GGUF)
|
|
1149
|
-
|
|
1150
|
-
#### 3. Start the Server
|
|
1151
|
-
|
|
1152
|
-
```bash
|
|
1153
|
-
# Basic usage
|
|
1154
|
-
llama-server -m /path/to/model.gguf --port 8080
|
|
1155
|
-
|
|
1156
|
-
# With reasoning support (for Qwen3, DeepSeek-R1, etc.)
|
|
1157
|
-
llama-server -m /path/to/model.gguf \
|
|
1158
|
-
--port 8080 \
|
|
1159
|
-
--jinja \
|
|
1160
|
-
--reasoning-format deepseek
|
|
1161
|
-
|
|
1162
|
-
# Full options
|
|
1163
|
-
llama-server -m /path/to/model.gguf \
|
|
1164
|
-
--port 8080 \
|
|
1165
|
-
--jinja \ # Required for reasoning
|
|
1166
|
-
--reasoning-format deepseek \ # Extract reasoning from <think> tags
|
|
1167
|
-
-c 4096 \ # Context size
|
|
1168
|
-
-np 4 \ # Parallel requests
|
|
1169
|
-
--threads 8 # CPU threads
|
|
1170
|
-
```
|
|
1171
|
-
|
|
1172
|
-
### Basic Usage
|
|
1173
|
-
|
|
1174
|
-
```typescript
|
|
1175
|
-
import { LLMService } from 'genai-lite';
|
|
1176
|
-
|
|
1177
|
-
// llama.cpp doesn't need API keys
|
|
1178
|
-
const service = new LLMService(async () => 'not-needed');
|
|
1179
|
-
|
|
1180
|
-
const response = await service.sendMessage({
|
|
1181
|
-
providerId: 'llamacpp',
|
|
1182
|
-
modelId: 'llamacpp', // Generic ID for loaded model
|
|
1183
|
-
messages: [
|
|
1184
|
-
{ role: 'system', content: 'You are a helpful assistant.' },
|
|
1185
|
-
{ role: 'user', content: 'Explain quantum computing in simple terms.' }
|
|
1186
|
-
],
|
|
1187
|
-
settings: {
|
|
1188
|
-
temperature: 0.7,
|
|
1189
|
-
maxTokens: 500
|
|
1190
|
-
}
|
|
1191
|
-
});
|
|
1192
|
-
|
|
1193
|
-
if (response.object === 'chat.completion') {
|
|
1194
|
-
console.log(response.choices[0].message.content);
|
|
1195
|
-
}
|
|
1196
|
-
```
|
|
1197
|
-
|
|
1198
|
-
### Configuration
|
|
1199
|
-
|
|
1200
|
-
#### Environment Variable
|
|
1201
|
-
|
|
1202
|
-
Set the server URL via environment variable (default: `http://localhost:8080`):
|
|
1203
|
-
|
|
1204
|
-
```bash
|
|
1205
|
-
export LLAMACPP_API_BASE_URL=http://localhost:8080
|
|
1206
|
-
```
|
|
1207
|
-
|
|
1208
|
-
#### Multiple Servers
|
|
1209
|
-
|
|
1210
|
-
Register multiple llama.cpp instances for different models:
|
|
1211
|
-
|
|
1212
|
-
```typescript
|
|
1213
|
-
import { LLMService, LlamaCppClientAdapter } from 'genai-lite';
|
|
1214
|
-
|
|
1215
|
-
const service = new LLMService(async () => 'not-needed');
|
|
1216
|
-
|
|
1217
|
-
// Register adapters for different servers/models
|
|
1218
|
-
service.registerAdapter(
|
|
1219
|
-
'llamacpp-small',
|
|
1220
|
-
new LlamaCppClientAdapter({ baseURL: 'http://localhost:8080' })
|
|
1221
|
-
);
|
|
1222
|
-
|
|
1223
|
-
service.registerAdapter(
|
|
1224
|
-
'llamacpp-large',
|
|
1225
|
-
new LlamaCppClientAdapter({ baseURL: 'http://localhost:8081' })
|
|
1226
|
-
);
|
|
1227
|
-
|
|
1228
|
-
// Use them
|
|
1229
|
-
const response = await service.sendMessage({
|
|
1230
|
-
providerId: 'llamacpp-small',
|
|
1231
|
-
modelId: 'llamacpp',
|
|
1232
|
-
messages: [{ role: 'user', content: 'Hello!' }]
|
|
1233
|
-
});
|
|
1234
|
-
```
|
|
1235
|
-
|
|
1236
|
-
#### Health Checking
|
|
1237
|
-
|
|
1238
|
-
Enable automatic health checks before requests:
|
|
1239
|
-
|
|
1240
|
-
```typescript
|
|
1241
|
-
import { LlamaCppClientAdapter } from 'genai-lite';
|
|
1242
|
-
|
|
1243
|
-
const adapter = new LlamaCppClientAdapter({
|
|
1244
|
-
baseURL: 'http://localhost:8080',
|
|
1245
|
-
checkHealth: true // Check server status before each request
|
|
1246
|
-
});
|
|
1247
|
-
|
|
1248
|
-
service.registerAdapter('llamacpp', adapter);
|
|
1249
|
-
```
|
|
1250
|
-
|
|
1251
|
-
### Advanced Features
|
|
1252
|
-
|
|
1253
|
-
#### Server Management
|
|
1254
|
-
|
|
1255
|
-
The `LlamaCppServerClient` class provides access to all llama.cpp server endpoints:
|
|
1256
|
-
|
|
1257
|
-
```typescript
|
|
1258
|
-
import { LlamaCppServerClient } from 'genai-lite';
|
|
1259
|
-
|
|
1260
|
-
const client = new LlamaCppServerClient('http://localhost:8080');
|
|
1261
|
-
|
|
1262
|
-
// Health monitoring
|
|
1263
|
-
const health = await client.getHealth();
|
|
1264
|
-
console.log(health.status); // 'ok', 'loading', or 'error'
|
|
1265
|
-
|
|
1266
|
-
// Server properties
|
|
1267
|
-
const props = await client.getProps();
|
|
1268
|
-
console.log(props.total_slots); // Number of available slots
|
|
1269
|
-
|
|
1270
|
-
// Performance metrics (if enabled)
|
|
1271
|
-
const metrics = await client.getMetrics();
|
|
1272
|
-
```
|
|
1273
|
-
|
|
1274
|
-
#### Tokenization
|
|
1275
|
-
|
|
1276
|
-
```typescript
|
|
1277
|
-
const client = new LlamaCppServerClient('http://localhost:8080');
|
|
1278
|
-
|
|
1279
|
-
// Tokenize text
|
|
1280
|
-
const { tokens } = await client.tokenize('Hello, world!');
|
|
1281
|
-
console.log(tokens); // [123, 456, 789]
|
|
1282
|
-
|
|
1283
|
-
// Count tokens before sending to LLM
|
|
1284
|
-
const prompt = 'Long text...';
|
|
1285
|
-
const { tokens: promptTokens } = await client.tokenize(prompt);
|
|
1286
|
-
if (promptTokens.length > 4000) {
|
|
1287
|
-
console.log('Prompt too long, truncating...');
|
|
1288
|
-
}
|
|
1289
|
-
|
|
1290
|
-
// Detokenize back to text
|
|
1291
|
-
const { content } = await client.detokenize([123, 456, 789]);
|
|
1292
|
-
console.log(content); // 'Hello, world!'
|
|
1293
|
-
```
|
|
1294
|
-
|
|
1295
|
-
#### Text Embeddings
|
|
1296
|
-
|
|
1297
|
-
```typescript
|
|
1298
|
-
const client = new LlamaCppServerClient('http://localhost:8080');
|
|
1299
|
-
|
|
1300
|
-
// Generate embeddings for semantic search
|
|
1301
|
-
const { embedding } = await client.createEmbedding('Search query text');
|
|
1302
|
-
console.log(embedding.length); // e.g., 768 dimensions
|
|
1303
|
-
|
|
1304
|
-
// With images (for multimodal models)
|
|
1305
|
-
const { embedding: multimodalEmbed } = await client.createEmbedding(
|
|
1306
|
-
'Describe this image',
|
|
1307
|
-
'base64_image_data_here'
|
|
1308
|
-
);
|
|
1309
|
-
```
|
|
1310
|
-
|
|
1311
|
-
#### Code Infilling
|
|
1312
|
-
|
|
1313
|
-
Perfect for code completion in IDEs:
|
|
1314
|
-
|
|
1315
|
-
```typescript
|
|
1316
|
-
const client = new LlamaCppServerClient('http://localhost:8080');
|
|
1317
|
-
|
|
1318
|
-
const result = await client.infill(
|
|
1319
|
-
'def calculate_fibonacci(n):\n ', // Prefix (before cursor)
|
|
1320
|
-
'\n return result' // Suffix (after cursor)
|
|
1321
|
-
);
|
|
1322
|
-
|
|
1323
|
-
console.log(result.content);
|
|
1324
|
-
// Output: "if n <= 1:\n return n\n result = calculate_fibonacci(n-1) + calculate_fibonacci(n-2)"
|
|
1325
|
-
```
|
|
1326
|
-
|
|
1327
|
-
### Error Handling
|
|
1328
|
-
|
|
1329
|
-
```typescript
|
|
1330
|
-
const response = await service.sendMessage({
|
|
1331
|
-
providerId: 'llamacpp',
|
|
1332
|
-
modelId: 'my-model',
|
|
1333
|
-
messages: [{ role: 'user', content: 'Hello' }]
|
|
1334
|
-
});
|
|
1335
|
-
|
|
1336
|
-
if (response.object === 'error') {
|
|
1337
|
-
switch (response.error.code) {
|
|
1338
|
-
case 'NETWORK_ERROR':
|
|
1339
|
-
console.error('Server not running or unreachable');
|
|
1340
|
-
break;
|
|
1341
|
-
case 'PROVIDER_ERROR':
|
|
1342
|
-
console.error('Server error:', response.error.message);
|
|
1343
|
-
break;
|
|
1344
|
-
default:
|
|
1345
|
-
console.error('Unknown error:', response.error);
|
|
1346
|
-
}
|
|
1347
|
-
}
|
|
1348
|
-
```
|
|
1349
|
-
|
|
1350
|
-
### Best Practices
|
|
1351
|
-
|
|
1352
|
-
1. **Model ID**: Always use `'llamacpp'` as the model ID—the actual model is determined by what you loaded in the server
|
|
1353
|
-
2. **Context Size**: Set appropriate context (`-c` flag) when starting the server
|
|
1354
|
-
3. **Parallel Requests**: Configure slots (`-np`) based on your hardware
|
|
1355
|
-
4. **Health Monitoring**: Enable `checkHealth` for production to detect server issues early
|
|
1356
|
-
5. **Resource Management**: Monitor memory usage; large models need significant RAM
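
As a sketch of points 2 and 3 above, a typical launch might look like this (model path and values are illustrative; tune them for your hardware):

```bash
# 4096-token context (-c) and 2 parallel slots (-np)
llama-server -m model.gguf --port 8080 -c 4096 -np 2
```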
|
|
1357
|
-
|
|
1358
|
-
### Troubleshooting
|
|
1359
|
-
|
|
1360
|
-
**Server not responding:**
|
|
1361
|
-
```bash
|
|
1362
|
-
# Check if server is running
|
|
1363
|
-
curl http://localhost:8080/health
|
|
1364
|
-
|
|
1365
|
-
# Should return: {"status":"ok"}
|
|
1366
|
-
```
|
|
1367
|
-
|
|
1368
|
-
**Model loading errors:**
|
|
1369
|
-
```bash
|
|
1370
|
-
# Increase memory or reduce context size
|
|
1371
|
-
llama-server -m model.gguf --port 8080 -c 2048
|
|
1372
|
-
```
|
|
1373
|
-
|
|
1374
|
-
**Slow responses:**
|
|
1375
|
-
```bash
|
|
1376
|
-
# Use quantized models (smaller but faster)
|
|
1377
|
-
# e.g., Q4_K_M, Q5_K_M instead of F16
|
|
1378
|
-
|
|
1379
|
-
# Increase threads
|
|
1380
|
-
llama-server -m model.gguf --threads 16
|
|
1381
|
-
```
|
|
1382
|
-
|
|
1383
|
-
## Using with Electron
|
|
1384
|
-
|
|
1385
|
-
`genai-lite` is designed to work seamlessly within an Electron application's main process, especially when paired with a secure storage solution like `genai-key-storage-lite`.
|
|
1386
|
-
|
|
1387
|
-
This is the recommended pattern both for new Electron apps and for migrating from older, integrated versions.
|
|
1388
|
-
|
|
1389
|
-
### Example with `genai-key-storage-lite`
|
|
1390
|
-
|
|
1391
|
-
Here’s how to create a custom `ApiKeyProvider` that uses `genai-key-storage-lite` to securely retrieve API keys.
|
|
1392
|
-
|
|
1393
|
-
```typescript
|
|
1394
|
-
// In your Electron app's main process (e.g., main.ts)
|
|
1395
|
-
import { app } from 'electron';
|
|
1396
|
-
import { ApiKeyServiceMain } from 'genai-key-storage-lite';
|
|
1397
|
-
import { LLMService, type ApiKeyProvider } from 'genai-lite';
|
|
1398
|
-
|
|
1399
|
-
// 1. Initialize Electron's secure key storage service
|
|
1400
|
-
const apiKeyService = new ApiKeyServiceMain(app.getPath("userData"));
|
|
1401
|
-
|
|
1402
|
-
// 2. Create a custom ApiKeyProvider that uses the secure storage
|
|
1403
|
-
const electronKeyProvider: ApiKeyProvider = async (providerId) => {
|
|
1404
|
-
try {
|
|
1405
|
-
// Use withDecryptedKey to securely access the key only when needed.
|
|
1406
|
-
// The key is passed to the callback and its result is returned.
|
|
1407
|
-
return await apiKeyService.withDecryptedKey(providerId, async (key) => key);
|
|
1408
|
-
} catch {
|
|
1409
|
-
// If key is not found or decryption fails, return null.
|
|
1410
|
-
// LLMService will handle this as an authentication error.
|
|
1411
|
-
return null;
|
|
1412
|
-
}
|
|
1413
|
-
};
|
|
1414
|
-
|
|
1415
|
-
// 3. Initialize the genai-lite service with our custom provider
|
|
1416
|
-
const llmService = new LLMService(electronKeyProvider);
|
|
1417
|
-
|
|
1418
|
-
// Now you can use llmService anywhere in your main process.
|
|
1419
|
-
```
|
|
1420
|
-
|
|
1421
|
-
## TypeScript Support
|
|
1422
|
-
|
|
1423
|
-
genai-lite is written in TypeScript and provides comprehensive type definitions:
|
|
1424
|
-
|
|
1425
|
-
```typescript
|
|
1426
|
-
import type {
|
|
1427
|
-
LLMChatRequest,
|
|
1428
|
-
LLMChatRequestWithPreset,
|
|
1429
|
-
LLMResponse,
|
|
1430
|
-
LLMFailureResponse,
|
|
1431
|
-
LLMSettings,
|
|
1432
|
-
LLMReasoningSettings,
|
|
1433
|
-
LLMThinkingTagFallbackSettings,
|
|
1434
|
-
ApiKeyProvider,
|
|
1435
|
-
ModelPreset,
|
|
1436
|
-
LLMServiceOptions,
|
|
1437
|
-
PresetMode,
|
|
1438
|
-
ModelContext,
|
|
1439
|
-
CreateMessagesResult,
|
|
1440
|
-
TemplateMetadata
|
|
1441
|
-
} from 'genai-lite';
|
|
1442
|
-
|
|
1443
|
-
// llama.cpp integration types and classes
|
|
1444
|
-
import {
|
|
1445
|
-
LlamaCppClientAdapter,
|
|
1446
|
-
LlamaCppServerClient,
|
|
1447
|
-
createFallbackModelInfo,
|
|
1448
|
-
detectGgufCapabilities,
|
|
1449
|
-
KNOWN_GGUF_MODELS
|
|
1450
|
-
} from 'genai-lite';
|
|
1451
|
-
|
|
1452
|
-
import type {
|
|
1453
|
-
LlamaCppClientConfig,
|
|
1454
|
-
LlamaCppHealthResponse,
|
|
1455
|
-
LlamaCppTokenizeResponse,
|
|
1456
|
-
LlamaCppDetokenizeResponse,
|
|
1457
|
-
LlamaCppEmbeddingResponse,
|
|
1458
|
-
LlamaCppInfillResponse,
|
|
1459
|
-
LlamaCppPropsResponse,
|
|
1460
|
-
LlamaCppMetricsResponse,
|
|
1461
|
-
LlamaCppSlot,
|
|
1462
|
-
LlamaCppSlotsResponse,
|
|
1463
|
-
LlamaCppModel,
|
|
1464
|
-
LlamaCppModelsResponse,
|
|
1465
|
-
GgufModelPattern
|
|
1466
|
-
} from 'genai-lite';
|
|
1467
|
-
```
|
|
1468
|
-
|
|
1469
|
-
## Utilities
|
|
1470
|
-
|
|
1471
|
-
genai-lite includes useful utilities for working with LLMs, available through the `genai-lite/prompting` subpath:
|
|
1472
|
-
|
|
1473
|
-
### Token Counting
|
|
1474
|
-
|
|
1475
|
-
Count the number of tokens in a string using OpenAI's tiktoken library:
|
|
1476
|
-
|
|
1477
|
-
```typescript
|
|
1478
|
-
import { countTokens } from 'genai-lite/prompting';
|
|
1479
|
-
|
|
1480
|
-
const text = 'Hello, this is a sample text for token counting.';
|
|
1481
|
-
const tokenCount = countTokens(text); // Uses gpt-4 tokenizer by default
|
|
1482
|
-
console.log(`Token count: ${tokenCount}`);
|
|
1483
|
-
|
|
1484
|
-
// Specify a different model's tokenizer
|
|
1485
|
-
const gpt35Tokens = countTokens(text, 'gpt-3.5-turbo');
|
|
1486
|
-
```
|
|
1487
|
-
|
|
1488
|
-
**Note:** The `countTokens` function uses the `js-tiktoken` library and supports all models that have tiktoken encodings.
|
|
1489
|
-
|
|
1490
|
-
### Smart Text Preview
|
|
1491
|
-
|
|
1492
|
-
Generate intelligent previews of large text blocks that preserve context:
|
|
1493
|
-
|
|
1494
|
-
```typescript
|
|
1495
|
-
import { getSmartPreview } from 'genai-lite/prompting';
|
|
1496
|
-
|
|
1497
|
-
const largeCodeFile = `
|
|
1498
|
-
function calculateTotal(items) {
|
|
1499
|
-
let total = 0;
|
|
1500
|
-
|
|
1501
|
-
for (const item of items) {
|
|
1502
|
-
total += item.price * item.quantity;
|
|
1503
|
-
}
|
|
1504
|
-
|
|
1505
|
-
return total;
|
|
1506
|
-
}
|
|
1507
|
-
|
|
1508
|
-
function applyDiscount(total, discountPercent) {
|
|
1509
|
-
return total * (1 - discountPercent / 100);
|
|
1510
|
-
}
|
|
1511
|
-
|
|
1512
|
-
// ... many more lines of code ...
|
|
1513
|
-
`;
|
|
1514
|
-
|
|
1515
|
-
// Get a preview that shows at least 5 lines but extends to a logical break point
|
|
1516
|
-
const preview = getSmartPreview(largeCodeFile, {
|
|
1517
|
-
minLines: 5,
|
|
1518
|
-
maxLines: 10
|
|
1519
|
-
});
|
|
1520
|
-
```
|
|
1521
|
-
|
|
1522
|
-
The `getSmartPreview` function intelligently truncates text:
|
|
1523
|
-
- Returns the full content if it's shorter than `maxLines`
|
|
1524
|
-
- Shows at least `minLines` of content
|
|
1525
|
-
- Extends to the next blank line (up to `maxLines`) to avoid cutting off in the middle of a code block or paragraph
|
|
1526
|
-
- Adds `... (content truncated)` when content is truncated (illustrated in the sketch below)
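
A quick sketch of that behavior, reusing the `largeCodeFile` string from the example above (the exact cut point depends on where its blank lines fall):

```typescript
import { getSmartPreview } from 'genai-lite/prompting';

const preview = getSmartPreview(largeCodeFile, { minLines: 5, maxLines: 10 });

// When the input is longer than maxLines, the preview stops at a blank line
// between lines 5 and 10 and ends with the truncation marker.
console.log(preview.includes('... (content truncated)'));
```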
|
|
1527
|
-
|
|
1528
|
-
### Example: Building Token-Aware Prompts
|
|
1529
|
-
|
|
1530
|
-
Combine these utilities to build prompts that fit within model context windows:
|
|
1531
|
-
|
|
1532
|
-
```typescript
|
|
1533
|
-
import { LLMService, fromEnvironment } from 'genai-lite';
|
|
1534
|
-
import { countTokens, getSmartPreview } from 'genai-lite/prompting';
import fs from 'node:fs/promises'; // needed for fs.readFile below
|
|
1535
|
-
|
|
1536
|
-
const llm = new LLMService(fromEnvironment);
|
|
1537
|
-
|
|
1538
|
-
// Large source file
|
|
1539
|
-
const sourceCode = await fs.readFile('large-file.js', 'utf-8');
|
|
1540
|
-
|
|
1541
|
-
// Get a smart preview that fits within token budget
|
|
1542
|
-
let preview = getSmartPreview(sourceCode, { minLines: 20, maxLines: 50 });
|
|
1543
|
-
let tokenCount = countTokens(preview, 'gpt-4.1-mini');
|
|
1544
|
-
|
|
1545
|
-
// Adjust preview if needed to fit token budget
|
|
1546
|
-
const maxTokens = 4000;
|
|
1547
|
-
if (tokenCount > maxTokens) {
|
|
1548
|
-
preview = getSmartPreview(sourceCode, { minLines: 10, maxLines: 30 });
|
|
1549
|
-
}
|
|
1550
|
-
|
|
1551
|
-
// Send to LLM
|
|
1552
|
-
const response = await llm.sendMessage({
|
|
1553
|
-
providerId: 'openai',
|
|
1554
|
-
modelId: 'gpt-4.1-mini',
|
|
1555
|
-
messages: [
|
|
1556
|
-
{
|
|
1557
|
-
role: 'user',
|
|
1558
|
-
content: `Analyze this code:\n\n${preview}`
|
|
1559
|
-
}
|
|
1560
|
-
]
|
|
1561
|
-
});
|
|
1562
|
-
```
|
|
1563
|
-
|
|
1564
|
-
### Template Engine
|
|
1565
|
-
|
|
1566
|
-
Generate dynamic prompts and content using the built-in template engine that supports variable substitution and conditional logic:
|
|
1567
|
-
|
|
1568
|
-
```typescript
|
|
1569
|
-
import { renderTemplate } from 'genai-lite/prompting';
|
|
1570
|
-
|
|
1571
|
-
// Simple variable substitution
|
|
1572
|
-
const greeting = renderTemplate('Hello, {{ name }}!', { name: 'World' });
|
|
1573
|
-
// Result: "Hello, World!"
|
|
1574
|
-
|
|
1575
|
-
// Conditional rendering with ternary syntax
|
|
1576
|
-
const prompt = renderTemplate(
|
|
1577
|
-
'Analyze this {{ language }} code:\n{{ hasContext ? `Context: {{context}}\n` : `` }}```\n{{ code }}\n```',
|
|
1578
|
-
{
|
|
1579
|
-
language: 'TypeScript',
|
|
1580
|
-
hasContext: true,
|
|
1581
|
-
context: 'React component for user authentication',
|
|
1582
|
-
code: 'export const Login = () => { ... }'
|
|
1583
|
-
}
|
|
1584
|
-
);
|
|
1585
|
-
// Result includes the context line when hasContext is true
|
|
1586
|
-
|
|
1587
|
-
// Using logical operators in conditions
|
|
1588
|
-
const accessControl = renderTemplate(
|
|
1589
|
-
'{{ isAuthenticated && !isBanned ? `Welcome {{username}}!` : `Access denied` }}',
|
|
1590
|
-
{ isAuthenticated: true, isBanned: false, username: 'Alice' }
|
|
1591
|
-
);
|
|
1592
|
-
// Result: "Welcome Alice!"
|
|
1593
|
-
|
|
1594
|
-
const notification = renderTemplate(
|
|
1595
|
-
'{{ hasEmail || hasPhone ? `Contact info available` : `No contact info` }}',
|
|
1596
|
-
{ hasEmail: false, hasPhone: true }
|
|
1597
|
-
);
|
|
1598
|
-
// Result: "Contact info available"
|
|
1599
|
-
|
|
1600
|
-
// Complex template with multiple conditionals
|
|
1601
|
-
const complexTemplate = `
|
|
1602
|
-
System: You are a {{ role }} assistant.
|
|
1603
|
-
{{ hasExpertise ? `Expertise: {{expertise}}` : `General knowledge assistant` }}
|
|
1604
|
-
|
|
1605
|
-
Task: {{ task }}
|
|
1606
|
-
{{ hasFiles ? `
|
|
1607
|
-
Files to analyze:
|
|
1608
|
-
{{ fileList }}` : `` }}
|
|
1609
|
-
{{ requiresOutput ? `
|
|
1610
|
-
Expected output format:
|
|
1611
|
-
{{ outputFormat }}` : `` }}
|
|
1612
|
-
`;
|
|
1613
|
-
|
|
1614
|
-
const result = renderTemplate(complexTemplate, {
|
|
1615
|
-
role: 'coding',
|
|
1616
|
-
hasExpertise: true,
|
|
1617
|
-
expertise: 'TypeScript, React, Node.js',
|
|
1618
|
-
task: 'Review the code for best practices',
|
|
1619
|
-
hasFiles: true,
|
|
1620
|
-
fileList: '- src/index.ts\n- src/prompting/template.ts',
|
|
1621
|
-
requiresOutput: false
|
|
1622
|
-
});
|
|
1623
|
-
```
|
|
1624
|
-
|
|
1625
|
-
Template syntax supports:
|
|
1626
|
-
- **Simple substitution**: `{{ variableName }}`
|
|
1627
|
-
- **Ternary conditionals**: `{{ condition ? `true result` : `false result` }}`
|
|
1628
|
-
- **Logical operators in conditions**:
|
|
1629
|
-
- NOT: `{{ !isDisabled ? `enabled` : `disabled` }}`
|
|
1630
|
-
- AND: `{{ hasPermission && isActive ? `show` : `hide` }}`
|
|
1631
|
-
- OR: `{{ isAdmin || isOwner ? `allow` : `deny` }}`
|
|
1632
|
-
- Combined: `{{ !isDraft && isPublished ? `visible` : `hidden` }}`
|
|
1633
|
-
- **Nested variables**: `{{ show ? `Name: {{name}}` : `Anonymous` }}`
|
|
1634
|
-
- **Multi-line strings**: Use backticks to preserve formatting
|
|
1635
|
-
- **Intelligent newline handling**: Empty results remove trailing newlines
|
|
1636
|
-
|
|
1637
|
-
Note: Logical operators support at most two operands; parentheses and mixing `&&` with `||` are not supported.
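
For example, a minimal sketch of what the condition parser accepts versus rejects:

```typescript
import { renderTemplate } from 'genai-lite/prompting';

// Supported: a single && or || joining at most two operands
const access = renderTemplate(
  '{{ isAdmin && isActive ? `manage` : `read-only` }}',
  { isAdmin: true, isActive: false }
);
// access === 'read-only'

// Not supported (per the note above):
// '{{ a && b && c ? `x` : `y` }}'      // three operands
// '{{ (a || b) && c ? `x` : `y` }}'    // parentheses / mixed operators
```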
|
|
1638
|
-
|
|
1639
|
-
### Example: Building Dynamic LLM Prompts
|
|
1640
|
-
|
|
1641
|
-
Combine the template engine with other utilities for powerful prompt generation:
|
|
1642
|
-
|
|
1643
|
-
```typescript
|
|
1644
|
-
import { LLMService, fromEnvironment } from 'genai-lite';
|
|
1645
|
-
import { renderTemplate, countTokens } from 'genai-lite/prompting';
|
|
1646
|
-
|
|
1647
|
-
const llm = new LLMService(fromEnvironment);
|
|
1648
|
-
|
|
1649
|
-
// Define a reusable prompt template
|
|
1650
|
-
const codeReviewTemplate = `
|
|
1651
|
-
You are an expert {{ language }} developer.
|
|
1652
|
-
|
|
1653
|
-
{{ hasGuidelines ? `Follow these coding guidelines:
|
|
1654
|
-
{{ guidelines }}
|
|
1655
|
-
|
|
1656
|
-
` : `` }}Review the following code:
|
|
1657
|
-
\`\`\`{{ language }}
|
|
1658
|
-
{{ code }}
|
|
1659
|
-
\`\`\`
|
|
1660
|
-
|
|
1661
|
-
{{ hasFocus ? `Focus on: {{ focusAreas }}` : `Provide a comprehensive review covering all aspects.` }}
|
|
1662
|
-
`;
|
|
1663
|
-
|
|
1664
|
-
// Render the prompt with specific values
|
|
1665
|
-
const prompt = renderTemplate(codeReviewTemplate, {
|
|
1666
|
-
language: 'TypeScript',
|
|
1667
|
-
hasGuidelines: true,
|
|
1668
|
-
guidelines: '- Use functional components\n- Prefer composition over inheritance',
|
|
1669
|
-
code: sourceCode, // source text to review (loaded elsewhere, e.g. via fs.readFile)
|
|
1670
|
-
hasFocus: true,
|
|
1671
|
-
focusAreas: 'performance optimizations and error handling'
|
|
1672
|
-
});
|
|
1673
|
-
|
|
1674
|
-
// Check token count before sending
|
|
1675
|
-
const tokenCount = countTokens(prompt, 'gpt-4.1-mini');
|
|
1676
|
-
console.log(`Prompt uses ${tokenCount} tokens`);
|
|
1677
|
-
|
|
1678
|
-
// Send to LLM
|
|
1679
|
-
const response = await llm.sendMessage({
|
|
1680
|
-
providerId: 'openai',
|
|
1681
|
-
modelId: 'gpt-4.1-mini',
|
|
1682
|
-
messages: [{ role: 'user', content: prompt }]
|
|
1683
|
-
});
|
|
1684
|
-
```
|
|
1685
|
-
|
|
1686
|
-
### Prompt Engineering Utilities
|
|
1687
|
-
|
|
1688
|
-
genai-lite provides powerful utilities for working with prompts and responses:
|
|
1689
|
-
|
|
1690
|
-
#### Creating Messages from Templates
|
|
1691
|
-
|
|
1692
|
-
The recommended way to create messages is using `LLMService.createMessages`, which provides a unified API for template rendering, model context injection, and role tag parsing:
|
|
1693
|
-
|
|
1694
|
-
```typescript
|
|
1695
|
-
// Basic multi-turn conversation
|
|
1696
|
-
const { messages } = await llmService.createMessages({
|
|
1697
|
-
template: `
|
|
1698
|
-
<SYSTEM>You are a helpful assistant specialized in {{expertise}}.</SYSTEM>
|
|
1699
|
-
<USER>Help me with {{task}}</USER>
|
|
1700
|
-
<ASSISTANT>I'll help you with {{task}}. Let me analyze the requirements...</ASSISTANT>
|
|
1701
|
-
<USER>Can you provide more details?</USER>
|
|
1702
|
-
`,
|
|
1703
|
-
variables: {
|
|
1704
|
-
expertise: 'TypeScript and React',
|
|
1705
|
-
task: 'building a custom hook'
|
|
1706
|
-
},
|
|
1707
|
-
presetId: 'openai-gpt-4.1-default' // Optional: adds model context
|
|
1708
|
-
});
|
|
1709
|
-
|
|
1710
|
-
// Advanced: Adaptive prompts based on model capabilities
|
|
1711
|
-
const { messages, modelContext } = await llmService.createMessages({
|
|
1712
|
-
template: `
|
|
1713
|
-
<SYSTEM>
|
|
1714
|
-
You are a problem-solving assistant.
|
|
1715
|
-
{{ requires_tags_for_thinking ? ' For complex problems, write your reasoning in <thinking> tags before answering.' : '' }}
|
|
1716
|
-
</SYSTEM>
|
|
1717
|
-
<USER>{{ question }}</USER>
|
|
1718
|
-
`,
|
|
1719
|
-
// Note: use requires_tags_for_thinking so that only models without active native reasoning are told to use <thinking> tags
|
|
1720
|
-
variables: { question: 'What causes the seasons on Earth?' },
|
|
1721
|
-
presetId: 'anthropic-claude-3-7-sonnet-20250219-thinking'
|
|
1722
|
-
});
|
|
1723
|
-
|
|
1724
|
-
console.log('Model context:', modelContext);
|
|
1725
|
-
// Output: { native_reasoning_active: true, native_reasoning_capable: true, model_id: 'claude-3-7-sonnet-20250219', ... }
|
|
1726
|
-
// Note: With a reasoning model, the system prompt won't include thinking tag instructions
|
|
1727
|
-
```
|
|
1728
|
-
|
|
1729
|
-
**Low-Level Utilities:**
|
|
1730
|
-
For cases where you need template parsing without model context:
|
|
1731
|
-
|
|
1732
|
-
```typescript
|
|
1733
|
-
import { parseRoleTags, renderTemplate } from 'genai-lite/prompting';
|
|
1734
|
-
|
|
1735
|
-
// Render variables first
|
|
1736
|
-
const rendered = renderTemplate(
|
|
1737
|
-
'<SYSTEM>You are a {{role}} assistant.</SYSTEM><USER>{{query}}</USER>',
|
|
1738
|
-
{ role: 'helpful', query: 'What is TypeScript?' }
|
|
1739
|
-
);
|
|
1740
|
-
|
|
1741
|
-
// Then parse role tags
|
|
1742
|
-
const messages = parseRoleTags(rendered);
|
|
1743
|
-
// Result: [{ role: 'system', content: 'You are a helpful assistant.' }, { role: 'user', content: 'What is TypeScript?' }]
|
|
1744
|
-
```
|
|
1745
|
-
|
|
1746
|
-
#### Extracting Random Variables for Few-Shot Learning
|
|
1747
|
-
|
|
1748
|
-
Implement few-shot prompting by extracting and shuffling examples:
|
|
1749
|
-
|
|
1750
|
-
```typescript
|
|
1751
|
-
import { extractRandomVariables, renderTemplate } from 'genai-lite/prompting';
|
|
1752
|
-
|
|
1753
|
-
// Define examples in your template
|
|
1754
|
-
const examplesTemplate = `
|
|
1755
|
-
<RANDOM_INPUT>User: Translate "hello" to Spanish</RANDOM_INPUT>
|
|
1756
|
-
<RANDOM_OUTPUT>Assistant: The translation of "hello" to Spanish is "hola".</RANDOM_OUTPUT>
|
|
1757
|
-
|
|
1758
|
-
<RANDOM_INPUT>User: Translate "goodbye" to French</RANDOM_INPUT>
|
|
1759
|
-
<RANDOM_OUTPUT>Assistant: The translation of "goodbye" to French is "au revoir".</RANDOM_OUTPUT>
|
|
1760
|
-
|
|
1761
|
-
<RANDOM_INPUT>User: Translate "thank you" to German</RANDOM_INPUT>
|
|
1762
|
-
<RANDOM_OUTPUT>Assistant: The translation of "thank you" to German is "danke".</RANDOM_OUTPUT>
|
|
1763
|
-
`;
|
|
1764
|
-
|
|
1765
|
-
// Extract random variables (shuffled each time)
|
|
1766
|
-
const variables = extractRandomVariables(examplesTemplate, { maxPerTag: 2 });
|
|
1767
|
-
|
|
1768
|
-
// Use in a prompt template
|
|
1769
|
-
const promptTemplate = `
|
|
1770
|
-
You are a translation assistant. Here are some examples:
|
|
1771
|
-
|
|
1772
|
-
{{ random_input_1 }}
|
|
1773
|
-
{{ random_output_1 }}
|
|
1774
|
-
|
|
1775
|
-
{{ random_input_2 }}
|
|
1776
|
-
{{ random_output_2 }}
|
|
1777
|
-
|
|
1778
|
-
Now translate: "{{word}}" to {{language}}
|
|
1779
|
-
`;
|
|
1780
|
-
|
|
1781
|
-
const prompt = renderTemplate(promptTemplate, {
|
|
1782
|
-
...variables,
|
|
1783
|
-
word: 'please',
|
|
1784
|
-
language: 'Italian'
|
|
1785
|
-
});
|
|
1786
|
-
```
|
|
1787
|
-
|
|
1788
|
-
#### Parsing Structured LLM Responses
|
|
1789
|
-
|
|
1790
|
-
Extract structured data from LLM responses using custom tags:
|
|
1791
|
-
|
|
1792
|
-
```typescript
|
|
1793
|
-
import { parseStructuredContent } from 'genai-lite/prompting';
|
|
1794
|
-
|
|
1795
|
-
// Example LLM response with structured output
|
|
1796
|
-
const llmResponse = `
|
|
1797
|
-
Let me analyze this code for you.
|
|
1798
|
-
|
|
1799
|
-
<ANALYSIS>
|
|
1800
|
-
The code has good structure but could benefit from:
|
|
1801
|
-
1. Better error handling in the API calls
|
|
1802
|
-
2. Memoization for expensive computations
|
|
1803
|
-
3. More descriptive variable names
|
|
1804
|
-
</ANALYSIS>
|
|
1805
|
-
|
|
1806
|
-
<SUGGESTIONS>
|
|
1807
|
-
- Add try-catch blocks around async operations
|
|
1808
|
-
- Use React.memo() for the expensive component
|
|
1809
|
-
- Rename 'data' to 'userData' for clarity
|
|
1810
|
-
</SUGGESTIONS>
|
|
1811
|
-
|
|
1812
|
-
<REFACTORED_CODE>
|
|
1813
|
-
const UserProfile = React.memo(({ userId }) => {
|
|
1814
|
-
const [userData, setUserData] = useState(null);
|
|
1815
|
-
|
|
1816
|
-
useEffect(() => {
|
|
1817
|
-
fetchUserData(userId)
|
|
1818
|
-
.then(setUserData)
|
|
1819
|
-
.catch(error => console.error('Failed to load user:', error));
|
|
1820
|
-
}, [userId]);
|
|
1821
|
-
|
|
1822
|
-
return userData ? <Profile data={userData} /> : <Loading />;
|
|
1823
|
-
});
|
|
1824
|
-
</REFACTORED_CODE>
|
|
1825
|
-
`;
|
|
1826
|
-
|
|
1827
|
-
// Parse the structured content
|
|
1828
|
-
const parsed = parseStructuredContent(llmResponse, [
|
|
1829
|
-
'ANALYSIS',
|
|
1830
|
-
'SUGGESTIONS',
|
|
1831
|
-
'REFACTORED_CODE'
|
|
1832
|
-
]);
|
|
1833
|
-
|
|
1834
|
-
console.log(parsed.ANALYSIS); // The analysis text
|
|
1835
|
-
console.log(parsed.SUGGESTIONS); // The suggestions text
|
|
1836
|
-
console.log(parsed.REFACTORED_CODE); // The refactored code
|
|
1837
|
-
```
|
|
1838
|
-
|
|
1839
|
-
These utilities enable:
|
|
1840
|
-
- **Structured Conversations**: Build multi-turn conversations from templates with model context awareness
|
|
1841
|
-
- **Few-Shot Learning**: Randomly sample examples to improve AI responses
|
|
1842
|
-
- **Reliable Output Parsing**: Extract specific sections from AI responses
|
|
1843
|
-
- **Automatic Thinking Extraction**: Capture reasoning from any model using XML tags
|
|
1844
|
-
- **Template Reusability**: Define templates once, use with different variables
|
|
1845
|
-
- **Type Safety**: Full TypeScript support with LLMMessage types
|
|
1846
|
-
|
|
1847
|
-
## Examples
|
|
1848
|
-
|
|
1849
|
-
genai-lite includes two complete demo applications:
|
|
1850
|
-
|
|
1851
|
-
- **[chat-demo](examples/chat-demo)** - Full-featured LLM chat interface with all providers, template rendering, and advanced features
|
|
1852
|
-
- **[image-gen-demo](examples/image-gen-demo)** - Interactive image generation UI with OpenAI and local diffusion support
|
|
1853
|
-
|
|
1854
|
-
Both are production-ready React + Express applications that demonstrate library features and serve as testing environments.
|
|
1855
|
-
|
|
1856
|
-
## Contributing
|
|
1857
|
-
|
|
1858
|
-
Contributions are welcome! Please feel free to submit a Pull Request. For major changes, please open an issue first to discuss what you would like to change.
|
|
1859
|
-
|
|
1860
|
-
### Development
|
|
1861
|
-
|
|
1862
|
-
```bash
|
|
1863
|
-
# Install dependencies
|
|
1864
173
|
npm install
|
|
1865
|
-
|
|
1866
|
-
# Build the project
|
|
1867
174
|
npm run build
|
|
1868
|
-
|
|
1869
|
-
# Run tests (when available)
|
|
1870
175
|
npm test
|
|
1871
176
|
```
|
|
1872
177
|
|
|
1873
|
-
|
|
1874
|
-
|
|
1875
|
-
The project includes an end-to-end test suite that makes real API calls to providers. These tests are separate from the main unit test suite and are not run in CI by default.
|
|
1876
|
-
|
|
1877
|
-
To run these tests locally, you must first provide API keys as environment variables with the `E2E_` prefix:
|
|
1878
|
-
|
|
1879
|
-
```bash
|
|
1880
|
-
export E2E_OPENAI_API_KEY="sk-..."
|
|
1881
|
-
export E2E_ANTHROPIC_API_KEY="sk-ant-..."
|
|
1882
|
-
export E2E_GEMINI_API_KEY="AIza..."
|
|
1883
|
-
```
|
|
1884
|
-
|
|
1885
|
-
Then, run the E2E test script:
|
|
1886
|
-
|
|
1887
|
-
```bash
|
|
1888
|
-
npm run test:e2e
|
|
1889
|
-
```
|
|
1890
|
-
|
|
1891
|
-
The tests will automatically skip any provider for which an API key is not found.
|
|
178
|
+
See **[Troubleshooting](./genai-lite-docs/troubleshooting.md)** for information about E2E tests and development workflows.
|
|
1892
179
|
|
|
1893
180
|
## License
|
|
1894
181
|
|
|
@@ -1896,4 +183,4 @@ This project is licensed under the MIT License - see the LICENSE file for detail
|
|
|
1896
183
|
|
|
1897
184
|
## Acknowledgments
|
|
1898
185
|
|
|
1899
|
-
Originally developed as part of the Athanor project, genai-lite has been extracted and made standalone to benefit the wider developer community.
|
|
186
|
+
Originally developed as part of the Athanor project, genai-lite has been extracted and made standalone to benefit the wider developer community.
|