llm_conductor 1.1.0 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +4 -0
- data/README.md +88 -2
- data/VISION_USAGE.md +233 -0
- data/examples/openrouter_vision_usage.rb +108 -0
- data/lib/llm_conductor/clients/groq_client.rb +3 -4
- data/lib/llm_conductor/clients/openrouter_client.rb +112 -7
- data/lib/llm_conductor/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 0ca46783dd713d49b3292342d83f5adde4a0da684e4365004651464e6ac630bb
|
|
4
|
+
data.tar.gz: 5f693e2e4d8da70bebe5faf174a71e1880a705eb7dc6468d4fdf6774b8f2e9f3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 70ccb3ae2317588199a2820f1da19188e5ada27da19052de9cc964aeac775bbdd8feae350c958e797676eb8cbaf6549966591ad1299a1088ceeb8cfcbf70dc35
|
|
7
|
+
data.tar.gz: d24c1ff9423b009b2d50a35d6b122817174d5871813db037ee15378938cfd88aaf9dbc4765325bd65cf99d9fbbbeaae2a2d5ad926677efc6b36187209ddccf09
|
data/.rubocop.yml
CHANGED
|
@@ -33,6 +33,7 @@ Metrics/MethodLength:
|
|
|
33
33
|
Max: 15
|
|
34
34
|
Exclude:
|
|
35
35
|
- 'lib/llm_conductor/prompts.rb'
|
|
36
|
+
- 'lib/llm_conductor/clients/openrouter_client.rb'
|
|
36
37
|
|
|
37
38
|
RSpec/ExampleLength:
|
|
38
39
|
Enabled: false
|
|
@@ -89,14 +90,17 @@ Metrics/BlockLength:
|
|
|
89
90
|
Metrics/AbcSize:
|
|
90
91
|
Exclude:
|
|
91
92
|
- 'lib/llm_conductor/prompts.rb'
|
|
93
|
+
- 'lib/llm_conductor/clients/openrouter_client.rb'
|
|
92
94
|
|
|
93
95
|
Metrics/CyclomaticComplexity:
|
|
94
96
|
Exclude:
|
|
95
97
|
- 'lib/llm_conductor/prompts.rb'
|
|
98
|
+
- 'lib/llm_conductor/clients/openrouter_client.rb'
|
|
96
99
|
|
|
97
100
|
Metrics/PerceivedComplexity:
|
|
98
101
|
Exclude:
|
|
99
102
|
- 'lib/llm_conductor/prompts.rb'
|
|
103
|
+
- 'lib/llm_conductor/clients/openrouter_client.rb'
|
|
100
104
|
|
|
101
105
|
Layout/LineLength:
|
|
102
106
|
Max: 120
|
data/README.md
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
# LLM Conductor
|
|
2
2
|
|
|
3
|
-
A powerful Ruby gem from [Ekohe](https://ekohe.com) for orchestrating multiple Language Model providers with a unified, modern interface. LLM Conductor provides seamless integration with OpenAI GPT, Anthropic Claude, Google Gemini, Groq, and Ollama with advanced prompt management, data building patterns, and comprehensive response handling.
|
|
3
|
+
A powerful Ruby gem from [Ekohe](https://ekohe.com) for orchestrating multiple Language Model providers with a unified, modern interface. LLM Conductor provides seamless integration with OpenAI GPT, Anthropic Claude, Google Gemini, Groq, Ollama, and OpenRouter with advanced prompt management, data building patterns, vision/multimodal support, and comprehensive response handling.
|
|
4
4
|
|
|
5
5
|
## Features
|
|
6
6
|
|
|
7
|
-
🚀 **Multi-Provider Support** - OpenAI GPT, Anthropic Claude, Google Gemini, Groq, and Ollama with automatic vendor detection
|
|
7
|
+
🚀 **Multi-Provider Support** - OpenAI GPT, Anthropic Claude, Google Gemini, Groq, Ollama, and OpenRouter with automatic vendor detection
|
|
8
8
|
🎯 **Unified Modern API** - Simple `LlmConductor.generate()` interface with rich Response objects
|
|
9
|
+
🖼️ **Vision/Multimodal Support** - Send images alongside text prompts for vision-enabled models (OpenRouter)
|
|
9
10
|
📝 **Advanced Prompt Management** - Registrable prompt classes with inheritance and templating
|
|
10
11
|
🏗️ **Data Builder Pattern** - Structured data preparation for complex LLM inputs
|
|
11
12
|
⚡ **Smart Configuration** - Rails-style configuration with environment variable support
|
|
@@ -114,6 +115,11 @@ LlmConductor.configure do |config|
|
|
|
114
115
|
base_url: ENV['OLLAMA_ADDRESS'] || 'http://localhost:11434'
|
|
115
116
|
)
|
|
116
117
|
|
|
118
|
+
config.openrouter(
|
|
119
|
+
api_key: ENV['OPENROUTER_API_KEY'],
|
|
120
|
+
uri_base: 'https://openrouter.ai/api/v1' # Optional, this is the default
|
|
121
|
+
)
|
|
122
|
+
|
|
117
123
|
# Optional: Configure custom logger
|
|
118
124
|
config.logger = Logger.new($stdout) # Log to stdout
|
|
119
125
|
config.logger = Logger.new('log/llm_conductor.log') # Log to file
|
|
@@ -153,6 +159,7 @@ The gem automatically detects these environment variables:
|
|
|
153
159
|
- `GEMINI_API_KEY` - Google Gemini API key
|
|
154
160
|
- `GROQ_API_KEY` - Groq API key
|
|
155
161
|
- `OLLAMA_ADDRESS` - Ollama server address
|
|
162
|
+
- `OPENROUTER_API_KEY` - OpenRouter API key
|
|
156
163
|
|
|
157
164
|
## Supported Providers & Models
|
|
158
165
|
|
|
@@ -223,6 +230,85 @@ response = LlmConductor.generate(
|
|
|
223
230
|
)
|
|
224
231
|
```
|
|
225
232
|
|
|
233
|
+
### OpenRouter (Access to Multiple Providers)
|
|
234
|
+
OpenRouter provides unified access to various LLM providers with automatic routing. It also supports vision/multimodal models with automatic retry logic for handling intermittent availability issues.
|
|
235
|
+
|
|
236
|
+
**Vision-capable models:**
|
|
237
|
+
- `nvidia/nemotron-nano-12b-v2-vl:free` - **FREE** 12B vision model (may need retries)
|
|
238
|
+
- `openai/gpt-4o-mini` - Fast and reliable
|
|
239
|
+
- `google/gemini-flash-1.5` - Fast vision processing
|
|
240
|
+
- `anthropic/claude-3.5-sonnet` - High quality analysis
|
|
241
|
+
- `openai/gpt-4o` - Best quality (higher cost)
|
|
242
|
+
|
|
243
|
+
**Note:** Free-tier models may experience intermittent 502 errors. The client includes automatic retry logic with exponential backoff (up to 5 retries) to handle these transient failures.
|
|
244
|
+
|
|
245
|
+
```ruby
|
|
246
|
+
# Text-only request
|
|
247
|
+
response = LlmConductor.generate(
|
|
248
|
+
model: 'nvidia/nemotron-nano-12b-v2-vl:free',
|
|
249
|
+
vendor: :openrouter,
|
|
250
|
+
prompt: 'Your prompt here'
|
|
251
|
+
)
|
|
252
|
+
|
|
253
|
+
# Vision/multimodal request with single image
|
|
254
|
+
response = LlmConductor.generate(
|
|
255
|
+
model: 'nvidia/nemotron-nano-12b-v2-vl:free',
|
|
256
|
+
vendor: :openrouter,
|
|
257
|
+
prompt: {
|
|
258
|
+
text: 'What is in this image?',
|
|
259
|
+
images: 'https://example.com/image.jpg'
|
|
260
|
+
}
|
|
261
|
+
)
|
|
262
|
+
|
|
263
|
+
# Vision request with multiple images
|
|
264
|
+
response = LlmConductor.generate(
|
|
265
|
+
model: 'nvidia/nemotron-nano-12b-v2-vl:free',
|
|
266
|
+
vendor: :openrouter,
|
|
267
|
+
prompt: {
|
|
268
|
+
text: 'Compare these images',
|
|
269
|
+
images: [
|
|
270
|
+
'https://example.com/image1.jpg',
|
|
271
|
+
'https://example.com/image2.jpg'
|
|
272
|
+
]
|
|
273
|
+
}
|
|
274
|
+
)
|
|
275
|
+
|
|
276
|
+
# Vision request with detail level
|
|
277
|
+
response = LlmConductor.generate(
|
|
278
|
+
model: 'nvidia/nemotron-nano-12b-v2-vl:free',
|
|
279
|
+
vendor: :openrouter,
|
|
280
|
+
prompt: {
|
|
281
|
+
text: 'Describe this image in detail',
|
|
282
|
+
images: [
|
|
283
|
+
{ url: 'https://example.com/image.jpg', detail: 'high' }
|
|
284
|
+
]
|
|
285
|
+
}
|
|
286
|
+
)
|
|
287
|
+
|
|
288
|
+
# Advanced: Raw array format (OpenAI-compatible)
|
|
289
|
+
response = LlmConductor.generate(
|
|
290
|
+
model: 'nvidia/nemotron-nano-12b-v2-vl:free',
|
|
291
|
+
vendor: :openrouter,
|
|
292
|
+
prompt: [
|
|
293
|
+
{ type: 'text', text: 'What is in this image?' },
|
|
294
|
+
{ type: 'image_url', image_url: { url: 'https://example.com/image.jpg' } }
|
|
295
|
+
]
|
|
296
|
+
)
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
**Reliability:** The OpenRouter client includes intelligent retry logic:
|
|
300
|
+
- Automatically retries on 502 errors (up to 5 attempts)
|
|
301
|
+
- Exponential backoff: 2s, 4s, 8s, 16s, 32s
|
|
302
|
+
- Transparent to your code - works seamlessly
|
|
303
|
+
- Enable logging to see retry attempts:
|
|
304
|
+
|
|
305
|
+
```ruby
|
|
306
|
+
LlmConductor.configure do |config|
|
|
307
|
+
config.logger = Logger.new($stdout)
|
|
308
|
+
config.logger.level = Logger::INFO
|
|
309
|
+
end
|
|
310
|
+
```
|
|
311
|
+
|
|
226
312
|
### Vendor Detection
|
|
227
313
|
|
|
228
314
|
The gem automatically detects the appropriate provider based on model names:
|
data/VISION_USAGE.md
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
# Vision/Multimodal Usage Guide
|
|
2
|
+
|
|
3
|
+
This guide explains how to use vision/multimodal capabilities with the OpenRouter client in LLM Conductor.
|
|
4
|
+
|
|
5
|
+
## Quick Start
|
|
6
|
+
|
|
7
|
+
```ruby
|
|
8
|
+
require 'llm_conductor'
|
|
9
|
+
|
|
10
|
+
# Configure
|
|
11
|
+
LlmConductor.configure do |config|
|
|
12
|
+
config.openrouter(api_key: ENV['OPENROUTER_API_KEY'])
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
# Analyze an image
|
|
16
|
+
response = LlmConductor.generate(
|
|
17
|
+
model: 'openai/gpt-4o-mini',
|
|
18
|
+
vendor: :openrouter,
|
|
19
|
+
prompt: {
|
|
20
|
+
text: 'What is in this image?',
|
|
21
|
+
images: 'https://example.com/image.jpg'
|
|
22
|
+
}
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
puts response.output
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Recommended Models
|
|
29
|
+
|
|
30
|
+
For vision tasks via OpenRouter, these models work reliably:
|
|
31
|
+
|
|
32
|
+
- **`openai/gpt-4o-mini`** - Fast, reliable, good balance of cost/quality ✅
|
|
33
|
+
- **`google/gemini-flash-1.5`** - Fast vision processing
|
|
34
|
+
- **`anthropic/claude-3.5-sonnet`** - High quality analysis
|
|
35
|
+
- **`openai/gpt-4o`** - Best quality (higher cost)
|
|
36
|
+
|
|
37
|
+
## Usage Formats
|
|
38
|
+
|
|
39
|
+
### 1. Single Image (Simple Format)
|
|
40
|
+
|
|
41
|
+
```ruby
|
|
42
|
+
response = LlmConductor.generate(
|
|
43
|
+
model: 'openai/gpt-4o-mini',
|
|
44
|
+
vendor: :openrouter,
|
|
45
|
+
prompt: {
|
|
46
|
+
text: 'Describe this image',
|
|
47
|
+
images: 'https://example.com/image.jpg'
|
|
48
|
+
}
|
|
49
|
+
)
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### 2. Multiple Images
|
|
53
|
+
|
|
54
|
+
```ruby
|
|
55
|
+
response = LlmConductor.generate(
|
|
56
|
+
model: 'openai/gpt-4o-mini',
|
|
57
|
+
vendor: :openrouter,
|
|
58
|
+
prompt: {
|
|
59
|
+
text: 'Compare these images',
|
|
60
|
+
images: [
|
|
61
|
+
'https://example.com/image1.jpg',
|
|
62
|
+
'https://example.com/image2.jpg',
|
|
63
|
+
'https://example.com/image3.jpg'
|
|
64
|
+
]
|
|
65
|
+
}
|
|
66
|
+
)
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### 3. Image with Detail Level
|
|
70
|
+
|
|
71
|
+
For high-resolution images, specify the detail level:
|
|
72
|
+
|
|
73
|
+
```ruby
|
|
74
|
+
response = LlmConductor.generate(
|
|
75
|
+
model: 'openai/gpt-4o-mini',
|
|
76
|
+
vendor: :openrouter,
|
|
77
|
+
prompt: {
|
|
78
|
+
text: 'Analyze this image in detail',
|
|
79
|
+
images: [
|
|
80
|
+
{ url: 'https://example.com/hires-image.jpg', detail: 'high' }
|
|
81
|
+
]
|
|
82
|
+
}
|
|
83
|
+
)
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
Detail levels:
|
|
87
|
+
- `'high'` - Better for detailed analysis (uses more tokens)
|
|
88
|
+
- `'low'` - Faster, cheaper (default if not specified)
|
|
89
|
+
- `'auto'` - Let the model decide
|
|
90
|
+
|
|
91
|
+
### 4. Raw Format (Advanced)
|
|
92
|
+
|
|
93
|
+
For maximum control, use the OpenAI-compatible array format:
|
|
94
|
+
|
|
95
|
+
```ruby
|
|
96
|
+
response = LlmConductor.generate(
|
|
97
|
+
model: 'openai/gpt-4o-mini',
|
|
98
|
+
vendor: :openrouter,
|
|
99
|
+
prompt: [
|
|
100
|
+
{ type: 'text', text: 'What is in this image?' },
|
|
101
|
+
{ type: 'image_url', image_url: { url: 'https://example.com/image.jpg' } },
|
|
102
|
+
{ type: 'text', text: 'Describe it in detail.' }
|
|
103
|
+
]
|
|
104
|
+
)
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## Text-Only Requests (Backward Compatible)
|
|
108
|
+
|
|
109
|
+
The client still supports regular text-only requests:
|
|
110
|
+
|
|
111
|
+
```ruby
|
|
112
|
+
response = LlmConductor.generate(
|
|
113
|
+
model: 'openai/gpt-4o-mini',
|
|
114
|
+
vendor: :openrouter,
|
|
115
|
+
prompt: 'What is the capital of France?'
|
|
116
|
+
)
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Image URL Requirements
|
|
120
|
+
|
|
121
|
+
- Images must be publicly accessible URLs
|
|
122
|
+
- Supported formats: JPEG, PNG, GIF, WebP
|
|
123
|
+
- Maximum file size depends on the model
|
|
124
|
+
- Use HTTPS URLs when possible
|
|
125
|
+
|
|
126
|
+
## Error Handling
|
|
127
|
+
|
|
128
|
+
```ruby
|
|
129
|
+
response = LlmConductor.generate(
|
|
130
|
+
model: 'openai/gpt-4o-mini',
|
|
131
|
+
vendor: :openrouter,
|
|
132
|
+
prompt: {
|
|
133
|
+
text: 'Analyze this',
|
|
134
|
+
images: 'https://example.com/image.jpg'
|
|
135
|
+
}
|
|
136
|
+
)
|
|
137
|
+
|
|
138
|
+
if response.success?
|
|
139
|
+
puts response.output
|
|
140
|
+
else
|
|
141
|
+
puts "Error: #{response.metadata[:error]}"
|
|
142
|
+
end
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
## Testing in Development
|
|
146
|
+
|
|
147
|
+
### Interactive Console
|
|
148
|
+
|
|
149
|
+
```bash
|
|
150
|
+
./bin/console
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
Then:
|
|
154
|
+
|
|
155
|
+
```ruby
|
|
156
|
+
LlmConductor.configure do |config|
|
|
157
|
+
config.openrouter(api_key: 'your-key')
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
response = LlmConductor.generate(
|
|
161
|
+
model: 'openai/gpt-4o-mini',
|
|
162
|
+
vendor: :openrouter,
|
|
163
|
+
prompt: {
|
|
164
|
+
text: 'What is this?',
|
|
165
|
+
images: 'https://example.com/image.jpg'
|
|
166
|
+
}
|
|
167
|
+
)
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
### Run Examples
|
|
171
|
+
|
|
172
|
+
```bash
|
|
173
|
+
export OPENROUTER_API_KEY='your-key'
|
|
174
|
+
ruby examples/openrouter_vision_usage.rb
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
## Token Counting
|
|
178
|
+
|
|
179
|
+
Token counting for multimodal requests counts only the text portion. Image tokens vary by:
|
|
180
|
+
- Image size
|
|
181
|
+
- Detail level specified
|
|
182
|
+
- Model being used
|
|
183
|
+
|
|
184
|
+
The gem provides an approximation based on text tokens. For precise billing, check the OpenRouter dashboard.
|
|
185
|
+
|
|
186
|
+
## Common Issues
|
|
187
|
+
|
|
188
|
+
### 502 Server Error
|
|
189
|
+
|
|
190
|
+
If you get a 502 error:
|
|
191
|
+
- The model might be unavailable
|
|
192
|
+
- Try a different model (e.g., switch to `openai/gpt-4o-mini`)
|
|
193
|
+
- Free tier models may be overloaded
|
|
194
|
+
|
|
195
|
+
### "No implicit conversion of Hash into String"
|
|
196
|
+
|
|
197
|
+
This was fixed in the current version. Make sure you're using the latest version of the gem.
|
|
198
|
+
|
|
199
|
+
### Image Not Loading
|
|
200
|
+
|
|
201
|
+
- Verify the URL is publicly accessible
|
|
202
|
+
- Check that the image format is supported
|
|
203
|
+
- Try a smaller image size
|
|
204
|
+
|
|
205
|
+
## Cost Considerations
|
|
206
|
+
|
|
207
|
+
Vision models are more expensive than text-only models. Costs vary by:
|
|
208
|
+
|
|
209
|
+
- **Model choice**: GPT-4o > GPT-4o-mini > Gemini Flash
|
|
210
|
+
- **Detail level**: `high` uses more tokens than `low`
|
|
211
|
+
- **Image count**: Each image adds to the cost
|
|
212
|
+
- **Image size**: Larger images may use more tokens
|
|
213
|
+
|
|
214
|
+
For development, use:
|
|
215
|
+
- `openai/gpt-4o-mini` for cost-effective testing
|
|
216
|
+
- `detail: 'low'` for quick analysis
|
|
217
|
+
- Single images when possible
|
|
218
|
+
|
|
219
|
+
For production:
|
|
220
|
+
- Use `openai/gpt-4o` for best quality
|
|
221
|
+
- Use `detail: 'high'` when needed
|
|
222
|
+
- Monitor costs via OpenRouter dashboard
|
|
223
|
+
|
|
224
|
+
## Examples
|
|
225
|
+
|
|
226
|
+
See `examples/openrouter_vision_usage.rb` for complete working examples.
|
|
227
|
+
|
|
228
|
+
## Further Reading
|
|
229
|
+
|
|
230
|
+
- [OpenRouter Documentation](https://openrouter.ai/docs)
|
|
231
|
+
- [OpenAI Vision API Reference](https://platform.openai.com/docs/guides/vision)
|
|
232
|
+
- [Anthropic Claude Vision](https://docs.anthropic.com/claude/docs/vision)
|
|
233
|
+
|
|
data/examples/openrouter_vision_usage.rb
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Example of OpenRouter vision/multimodal usage
|
|
5
|
+
require_relative '../lib/llm_conductor'
|
|
6
|
+
|
|
7
|
+
# Configure OpenRouter
|
|
8
|
+
LlmConductor.configure do |config|
|
|
9
|
+
config.openrouter(
|
|
10
|
+
api_key: ENV['OPENROUTER_API_KEY']
|
|
11
|
+
)
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
# Example 1: Simple text-only request (backward compatible)
|
|
15
|
+
puts '=== Example 1: Text-only request ==='
|
|
16
|
+
response = LlmConductor.generate(
|
|
17
|
+
model: 'nvidia/nemotron-nano-12b-v2-vl:free', # Free vision-capable model
|
|
18
|
+
vendor: :openrouter,
|
|
19
|
+
prompt: 'What is the capital of France?'
|
|
20
|
+
)
|
|
21
|
+
puts response.output
|
|
22
|
+
puts "Tokens used: #{response.total_tokens}\n\n"
|
|
23
|
+
|
|
24
|
+
# Example 2: Vision request with a single image
|
|
25
|
+
puts '=== Example 2: Single image analysis ==='
|
|
26
|
+
response = LlmConductor.generate(
|
|
27
|
+
model: 'nvidia/nemotron-nano-12b-v2-vl:free',
|
|
28
|
+
vendor: :openrouter,
|
|
29
|
+
prompt: {
|
|
30
|
+
text: 'What is in this image?',
|
|
31
|
+
images: 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg'
|
|
32
|
+
}
|
|
33
|
+
)
|
|
34
|
+
puts response.output
|
|
35
|
+
puts "Tokens used: #{response.total_tokens}\n\n"
|
|
36
|
+
|
|
37
|
+
# Example 3: Vision request with multiple images
|
|
38
|
+
puts '=== Example 3: Multiple images comparison ==='
|
|
39
|
+
response = LlmConductor.generate(
|
|
40
|
+
model: 'nvidia/nemotron-nano-12b-v2-vl:free',
|
|
41
|
+
vendor: :openrouter,
|
|
42
|
+
prompt: {
|
|
43
|
+
text: 'Compare these two images and describe the differences.',
|
|
44
|
+
images: [
|
|
45
|
+
'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg',
|
|
46
|
+
'https://upload.wikimedia.org/wikipedia/commons/thumb/3/3f/Placeholder_view_vector.svg/681px-Placeholder_view_vector.svg.png'
|
|
47
|
+
]
|
|
48
|
+
}
|
|
49
|
+
)
|
|
50
|
+
puts response.output
|
|
51
|
+
puts "Tokens used: #{response.total_tokens}\n\n"
|
|
52
|
+
|
|
53
|
+
# Example 4: Image with detail level specification
|
|
54
|
+
puts '=== Example 4: Image with detail level ==='
|
|
55
|
+
response = LlmConductor.generate(
|
|
56
|
+
model: 'nvidia/nemotron-nano-12b-v2-vl:free',
|
|
57
|
+
vendor: :openrouter,
|
|
58
|
+
prompt: {
|
|
59
|
+
text: 'Describe this image in detail.',
|
|
60
|
+
images: [
|
|
61
|
+
{
|
|
62
|
+
url: 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg',
|
|
63
|
+
detail: 'high'
|
|
64
|
+
}
|
|
65
|
+
]
|
|
66
|
+
}
|
|
67
|
+
)
|
|
68
|
+
puts response.output
|
|
69
|
+
puts "Tokens used: #{response.total_tokens}\n\n"
|
|
70
|
+
|
|
71
|
+
# Example 5: Using raw array format (advanced)
|
|
72
|
+
puts '=== Example 5: Raw array format ==='
|
|
73
|
+
response = LlmConductor.generate(
|
|
74
|
+
model: 'nvidia/nemotron-nano-12b-v2-vl:free',
|
|
75
|
+
vendor: :openrouter,
|
|
76
|
+
prompt: [
|
|
77
|
+
{ type: 'text', text: 'What is in this image?' },
|
|
78
|
+
{
|
|
79
|
+
type: 'image_url',
|
|
80
|
+
image_url: {
|
|
81
|
+
url: 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg'
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
]
|
|
85
|
+
)
|
|
86
|
+
puts response.output
|
|
87
|
+
puts "Tokens used: #{response.total_tokens}\n\n"
|
|
88
|
+
|
|
89
|
+
# Example 6: Error handling
|
|
90
|
+
puts '=== Example 6: Error handling ==='
|
|
91
|
+
begin
|
|
92
|
+
response = LlmConductor.generate(
|
|
93
|
+
model: 'nvidia/nemotron-nano-12b-v2-vl:free',
|
|
94
|
+
vendor: :openrouter,
|
|
95
|
+
prompt: {
|
|
96
|
+
text: 'Analyze this image',
|
|
97
|
+
images: 'invalid-url'
|
|
98
|
+
}
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
if response.success?
|
|
102
|
+
puts response.output
|
|
103
|
+
else
|
|
104
|
+
puts "Error: #{response.metadata[:error]}"
|
|
105
|
+
end
|
|
106
|
+
rescue StandardError => e
|
|
107
|
+
puts "Exception: #{e.message}"
|
|
108
|
+
end
|
|
data/lib/llm_conductor/clients/groq_client.rb
CHANGED
|
@@ -7,10 +7,9 @@ module LlmConductor
|
|
|
7
7
|
private
|
|
8
8
|
|
|
9
9
|
def generate_content(prompt)
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
).dig('choices', 0, 'message', 'content')
|
|
10
|
+
# Groq::Client.chat expects messages as positional arg, not keyword arg
|
|
11
|
+
messages = [{ role: 'user', content: prompt }]
|
|
12
|
+
client.chat(messages, model_id: model)['content']
|
|
14
13
|
end
|
|
15
14
|
|
|
16
15
|
def client
|
|
data/lib/llm_conductor/clients/openrouter_client.rb
CHANGED
|
@@ -3,17 +3,122 @@
|
|
|
3
3
|
module LlmConductor
|
|
4
4
|
module Clients
|
|
5
5
|
# OpenRouter client implementation for accessing various LLM providers through OpenRouter API
|
|
6
|
+
# Supports both text-only and multimodal (vision) requests
|
|
6
7
|
class OpenrouterClient < BaseClient
|
|
7
8
|
private
|
|
8
9
|
|
|
10
|
+
# Override token calculation to handle multimodal content
|
|
11
|
+
def calculate_tokens(content)
|
|
12
|
+
case content
|
|
13
|
+
when String
|
|
14
|
+
super(content)
|
|
15
|
+
when Hash
|
|
16
|
+
# For multimodal content, count tokens only for text part
|
|
17
|
+
# Note: This is an approximation as images have variable token counts
|
|
18
|
+
text = content[:text] || content['text'] || ''
|
|
19
|
+
super(text)
|
|
20
|
+
when Array
|
|
21
|
+
# For pre-formatted arrays, extract and count text parts
|
|
22
|
+
text_parts = content.select { |part| part[:type] == 'text' || part['type'] == 'text' }
|
|
23
|
+
.map { |part| part[:text] || part['text'] || '' }
|
|
24
|
+
.join(' ')
|
|
25
|
+
super(text_parts)
|
|
26
|
+
else
|
|
27
|
+
super(content.to_s)
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
|
|
9
31
|
def generate_content(prompt)
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
32
|
+
content = format_content(prompt)
|
|
33
|
+
|
|
34
|
+
# Retry logic for transient 502 errors (common with free-tier models)
|
|
35
|
+
# Free-tier vision models can be slow/overloaded, so we use more retries
|
|
36
|
+
max_retries = 5
|
|
37
|
+
retry_count = 0
|
|
38
|
+
|
|
39
|
+
begin
|
|
40
|
+
client.chat(
|
|
41
|
+
parameters: {
|
|
42
|
+
model:,
|
|
43
|
+
messages: [{ role: 'user', content: }],
|
|
44
|
+
provider: { sort: 'throughput' }
|
|
45
|
+
}
|
|
46
|
+
).dig('choices', 0, 'message', 'content')
|
|
47
|
+
rescue Faraday::ServerError => e
|
|
48
|
+
retry_count += 1
|
|
49
|
+
|
|
50
|
+
# Log retry attempts if logger is configured
|
|
51
|
+
configuration.logger&.warn(
|
|
52
|
+
"OpenRouter API error (attempt #{retry_count}/#{max_retries}): #{e.message}"
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
raise unless e.response[:status] == 502 && retry_count < max_retries
|
|
56
|
+
|
|
57
|
+
wait_time = 2**retry_count # Exponential backoff: 2, 4, 8, 16, 32 seconds
|
|
58
|
+
configuration.logger&.info("Retrying in #{wait_time}s...")
|
|
59
|
+
sleep(wait_time)
|
|
60
|
+
retry
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
# Format content based on whether it's a simple string or multimodal content
|
|
65
|
+
# @param prompt [String, Hash, Array] The prompt content
|
|
66
|
+
# @return [String, Array] Formatted content for the API
|
|
67
|
+
def format_content(prompt)
|
|
68
|
+
case prompt
|
|
69
|
+
when Hash
|
|
70
|
+
# Handle hash with text and/or images
|
|
71
|
+
format_multimodal_hash(prompt)
|
|
72
|
+
when Array
|
|
73
|
+
# Already formatted as array of content parts
|
|
74
|
+
prompt
|
|
75
|
+
else
|
|
76
|
+
# Simple string prompt
|
|
77
|
+
prompt.to_s
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
# Format a hash containing text and/or images into multimodal content array
|
|
82
|
+
# @param prompt_hash [Hash] Hash with :text and/or :images keys
|
|
83
|
+
# @return [Array] Array of content parts for the API
|
|
84
|
+
def format_multimodal_hash(prompt_hash)
|
|
85
|
+
content_parts = []
|
|
86
|
+
|
|
87
|
+
# Add text part if present
|
|
88
|
+
if prompt_hash[:text] || prompt_hash['text']
|
|
89
|
+
text = prompt_hash[:text] || prompt_hash['text']
|
|
90
|
+
content_parts << { type: 'text', text: }
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
# Add image parts if present
|
|
94
|
+
images = prompt_hash[:images] || prompt_hash['images'] || []
|
|
95
|
+
images = [images] unless images.is_a?(Array)
|
|
96
|
+
|
|
97
|
+
images.each do |image|
|
|
98
|
+
content_parts << format_image_part(image)
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
content_parts
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
# Format an image into the appropriate API structure
|
|
105
|
+
# @param image [String, Hash] Image URL or hash with url/detail keys
|
|
106
|
+
# @return [Hash] Formatted image part for the API
|
|
107
|
+
def format_image_part(image)
|
|
108
|
+
case image
|
|
109
|
+
when String
|
|
110
|
+
# Simple URL string
|
|
111
|
+
{ type: 'image_url', image_url: { url: image } }
|
|
112
|
+
when Hash
|
|
113
|
+
# Hash with url and optional detail level
|
|
114
|
+
{
|
|
115
|
+
type: 'image_url',
|
|
116
|
+
image_url: {
|
|
117
|
+
url: image[:url] || image['url'],
|
|
118
|
+
detail: image[:detail] || image['detail']
|
|
119
|
+
}.compact
|
|
15
120
|
}
|
|
16
|
-
|
|
121
|
+
end
|
|
17
122
|
end
|
|
18
123
|
|
|
19
124
|
def client
|
|
@@ -21,7 +126,7 @@ module LlmConductor
|
|
|
21
126
|
config = LlmConductor.configuration.provider_config(:openrouter)
|
|
22
127
|
OpenAI::Client.new(
|
|
23
128
|
access_token: config[:api_key],
|
|
24
|
-
uri_base: config[:uri_base] || 'https://openrouter.ai/api/'
|
|
129
|
+
uri_base: config[:uri_base] || 'https://openrouter.ai/api/v1'
|
|
25
130
|
)
|
|
26
131
|
end
|
|
27
132
|
end
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: llm_conductor
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.1.0
|
|
4
|
+
version: 1.1.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ben Zheng
|
|
8
8
|
bindir: exe
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date: 2025-10-
|
|
10
|
+
date: 2025-10-29 00:00:00.000000000 Z
|
|
11
11
|
dependencies:
|
|
12
12
|
- !ruby/object:Gem::Dependency
|
|
13
13
|
name: activesupport
|
|
@@ -152,10 +152,12 @@ files:
|
|
|
152
152
|
- LICENSE
|
|
153
153
|
- README.md
|
|
154
154
|
- Rakefile
|
|
155
|
+
- VISION_USAGE.md
|
|
155
156
|
- config/initializers/llm_conductor.rb
|
|
156
157
|
- examples/data_builder_usage.rb
|
|
157
158
|
- examples/gemini_usage.rb
|
|
158
159
|
- examples/groq_usage.rb
|
|
160
|
+
- examples/openrouter_vision_usage.rb
|
|
159
161
|
- examples/prompt_registration.rb
|
|
160
162
|
- examples/rag_usage.rb
|
|
161
163
|
- examples/simple_usage.rb
|