llm_conductor 1.2.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -0
- data/VISION_USAGE.md +158 -8
- data/examples/claude_vision_usage.rb +138 -0
- data/examples/gemini_usage.rb +1 -1
- data/examples/gemini_vision_usage.rb +168 -0
- data/examples/gpt_vision_usage.rb +156 -0
- data/lib/llm_conductor/clients/anthropic_client.rb +28 -1
- data/lib/llm_conductor/clients/concerns/vision_support.rb +159 -0
- data/lib/llm_conductor/clients/gemini_client.rb +105 -1
- data/lib/llm_conductor/clients/gpt_client.rb +7 -1
- data/lib/llm_conductor/clients/openrouter_client.rb +4 -81
- data/lib/llm_conductor/clients/zai_client.rb +4 -81
- data/lib/llm_conductor/version.rb +1 -1
- metadata +6 -2
checksums.yaml
CHANGED
```diff
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8e5bb3310ea1328acac93c59e7bc227e63de52397e51402eee0eb921eb92acc8
+  data.tar.gz: cacb73f7d04e46a100581df3b77781a98d0d4277fc726a0b32c266443999f66a
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 97d9c89718834420532c391c207790b416048b65958f3b6d7feac008f099cc533c644c614a014bc437b1631cf9fa60c2d0e340b8bea8316217e950af5449764b
+  data.tar.gz: 4fb3301001cebc258485568ebfa0078b3773b3563dd5149c8cd7527cc9a306ae41785fea6a73f09d1dca19a09f1af92b88611efa283e80efed317ef876f59330
```
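These values can be checked against a locally fetched copy. A minimal Ruby sketch (file names assumed; a `.gem` file is a tar archive whose members include the `metadata.gz` and `data.tar.gz` that `checksums.yaml` covers):

```ruby
# Assumes the gem was fetched and unpacked first, e.g.:
#   gem fetch llm_conductor -v 1.4.0
#   tar -xf llm_conductor-1.4.0.gem
require 'digest'

%w[metadata.gz data.tar.gz].each do |member|
  puts "#{member} SHA256: #{Digest::SHA256.file(member).hexdigest}"
  puts "#{member} SHA512: #{Digest::SHA512.file(member).hexdigest}"
end
```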
data/.rubocop.yml
CHANGED
data/VISION_USAGE.md
CHANGED
````diff
@@ -1,9 +1,55 @@
 # Vision/Multimodal Usage Guide
 
-This guide explains how to use vision/multimodal capabilities with
+This guide explains how to use vision/multimodal capabilities with LLM Conductor. Vision support is available for Claude (Anthropic), GPT (OpenAI), Gemini (Google), OpenRouter, and Z.ai clients.
 
 ## Quick Start
 
+### Using Claude (Anthropic)
+
+```ruby
+require 'llm_conductor'
+
+# Configure
+LlmConductor.configure do |config|
+  config.anthropic(api_key: ENV['ANTHROPIC_API_KEY'])
+end
+
+# Analyze an image
+response = LlmConductor.generate(
+  model: 'claude-sonnet-4-20250514',
+  vendor: :anthropic,
+  prompt: {
+    text: 'What is in this image?',
+    images: 'https://example.com/image.jpg'
+  }
+)
+
+puts response.output
+```
+
+### Using GPT (OpenAI)
+
+```ruby
+require 'llm_conductor'
+
+# Configure
+LlmConductor.configure do |config|
+  config.openai(api_key: ENV['OPENAI_API_KEY'])
+end
+
+# Analyze an image
+response = LlmConductor.generate(
+  model: 'gpt-4o',
+  vendor: :openai,
+  prompt: {
+    text: 'What is in this image?',
+    images: 'https://example.com/image.jpg'
+  }
+)
+
+puts response.output
+```
+
 ### Using OpenRouter
 
 ```ruby
@@ -27,6 +73,29 @@ response = LlmConductor.generate(
 puts response.output
 ```
 
+### Using Gemini (Google)
+
+```ruby
+require 'llm_conductor'
+
+# Configure
+LlmConductor.configure do |config|
+  config.gemini(api_key: ENV['GEMINI_API_KEY'])
+end
+
+# Analyze an image
+response = LlmConductor.generate(
+  model: 'gemini-2.5-flash',
+  vendor: :gemini,
+  prompt: {
+    text: 'What is in this image?',
+    images: 'https://cdn.autonomous.ai/production/ecm/230930/10-Comfortable-Office-Chairs-for-Gaming-A-Comprehensive-Review00002.webp'
+  }
+)
+
+puts response.output
+```
+
 ### Using Z.ai (Zhipu AI)
 
 ```ruby
@@ -52,6 +121,23 @@ puts response.output
 
 ## Recommended Models
 
+### Claude Models (Anthropic)
+
+For vision tasks via Anthropic API:
+
+- **`claude-sonnet-4-20250514`** - Claude Sonnet 4 (latest, best for vision) ✅
+- **`claude-opus-4-20250514`** - Claude Opus 4 (maximum quality)
+- **`claude-opus-4-1-20250805`** - Claude Opus 4.1 (newest flagship model)
+
+### GPT Models (OpenAI)
+
+For vision tasks via OpenAI API:
+
+- **`gpt-4o`** - Latest GPT-4 Omni with advanced vision capabilities ✅
+- **`gpt-4o-mini`** - Fast, cost-effective vision model
+- **`gpt-4-turbo`** - Previous generation with vision support
+- **`gpt-4-vision-preview`** - Legacy vision model (deprecated)
+
 ### OpenRouter Models
 
 For vision tasks via OpenRouter, these models work reliably:
@@ -61,6 +147,17 @@ For vision tasks via OpenRouter, these models work reliably:
 - **`anthropic/claude-3.5-sonnet`** - High quality analysis
 - **`openai/gpt-4o`** - Best quality (higher cost)
 
+### Gemini Models (Google)
+
+For vision tasks via Google Gemini API:
+
+- **`gemini-2.0-flash`** - Gemini 2.0 Flash (fast, efficient, multimodal) ✅
+- **`gemini-2.5-flash`** - Gemini 2.5 Flash (latest fast model)
+- **`gemini-1.5-pro`** - Gemini 1.5 Pro (high quality, large context window)
+- **`gemini-1.5-flash`** - Gemini 1.5 Flash (previous generation fast model)
+
+**Note:** Gemini client automatically fetches images from URLs and encodes them as base64, as required by the Gemini API.
+
 ### Z.ai Models (Zhipu AI)
 
 For vision tasks via Z.ai, these GLM models are recommended:
@@ -103,12 +200,12 @@ response = LlmConductor.generate(
 
 ### 3. Image with Detail Level
 
-For high-resolution images, specify the detail level:
+For high-resolution images, specify the detail level (supported by GPT and OpenRouter):
 
 ```ruby
 response = LlmConductor.generate(
-  model: '
-  vendor: :
+  model: 'gpt-4o',
+  vendor: :openai,
   prompt: {
     text: 'Analyze this image in detail',
     images: [
@@ -118,19 +215,22 @@ response = LlmConductor.generate(
 )
 ```
 
-Detail levels:
+Detail levels (GPT and OpenRouter only):
 - `'high'` - Better for detailed analysis (uses more tokens)
 - `'low'` - Faster, cheaper (default if not specified)
 - `'auto'` - Let the model decide
 
+**Note:** Claude (Anthropic), Gemini (Google), and Z.ai don't support the `detail` parameter.
+
 ### 4. Raw Format (Advanced)
 
-For maximum control, use
+For maximum control, use provider-specific array formats:
 
+**GPT/OpenRouter Format:**
 ```ruby
 response = LlmConductor.generate(
-  model: '
-  vendor: :
+  model: 'gpt-4o',
+  vendor: :openai,
   prompt: [
     { type: 'text', text: 'What is in this image?' },
     { type: 'image_url', image_url: { url: 'https://example.com/image.jpg' } },
@@ -139,6 +239,30 @@ response = LlmConductor.generate(
 )
 ```
 
+**Claude Format:**
+```ruby
+response = LlmConductor.generate(
+  model: 'claude-sonnet-4-20250514',
+  vendor: :anthropic,
+  prompt: [
+    { type: 'image', source: { type: 'url', url: 'https://example.com/image.jpg' } },
+    { type: 'text', text: 'What is in this image? Describe it in detail.' }
+  ]
+)
+```
+
+**Gemini Format:**
+```ruby
+response = LlmConductor.generate(
+  model: 'gemini-2.0-flash',
+  vendor: :gemini,
+  prompt: [
+    { type: 'text', text: 'What is in this image? Describe it in detail.' },
+    { type: 'image_url', image_url: { url: 'https://example.com/image.jpg' } }
+  ]
+)
+```
+
 ## Text-Only Requests (Backward Compatible)
 
 The client still supports regular text-only requests:
@@ -158,6 +282,10 @@ response = LlmConductor.generate(
 - Maximum file size depends on the model
 - Use HTTPS URLs when possible
 
+**Provider-Specific Notes:**
+- **Gemini**: URLs are automatically fetched and base64-encoded by the client before sending to the API
+- **Claude, GPT, OpenRouter, Z.ai**: URLs are sent directly to the API (no preprocessing required)
+
 ## Error Handling
 
 ```ruby
@@ -204,12 +332,30 @@ response = LlmConductor.generate(
 
 ### Run Examples
 
+For Claude:
+```bash
+export ANTHROPIC_API_KEY='your-key'
+ruby examples/claude_vision_usage.rb
+```
+
+For GPT:
+```bash
+export OPENAI_API_KEY='your-key'
+ruby examples/gpt_vision_usage.rb
+```
+
 For OpenRouter:
 ```bash
 export OPENROUTER_API_KEY='your-key'
 ruby examples/openrouter_vision_usage.rb
 ```
 
+For Gemini:
+```bash
+export GEMINI_API_KEY='your-key'
+ruby examples/gemini_vision_usage.rb
+```
+
 For Z.ai:
 ```bash
 export ZAI_API_KEY='your-key'
@@ -265,6 +411,9 @@ For production:
 
 ## Examples
 
+- `examples/claude_vision_usage.rb` - Complete Claude vision examples with Claude Sonnet 4
+- `examples/gpt_vision_usage.rb` - Complete GPT vision examples with GPT-4o
+- `examples/gemini_vision_usage.rb` - Complete Gemini vision examples with Gemini 2.0 Flash
 - `examples/openrouter_vision_usage.rb` - Complete OpenRouter vision examples
 - `examples/zai_usage.rb` - Complete Z.ai GLM-4.5V examples including vision and text
 
@@ -273,6 +422,7 @@ For production:
 - [OpenRouter Documentation](https://openrouter.ai/docs)
 - [OpenAI Vision API Reference](https://platform.openai.com/docs/guides/vision)
 - [Anthropic Claude Vision](https://docs.anthropic.com/claude/docs/vision)
+- [Google Gemini API Documentation](https://ai.google.dev/docs)
 - [Z.ai API Platform](https://api.z.ai/)
 - [GLM-4.5V Documentation](https://bigmodel.cn/)
````
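Taken together, the guide above exercises three prompt shapes accepted by `LlmConductor.generate`. A consolidated sketch (model, vendor, and URL are placeholder values):

```ruby
require 'llm_conductor'

# 1) Plain string: text-only, backward compatible
prompt = 'What is the capital of France?'

# 2) Hash: text plus one image URL, or an array of them
prompt = { text: 'What is in this image?', images: 'https://example.com/image.jpg' }

# 3) Array: pre-formatted, provider-specific content parts
prompt = [
  { type: 'text', text: 'What is in this image?' },
  { type: 'image_url', image_url: { url: 'https://example.com/image.jpg' } }
]

response = LlmConductor.generate(model: 'gpt-4o', vendor: :openai, prompt:)
puts response.output
```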
data/examples/claude_vision_usage.rb
ADDED
```ruby
#!/usr/bin/env ruby
# frozen_string_literal: true

require_relative '../lib/llm_conductor'

# This example demonstrates using Claude Sonnet 4 vision capabilities
# Set your Anthropic API key: export ANTHROPIC_API_KEY='your-key-here'

puts '=' * 80
puts 'Claude Sonnet 4 Vision Usage Examples'
puts '=' * 80
puts

# Check for API key
api_key = ENV['ANTHROPIC_API_KEY']
if api_key.nil? || api_key.empty?
  puts 'ERROR: ANTHROPIC_API_KEY environment variable is not set!'
  puts
  puts 'Please set your Anthropic API key:'
  puts '  export ANTHROPIC_API_KEY="your-key-here"'
  puts
  puts 'You can get an API key from: https://console.anthropic.com/'
  exit 1
end

# Configure the client
LlmConductor.configure do |config|
  config.anthropic(api_key:)
end

# Example 1: Single Image Analysis
puts "\n1. Single Image Analysis"
puts '-' * 80

begin
  response = LlmConductor.generate(
    model: 'claude-sonnet-4-20250514',
    vendor: :anthropic,
    prompt: {
      text: 'What is in this image? Please describe it in detail.',
      images: 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg'
    }
  )

  puts "Response: #{response.output}"
  puts "Success: #{response.success?}"
  puts "Tokens: #{response.input_tokens} input, #{response.output_tokens} output"
  puts "Metadata: #{response.metadata.inspect}" if response.metadata && !response.metadata.empty?
rescue StandardError => e
  puts "ERROR: #{e.message}"
  puts "Backtrace: #{e.backtrace.first(5).join("\n")}"
end

# Example 2: Multiple Images Comparison
puts "\n2. Multiple Images Comparison"
puts '-' * 80

response = LlmConductor.generate(
  model: 'claude-sonnet-4-20250514',
  vendor: :anthropic,
  prompt: {
    text: 'Compare these two images. What are the main differences?',
    images: [
      'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/1024px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg',
      'https://upload.wikimedia.org/wikipedia/commons/thumb/3/3f/Placeholder_view_vector.svg/1024px-Placeholder_view_vector.svg.png'
    ]
  }
)

puts "Response: #{response.output}"
puts "Tokens: #{response.input_tokens} input, #{response.output_tokens} output"

# Example 3: Image with Specific Question
puts "\n3. Image with Specific Question"
puts '-' * 80

response = LlmConductor.generate(
  model: 'claude-sonnet-4-20250514',
  vendor: :anthropic,
  prompt: {
    text: 'Is there a wooden boardwalk visible in this image? If yes, describe its condition.',
    images: 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/1024px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg'
  }
)

puts "Response: #{response.output}"
puts "Tokens: #{response.input_tokens} input, #{response.output_tokens} output"

# Example 4: Raw Format (Advanced)
puts "\n4. Raw Format (Advanced)"
puts '-' * 80

response = LlmConductor.generate(
  model: 'claude-sonnet-4-20250514',
  vendor: :anthropic,
  prompt: [
    { type: 'image',
      source: { type: 'url',
                url: 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/1024px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg' } },
    { type: 'text', text: 'Describe the weather conditions in this image.' }
  ]
)

puts "Response: #{response.output}"
puts "Tokens: #{response.input_tokens} input, #{response.output_tokens} output"

# Example 5: Text-Only Request (Backward Compatible)
puts "\n5. Text-Only Request (Backward Compatible)"
puts '-' * 80

response = LlmConductor.generate(
  model: 'claude-sonnet-4-20250514',
  vendor: :anthropic,
  prompt: 'What is the capital of France?'
)

puts "Response: #{response.output}"
puts "Tokens: #{response.input_tokens} input, #{response.output_tokens} output"

# Example 6: Image Analysis with Detailed Instructions
puts "\n6. Image Analysis with Detailed Instructions"
puts '-' * 80

response = LlmConductor.generate(
  model: 'claude-sonnet-4-20250514',
  vendor: :anthropic,
  prompt: {
    text: 'Analyze this image and provide: 1) Main subjects, 2) Colors and lighting, 3) Mood or atmosphere, 4) Any notable details',
    images: 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/1024px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg'
  }
)

puts "Response: #{response.output}"
puts "Tokens: #{response.input_tokens} input, #{response.output_tokens} output"

puts "\n#{'=' * 80}"
puts 'All examples completed successfully!'
puts '=' * 80
```
data/examples/gemini_usage.rb
CHANGED
```diff
@@ -4,7 +4,7 @@ require_relative '../lib/llm_conductor'
 
 # Configure Gemini API key
 LlmConductor.configure do |config|
-  config.
+  config.gemini(api_key: ENV['GEMINI_API_KEY'] || 'your_gemini_api_key_here')
 end
 
 # Example usage
```
data/examples/gemini_vision_usage.rb
ADDED
```ruby
# frozen_string_literal: true

require_relative '../lib/llm_conductor'

# Configure Gemini API key
LlmConductor.configure do |config|
  config.gemini(api_key: ENV['GEMINI_API_KEY'] || 'your_gemini_api_key_here')
end

puts '=' * 80
puts 'Google Gemini Vision Examples'
puts '=' * 80
puts

# Example 1: Single image analysis (simple format)
puts 'Example 1: Single Image Analysis'
puts '-' * 40

response = LlmConductor.generate(
  model: 'gemini-2.0-flash',
  vendor: :gemini,
  prompt: {
    text: 'What is in this image? Describe it in detail.',
    images: 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg'
  }
)

puts "Model: #{response.model}"
puts "Vendor: #{response.metadata[:vendor]}"
puts "Input tokens: #{response.input_tokens}"
puts "Output tokens: #{response.output_tokens}"
puts "\nResponse:"
puts response.output
puts

# Example 2: Multiple images comparison
puts '=' * 80
puts 'Example 2: Multiple Images Comparison'
puts '-' * 40

response = LlmConductor.generate(
  model: 'gemini-2.0-flash',
  vendor: :gemini,
  prompt: {
    text: 'Compare these images. What are the main differences?',
    images: [
      'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg',
      'https://upload.wikimedia.org/wikipedia/commons/thumb/3/3f/Placeholder_view_vector.svg/681px-Placeholder_view_vector.svg.png'
    ]
  }
)

puts "Model: #{response.model}"
puts "Input tokens: #{response.input_tokens}"
puts "Output tokens: #{response.output_tokens}"
puts "\nResponse:"
puts response.output
puts

# Example 3: Raw format with Gemini-specific structure
puts '=' * 80
puts 'Example 3: Raw Format (Gemini-specific)'
puts '-' * 40

response = LlmConductor.generate(
  model: 'gemini-2.0-flash',
  vendor: :gemini,
  prompt: [
    { type: 'text', text: 'Analyze this nature scene:' },
    { type: 'image_url', image_url: { url: 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg' } },
    { type: 'text', text: 'What time of day do you think this photo was taken?' }
  ]
)

puts "Model: #{response.model}"
puts "Input tokens: #{response.input_tokens}"
puts "Output tokens: #{response.output_tokens}"
puts "\nResponse:"
puts response.output
puts

# Example 4: Image with specific analysis request
puts '=' * 80
puts 'Example 4: Specific Analysis Request'
puts '-' * 40

response = LlmConductor.generate(
  model: 'gemini-2.0-flash',
  vendor: :gemini,
  prompt: {
    text: 'Count the number of distinct colors visible in this image and list them.',
    images: 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg'
  }
)

puts "Model: #{response.model}"
puts "\nResponse:"
puts response.output
puts

# Example 5: Error handling
puts '=' * 80
puts 'Example 5: Error Handling'
puts '-' * 40

begin
  response = LlmConductor.generate(
    model: 'gemini-2.0-flash',
    vendor: :gemini,
    prompt: {
      text: 'What is in this image?',
      images: 'https://example.com/nonexistent-image.jpg'
    }
  )

  if response.success?
    puts 'Success! Response:'
    puts response.output
  else
    puts "Request failed: #{response.metadata[:error]}"
  end
rescue StandardError => e
  puts "Error occurred: #{e.message}"
end
puts

# Example 6: Text-only request (backward compatibility)
puts '=' * 80
puts 'Example 6: Text-Only Request (No Images)'
puts '-' * 40

response = LlmConductor.generate(
  model: 'gemini-2.0-flash',
  vendor: :gemini,
  prompt: 'Explain how neural networks work in 3 sentences.'
)

puts "Model: #{response.model}"
puts "Input tokens: #{response.input_tokens}"
puts "Output tokens: #{response.output_tokens}"
puts "\nResponse:"
puts response.output
puts

# Example 7: Image with hash format (URL specified explicitly)
puts '=' * 80
puts 'Example 7: Image Hash Format'
puts '-' * 40

response = LlmConductor.generate(
  model: 'gemini-2.0-flash',
  vendor: :gemini,
  prompt: {
    text: 'Describe the mood and atmosphere of this image.',
    images: [
      { url: 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg' }
    ]
  }
)

puts "Model: #{response.model}"
puts "\nResponse:"
puts response.output
puts

puts '=' * 80
puts 'Examples completed!'
puts '=' * 80
```
data/examples/gpt_vision_usage.rb
ADDED
```ruby
#!/usr/bin/env ruby
# frozen_string_literal: true

require_relative '../lib/llm_conductor'

# This example demonstrates using GPT-4o vision capabilities
# Set your OpenAI API key: export OPENAI_API_KEY='your-key-here'

puts '=' * 80
puts 'GPT-4o Vision Usage Examples'
puts '=' * 80
puts

# Check for API key
api_key = ENV['OPENAI_API_KEY']
if api_key.nil? || api_key.empty?
  puts 'ERROR: OPENAI_API_KEY environment variable is not set!'
  puts
  puts 'Please set your OpenAI API key:'
  puts '  export OPENAI_API_KEY="your-key-here"'
  puts
  puts 'You can get an API key from: https://platform.openai.com/api-keys'
  exit 1
end

# Configure the client
LlmConductor.configure do |config|
  config.openai(api_key:)
end

# Example 1: Single Image Analysis
puts "\n1. Single Image Analysis"
puts '-' * 80

response = LlmConductor.generate(
  model: 'gpt-4o',
  vendor: :openai,
  prompt: {
    text: 'What is in this image? Please describe it in detail.',
    images: 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg'
  }
)

puts "Response: #{response.output}"
puts "Tokens: #{response.input_tokens} input, #{response.output_tokens} output"

# Example 2: Multiple Images Comparison
puts "\n2. Multiple Images Comparison"
puts '-' * 80

response = LlmConductor.generate(
  model: 'gpt-4o',
  vendor: :openai,
  prompt: {
    text: 'Compare these two images. What are the main differences?',
    images: [
      'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/1024px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg',
      'https://upload.wikimedia.org/wikipedia/commons/thumb/3/3f/Placeholder_view_vector.svg/1024px-Placeholder_view_vector.svg.png'
    ]
  }
)

puts "Response: #{response.output}"
puts "Tokens: #{response.input_tokens} input, #{response.output_tokens} output"

# Example 3: Image with Detail Level - High Resolution
puts "\n3. Image with Detail Level - High Resolution"
puts '-' * 80

response = LlmConductor.generate(
  model: 'gpt-4o',
  vendor: :openai,
  prompt: {
    text: 'Analyze this high-resolution image in detail. What are all the elements you can see?',
    images: [
      { url: 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg', detail: 'high' }
    ]
  }
)

puts "Response: #{response.output}"
puts "Tokens: #{response.input_tokens} input, #{response.output_tokens} output"

# Example 4: Image with Detail Level - Low (Faster, Cheaper)
puts "\n4. Image with Detail Level - Low (Faster, Cheaper)"
puts '-' * 80

response = LlmConductor.generate(
  model: 'gpt-4o',
  vendor: :openai,
  prompt: {
    text: 'Give me a quick description of this image.',
    images: [
      { url: 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/1024px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg', detail: 'low' }
    ]
  }
)

puts "Response: #{response.output}"
puts "Tokens: #{response.input_tokens} input, #{response.output_tokens} output"

# Example 5: Raw Format (Advanced)
puts "\n5. Raw Format (Advanced)"
puts '-' * 80

response = LlmConductor.generate(
  model: 'gpt-4o',
  vendor: :openai,
  prompt: [
    { type: 'text', text: 'What is in this image?' },
    { type: 'image_url',
      image_url: { url: 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/1024px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg' } },
    { type: 'text', text: 'Describe the weather conditions.' }
  ]
)

puts "Response: #{response.output}"
puts "Tokens: #{response.input_tokens} input, #{response.output_tokens} output"

# Example 6: Text-Only Request (Backward Compatible)
puts "\n6. Text-Only Request (Backward Compatible)"
puts '-' * 80

response = LlmConductor.generate(
  model: 'gpt-4o',
  vendor: :openai,
  prompt: 'What is the capital of France?'
)

puts "Response: #{response.output}"
puts "Tokens: #{response.input_tokens} input, #{response.output_tokens} output"

# Example 7: Multiple Images with Mixed Detail Levels
puts "\n7. Multiple Images with Mixed Detail Levels"
puts '-' * 80

response = LlmConductor.generate(
  model: 'gpt-4o',
  vendor: :openai,
  prompt: {
    text: 'Compare these images at different detail levels.',
    images: [
      {
        url: 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/1024px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg', detail: 'high'
      },
      { url: 'https://upload.wikimedia.org/wikipedia/commons/thumb/3/3f/Placeholder_view_vector.svg/1024px-Placeholder_view_vector.svg.png', detail: 'low' }
    ]
  }
)

puts "Response: #{response.output}"
puts "Tokens: #{response.input_tokens} input, #{response.output_tokens} output"

puts "\n#{'=' * 80}"
puts 'All examples completed successfully!'
puts '=' * 80
```
data/lib/llm_conductor/clients/anthropic_client.rb
CHANGED
```diff
@@ -1,18 +1,23 @@
 # frozen_string_literal: true
 
 require 'anthropic'
+require_relative 'concerns/vision_support'
 
 module LlmConductor
   module Clients
     # Anthropic Claude client implementation for accessing Claude models via Anthropic API
+    # Supports both text-only and multimodal (vision) requests
     class AnthropicClient < BaseClient
+      include Concerns::VisionSupport
+
       private
 
       def generate_content(prompt)
+        content = format_content(prompt)
         response = client.messages.create(
           model:,
           max_tokens: 4096,
-          messages: [{ role: 'user', content:
+          messages: [{ role: 'user', content: }]
         )
 
         response.content.first.text
@@ -20,6 +25,28 @@ module LlmConductor
         raise StandardError, "Anthropic API error: #{e.message}"
       end
 
+      # Anthropic uses a different image format than OpenAI
+      # Format: { type: 'image', source: { type: 'url', url: '...' } }
+      def format_image_url(url)
+        { type: 'image', source: { type: 'url', url: } }
+      end
+
+      def format_image_hash(image_hash)
+        # Anthropic doesn't have a 'detail' parameter like OpenAI
+        {
+          type: 'image',
+          source: {
+            type: 'url',
+            url: image_hash[:url] || image_hash['url']
+          }
+        }
+      end
+
+      # Anthropic recommends placing images before text
+      def images_before_text?
+        true
+      end
+
       def client
         @client ||= begin
           config = LlmConductor.configuration.provider_config(:anthropic)
```
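For reference, the two image-part shapes in play here, side by side (URLs and detail values are illustrative). The OpenAI-style shape is the default inherited from the shared concern; `AnthropicClient` overrides it as shown above:

```ruby
# Default OpenAI-style part produced by VisionSupport (detail is optional):
openai_part = { type: 'image_url', image_url: { url: 'https://example.com/a.jpg', detail: 'high' } }

# Anthropic part produced by the overrides above (no detail parameter, and
# images_before_text? places it ahead of the text part):
anthropic_part = { type: 'image', source: { type: 'url', url: 'https://example.com/a.jpg' } }
```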
data/lib/llm_conductor/clients/concerns/vision_support.rb
ADDED
```ruby
# frozen_string_literal: true

module LlmConductor
  module Clients
    module Concerns
      # Shared module for vision/multimodal support across different LLM clients
      # Provides common functionality for formatting images and text content
      module VisionSupport
        private

        # Override token calculation to handle multimodal content
        def calculate_tokens(content)
          case content
          when String then super(content)
          when Hash then calculate_tokens_from_hash(content)
          when Array then calculate_tokens_from_array(content)
          else super(content.to_s)
          end
        end

        # Calculate tokens from a hash containing text and/or images
        # @param content_hash [Hash] Hash with :text and/or :images keys
        # @return [Integer] Token count for text portion
        def calculate_tokens_from_hash(content_hash)
          text = content_hash[:text] || content_hash['text'] || ''
          # Call the parent class's calculate_tokens with the extracted text
          method(:calculate_tokens).super_method.call(text)
        end

        # Calculate tokens from an array of content parts
        # @param content_array [Array] Array of content parts with type and text
        # @return [Integer] Token count for all text parts
        def calculate_tokens_from_array(content_array)
          text_parts = extract_text_from_array(content_array)
          # Call the parent class's calculate_tokens with the joined text
          method(:calculate_tokens).super_method.call(text_parts)
        end

        # Extract and join text from array of content parts
        # @param content_array [Array] Array of content parts
        # @return [String] Joined text from all text parts
        def extract_text_from_array(content_array)
          content_array
            .select { |part| text_part?(part) }
            .map { |part| extract_text_from_part(part) }
            .join(' ')
        end

        # Check if a content part is a text part
        # @param part [Hash] Content part
        # @return [Boolean] true if part is a text type
        def text_part?(part)
          part[:type] == 'text' || part['type'] == 'text'
        end

        # Extract text from a content part
        # @param part [Hash] Content part with text
        # @return [String] Text content
        def extract_text_from_part(part)
          part[:text] || part['text'] || ''
        end

        # Format content based on whether it's a simple string or multimodal content
        # @param prompt [String, Hash, Array] The prompt content
        # @return [String, Array] Formatted content for the API
        def format_content(prompt)
          case prompt
          when Hash
            # Handle hash with text and/or images
            format_multimodal_hash(prompt)
          when Array
            # Already formatted as array of content parts
            prompt
          else
            # Simple string prompt
            prompt.to_s
          end
        end

        # Format a hash containing text and/or images into multimodal content array
        # @param prompt_hash [Hash] Hash with :text and/or :images keys
        # @return [Array] Array of content parts for the API
        def format_multimodal_hash(prompt_hash)
          content_parts = []

          # Add image parts (order depends on provider)
          images = prompt_hash[:images] || prompt_hash['images'] || []
          images = [images] unless images.is_a?(Array)

          if images_before_text?
            # Anthropic recommends images before text
            images.each { |image| content_parts << format_image_part(image) }
            add_text_part(content_parts, prompt_hash)
          else
            # OpenAI/most others: text before images
            add_text_part(content_parts, prompt_hash)
            images.each { |image| content_parts << format_image_part(image) }
          end

          content_parts
        end

        # Add text part to content array if present
        # @param content_parts [Array] The content parts array
        # @param prompt_hash [Hash] Hash with :text key
        def add_text_part(content_parts, prompt_hash)
          return unless prompt_hash[:text] || prompt_hash['text']

          text = prompt_hash[:text] || prompt_hash['text']
          content_parts << { type: 'text', text: }
        end

        # Format an image into the appropriate API structure
        # This method should be overridden by clients that need different formats
        # @param image [String, Hash] Image URL or hash with url/detail keys
        # @return [Hash] Formatted image part for the API
        def format_image_part(image)
          case image
          when String
            format_image_url(image)
          when Hash
            format_image_hash(image)
          end
        end

        # Format a simple image URL string
        # Override this in subclasses for provider-specific format
        # @param url [String] Image URL
        # @return [Hash] Formatted image part
        def format_image_url(url)
          # Default: OpenAI format
          { type: 'image_url', image_url: { url: } }
        end

        # Format an image hash with url and optional detail
        # Override this in subclasses for provider-specific format
        # @param image_hash [Hash] Hash with url and optional detail keys
        # @return [Hash] Formatted image part
        def format_image_hash(image_hash)
          # Default: OpenAI format with detail support
          {
            type: 'image_url',
            image_url: {
              url: image_hash[:url] || image_hash['url'],
              detail: image_hash[:detail] || image_hash['detail']
            }.compact
          }
        end

        # Whether to place images before text in the content array
        # Override this in subclasses if needed (e.g., Anthropic recommends images first)
        # @return [Boolean] true if images should come before text
        def images_before_text?
          false
        end
      end
    end
  end
end
```
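The `method(:calculate_tokens).super_method.call(...)` calls above are the one non-obvious idiom in this module: from inside a helper method, the `super` keyword would look up a super-definition of the *helper* itself, so the module instead resolves the next `calculate_tokens` up the ancestor chain explicitly. A standalone sketch (Base/Helper/Client are hypothetical names, not from the gem):

```ruby
class Base
  def calculate_tokens(text)
    text.split.size # stand-in tokenizer
  end
end

module Helper
  def calculate_tokens(content)
    content.is_a?(String) ? super(content) : tokens_from_hash(content)
  end

  def tokens_from_hash(hash)
    # `super` here would look for Base#tokens_from_hash, which doesn't exist;
    # method(:calculate_tokens) binds the top-most definition (Helper's), and
    # super_method steps past it to Base's implementation.
    method(:calculate_tokens).super_method.call(hash[:text] || '')
  end
end

class Client < Base
  include Helper # ancestors: [Client, Helper, Base, ...]
end

puts Client.new.calculate_tokens('hello world')   # => 2, Helper -> super -> Base
puts Client.new.calculate_tokens(text: 'a b c')   # => 3, via super_method
```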
data/lib/llm_conductor/clients/gemini_client.rb
CHANGED
```diff
@@ -1,17 +1,27 @@
 # frozen_string_literal: true
 
 require 'gemini-ai'
+require 'base64'
+require 'net/http'
+require 'uri'
+require_relative 'concerns/vision_support'
 
 module LlmConductor
   module Clients
     # Google Gemini client implementation for accessing Gemini models via Google AI API
+    # Supports both text-only and multimodal (vision) requests
     class GeminiClient < BaseClient
+      include Concerns::VisionSupport
+
       private
 
       def generate_content(prompt)
+        content = format_content(prompt)
+        parts = build_parts_for_gemini(content)
+
         payload = {
           contents: [
-            { parts:
+            { parts: }
           ]
         }
 
@@ -19,6 +29,100 @@ module LlmConductor
         response.dig('candidates', 0, 'content', 'parts', 0, 'text')
       end
 
+      # Build parts array for Gemini API from formatted content
+      # Converts VisionSupport format to Gemini's specific format
+      # @param content [String, Array] Formatted content from VisionSupport
+      # @return [Array] Array of parts in Gemini format
+      def build_parts_for_gemini(content)
+        case content
+        when String
+          [{ text: content }]
+        when Array
+          content.map { |part| convert_to_gemini_part(part) }
+        else
+          [{ text: content.to_s }]
+        end
+      end
+
+      # Convert a VisionSupport formatted part to Gemini format
+      # @param part [Hash] Content part with type and data
+      # @return [Hash] Gemini-formatted part
+      def convert_to_gemini_part(part)
+        case part[:type]
+        when 'text'
+          { text: part[:text] }
+        when 'image_url'
+          convert_image_url_to_inline_data(part)
+        when 'inline_data'
+          part # Already in Gemini format
+        else
+          part
+        end
+      end
+
+      # Convert image_url part to Gemini's inline_data format
+      # @param part [Hash] Part with image_url
+      # @return [Hash] Gemini inline_data format
+      def convert_image_url_to_inline_data(part)
+        url = part.dig(:image_url, :url)
+        {
+          inline_data: {
+            mime_type: detect_mime_type(url),
+            data: fetch_and_encode_image(url)
+          }
+        }
+      end
+
+      # Fetch image from URL and encode as base64
+      # Gemini API requires images to be base64-encoded
+      # @param url [String] Image URL
+      # @return [String] Base64-encoded image data
+      def fetch_and_encode_image(url)
+        uri = URI.parse(url)
+        response = fetch_image_from_uri(uri)
+
+        raise StandardError, "HTTP #{response.code}" unless response.is_a?(Net::HTTPSuccess)
+
+        Base64.strict_encode64(response.body)
+      rescue StandardError => e
+        raise StandardError, "Error fetching image from #{url}: #{e.message}"
+      end
+
+      # Fetch image from URI using Net::HTTP
+      # @param uri [URI] Parsed URI
+      # @return [Net::HTTPResponse] HTTP response
+      def fetch_image_from_uri(uri)
+        http = create_http_client(uri)
+        request = Net::HTTP::Get.new(uri.request_uri)
+        http.request(request)
+      end
+
+      # Create HTTP client with SSL configuration
+      # @param uri [URI] Parsed URI
+      # @return [Net::HTTP] Configured HTTP client
+      def create_http_client(uri)
+        http = Net::HTTP.new(uri.host, uri.port)
+        return http unless uri.scheme == 'https'
+
+        http.use_ssl = true
+        http.verify_mode = OpenSSL::SSL::VERIFY_NONE
+        http
+      end
+
+      # Detect MIME type from URL file extension
+      # @param url [String] Image URL
+      # @return [String] MIME type (e.g., 'image/jpeg', 'image/png')
+      def detect_mime_type(url)
+        extension = File.extname(URI.parse(url).path).downcase
+        case extension
+        when '.jpg', '.jpeg' then 'image/jpeg'
+        when '.png' then 'image/png'
+        when '.gif' then 'image/gif'
+        when '.webp' then 'image/webp'
+        else 'image/jpeg' # Default to jpeg
+        end
+      end
+
       def client
         @client ||= begin
           config = LlmConductor.configuration.provider_config(:gemini)
```
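A standalone sketch of the fetch-and-encode step these new methods perform, using only the Ruby stdlib (placeholder URL; this simplified version skips the client's custom SSL handling and error wrapping):

```ruby
require 'base64'
require 'net/http'
require 'uri'

url = 'https://example.com/photo.png' # placeholder
response = Net::HTTP.get_response(URI.parse(url))
raise "HTTP #{response.code}" unless response.is_a?(Net::HTTPSuccess)

# Gemini's inline_data part wants raw base64 (strict_encode64: no newlines,
# no data: URI prefix); the client derives mime_type from the file extension.
part = {
  inline_data: {
    mime_type: 'image/png',
    data: Base64.strict_encode64(response.body)
  }
}
```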
data/lib/llm_conductor/clients/gpt_client.rb
CHANGED
```diff
@@ -1,13 +1,19 @@
 # frozen_string_literal: true
 
+require_relative 'concerns/vision_support'
+
 module LlmConductor
   module Clients
     # OpenAI GPT client implementation for accessing GPT models via OpenAI API
+    # Supports both text-only and multimodal (vision) requests
     class GptClient < BaseClient
+      include Concerns::VisionSupport
+
       private
 
       def generate_content(prompt)
-
+        content = format_content(prompt)
+        client.chat(parameters: { model:, messages: [{ role: 'user', content: }] })
              .dig('choices', 0, 'message', 'content')
       end
 
```
data/lib/llm_conductor/clients/openrouter_client.rb
CHANGED
```diff
@@ -1,32 +1,15 @@
 # frozen_string_literal: true
 
+require_relative 'concerns/vision_support'
+
 module LlmConductor
   module Clients
     # OpenRouter client implementation for accessing various LLM providers through OpenRouter API
     # Supports both text-only and multimodal (vision) requests
     class OpenrouterClient < BaseClient
-
+      include Concerns::VisionSupport
 
-
-      def calculate_tokens(content)
-        case content
-        when String
-          super(content)
-        when Hash
-          # For multimodal content, count tokens only for text part
-          # Note: This is an approximation as images have variable token counts
-          text = content[:text] || content['text'] || ''
-          super(text)
-        when Array
-          # For pre-formatted arrays, extract and count text parts
-          text_parts = content.select { |part| part[:type] == 'text' || part['type'] == 'text' }
-                              .map { |part| part[:text] || part['text'] || '' }
-                              .join(' ')
-          super(text_parts)
-        else
-          super(content.to_s)
-        end
-      end
+      private
 
       def generate_content(prompt)
         content = format_content(prompt)
@@ -61,66 +44,6 @@ module LlmConductor
         end
       end
 
-      # Format content based on whether it's a simple string or multimodal content
-      # @param prompt [String, Hash, Array] The prompt content
-      # @return [String, Array] Formatted content for the API
-      def format_content(prompt)
-        case prompt
-        when Hash
-          # Handle hash with text and/or images
-          format_multimodal_hash(prompt)
-        when Array
-          # Already formatted as array of content parts
-          prompt
-        else
-          # Simple string prompt
-          prompt.to_s
-        end
-      end
-
-      # Format a hash containing text and/or images into multimodal content array
-      # @param prompt_hash [Hash] Hash with :text and/or :images keys
-      # @return [Array] Array of content parts for the API
-      def format_multimodal_hash(prompt_hash)
-        content_parts = []
-
-        # Add text part if present
-        if prompt_hash[:text] || prompt_hash['text']
-          text = prompt_hash[:text] || prompt_hash['text']
-          content_parts << { type: 'text', text: }
-        end
-
-        # Add image parts if present
-        images = prompt_hash[:images] || prompt_hash['images'] || []
-        images = [images] unless images.is_a?(Array)
-
-        images.each do |image|
-          content_parts << format_image_part(image)
-        end
-
-        content_parts
-      end
-
-      # Format an image into the appropriate API structure
-      # @param image [String, Hash] Image URL or hash with url/detail keys
-      # @return [Hash] Formatted image part for the API
-      def format_image_part(image)
-        case image
-        when String
-          # Simple URL string
-          { type: 'image_url', image_url: { url: image } }
-        when Hash
-          # Hash with url and optional detail level
-          {
-            type: 'image_url',
-            image_url: {
-              url: image[:url] || image['url'],
-              detail: image[:detail] || image['detail']
-            }.compact
-          }
-        end
-      end
-
       def client
         @client ||= begin
           config = LlmConductor.configuration.provider_config(:openrouter)
```
data/lib/llm_conductor/clients/zai_client.rb
CHANGED
```diff
@@ -1,5 +1,7 @@
 # frozen_string_literal: true
 
+require_relative 'concerns/vision_support'
+
 module LlmConductor
   module Clients
     # Z.ai client implementation for accessing GLM models including GLM-4.5V
@@ -8,28 +10,9 @@ module LlmConductor
     # Note: Z.ai uses OpenAI-compatible API format but with /v4/ path instead of /v1/
     # We use Faraday directly instead of the ruby-openai gem to properly handle the API path
     class ZaiClient < BaseClient
-
+      include Concerns::VisionSupport
 
-
-      def calculate_tokens(content)
-        case content
-        when String
-          super(content)
-        when Hash
-          # For multimodal content, count tokens only for text part
-          # Note: This is an approximation as images have variable token counts
-          text = content[:text] || content['text'] || ''
-          super(text)
-        when Array
-          # For pre-formatted arrays, extract and count text parts
-          text_parts = content.select { |part| part[:type] == 'text' || part['type'] == 'text' }
-                              .map { |part| part[:text] || part['text'] || '' }
-                              .join(' ')
-          super(text_parts)
-        else
-          super(content.to_s)
-        end
-      end
+      private
 
       def generate_content(prompt)
         content = format_content(prompt)
@@ -67,66 +50,6 @@ module LlmConductor
         end
       end
 
-      # Format content based on whether it's a simple string or multimodal content
-      # @param prompt [String, Hash, Array] The prompt content
-      # @return [String, Array] Formatted content for the API
-      def format_content(prompt)
-        case prompt
-        when Hash
-          # Handle hash with text and/or images
-          format_multimodal_hash(prompt)
-        when Array
-          # Already formatted as array of content parts
-          prompt
-        else
-          # Simple string prompt
-          prompt.to_s
-        end
-      end
-
-      # Format a hash containing text and/or images into multimodal content array
-      # @param prompt_hash [Hash] Hash with :text and/or :images keys
-      # @return [Array] Array of content parts for the API
-      def format_multimodal_hash(prompt_hash)
-        content_parts = []
-
-        # Add text part if present
-        if prompt_hash[:text] || prompt_hash['text']
-          text = prompt_hash[:text] || prompt_hash['text']
-          content_parts << { type: 'text', text: }
-        end
-
-        # Add image parts if present
-        images = prompt_hash[:images] || prompt_hash['images'] || []
-        images = [images] unless images.is_a?(Array)
-
-        images.each do |image|
-          content_parts << format_image_part(image)
-        end
-
-        content_parts
-      end
-
-      # Format an image into the appropriate API structure
-      # @param image [String, Hash] Image URL or hash with url/detail keys
-      # @return [Hash] Formatted image part for the API
-      def format_image_part(image)
-        case image
-        when String
-          # Simple URL string or base64 data
-          { type: 'image_url', image_url: { url: image } }
-        when Hash
-          # Hash with url and optional detail level
-          {
-            type: 'image_url',
-            image_url: {
-              url: image[:url] || image['url'],
-              detail: image[:detail] || image['detail']
-            }.compact
-          }
-        end
-      end
-
       # HTTP client for making requests to Z.ai API
       # Z.ai uses /v4/ in their path, not /v1/ like OpenAI, so we use Faraday directly
       def http_client
```
metadata
CHANGED
```diff
@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: llm_conductor
 version: !ruby/object:Gem::Version
-  version: 1.
+  version: 1.4.0
 platform: ruby
 authors:
 - Ben Zheng
 bindir: exe
 cert_chain: []
-date: 2025-
+date: 2025-11-13 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: activesupport
@@ -154,8 +154,11 @@ files:
 - Rakefile
 - VISION_USAGE.md
 - config/initializers/llm_conductor.rb
+- examples/claude_vision_usage.rb
 - examples/data_builder_usage.rb
 - examples/gemini_usage.rb
+- examples/gemini_vision_usage.rb
+- examples/gpt_vision_usage.rb
 - examples/groq_usage.rb
 - examples/openrouter_vision_usage.rb
 - examples/prompt_registration.rb
@@ -166,6 +169,7 @@ files:
 - lib/llm_conductor/client_factory.rb
 - lib/llm_conductor/clients/anthropic_client.rb
 - lib/llm_conductor/clients/base_client.rb
+- lib/llm_conductor/clients/concerns/vision_support.rb
 - lib/llm_conductor/clients/gemini_client.rb
 - lib/llm_conductor/clients/gpt_client.rb
 - lib/llm_conductor/clients/groq_client.rb
```