llm_conductor 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/VISION_USAGE.md +60 -2
- data/examples/gemini_usage.rb +1 -1
- data/examples/gemini_vision_usage.rb +168 -0
- data/lib/llm_conductor/clients/gemini_client.rb +105 -1
- data/lib/llm_conductor/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 8e5bb3310ea1328acac93c59e7bc227e63de52397e51402eee0eb921eb92acc8
|
|
4
|
+
data.tar.gz: cacb73f7d04e46a100581df3b77781a98d0d4277fc726a0b32c266443999f66a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 97d9c89718834420532c391c207790b416048b65958f3b6d7feac008f099cc533c644c614a014bc437b1631cf9fa60c2d0e340b8bea8316217e950af5449764b
|
|
7
|
+
data.tar.gz: 4fb3301001cebc258485568ebfa0078b3773b3563dd5149c8cd7527cc9a306ae41785fea6a73f09d1dca19a09f1af92b88611efa283e80efed317ef876f59330
|
data/VISION_USAGE.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Vision/Multimodal Usage Guide
|
|
2
2
|
|
|
3
|
-
This guide explains how to use vision/multimodal capabilities with LLM Conductor. Vision support is available for Claude (Anthropic), GPT (OpenAI), OpenRouter, and Z.ai clients.
|
|
3
|
+
This guide explains how to use vision/multimodal capabilities with LLM Conductor. Vision support is available for Claude (Anthropic), GPT (OpenAI), Gemini (Google), OpenRouter, and Z.ai clients.
|
|
4
4
|
|
|
5
5
|
## Quick Start
|
|
6
6
|
|
|
@@ -73,6 +73,29 @@ response = LlmConductor.generate(
|
|
|
73
73
|
puts response.output
|
|
74
74
|
```
|
|
75
75
|
|
|
76
|
+
### Using Gemini (Google)
|
|
77
|
+
|
|
78
|
+
```ruby
|
|
79
|
+
require 'llm_conductor'
|
|
80
|
+
|
|
81
|
+
# Configure
|
|
82
|
+
LlmConductor.configure do |config|
|
|
83
|
+
config.gemini(api_key: ENV['GEMINI_API_KEY'])
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
# Analyze an image
|
|
87
|
+
response = LlmConductor.generate(
|
|
88
|
+
model: 'gemini-2.5-flash',
|
|
89
|
+
vendor: :gemini,
|
|
90
|
+
prompt: {
|
|
91
|
+
text: 'What is in this image?',
|
|
92
|
+
images: 'https://cdn.autonomous.ai/production/ecm/230930/10-Comfortable-Office-Chairs-for-Gaming-A-Comprehensive-Review00002.webp'
|
|
93
|
+
}
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
puts response.output
|
|
97
|
+
```
|
|
98
|
+
|
|
76
99
|
### Using Z.ai (Zhipu AI)
|
|
77
100
|
|
|
78
101
|
```ruby
|
|
@@ -124,6 +147,17 @@ For vision tasks via OpenRouter, these models work reliably:
|
|
|
124
147
|
- **`anthropic/claude-3.5-sonnet`** - High quality analysis
|
|
125
148
|
- **`openai/gpt-4o`** - Best quality (higher cost)
|
|
126
149
|
|
|
150
|
+
### Gemini Models (Google)
|
|
151
|
+
|
|
152
|
+
For vision tasks via Google Gemini API:
|
|
153
|
+
|
|
154
|
+
- **`gemini-2.0-flash`** - Gemini 2.0 Flash (fast, efficient, multimodal) ✅
|
|
155
|
+
- **`gemini-2.5-flash`** - Gemini 2.5 Flash (latest fast model)
|
|
156
|
+
- **`gemini-1.5-pro`** - Gemini 1.5 Pro (high quality, large context window)
|
|
157
|
+
- **`gemini-1.5-flash`** - Gemini 1.5 Flash (previous generation fast model)
|
|
158
|
+
|
|
159
|
+
**Note:** The Gemini client automatically downloads images from URLs and base64-encodes them, because it sends images to the Gemini API as inline data.
|
|
160
|
+
|
|
127
161
|
### Z.ai Models (Zhipu AI)
|
|
128
162
|
|
|
129
163
|
For vision tasks via Z.ai, these GLM models are recommended:
|
|
@@ -186,7 +220,7 @@ Detail levels (GPT and OpenRouter only):
|
|
|
186
220
|
- `'low'` - Faster, cheaper (default if not specified)
|
|
187
221
|
- `'auto'` - Let the model decide
|
|
188
222
|
|
|
189
|
-
**Note:** Claude (Anthropic) and Z.ai don't support the `detail` parameter.
|
|
223
|
+
**Note:** Claude (Anthropic), Gemini (Google), and Z.ai don't support the `detail` parameter.
|
|
190
224
|
|
|
191
225
|
### 4. Raw Format (Advanced)
|
|
192
226
|
|
|
@@ -217,6 +251,18 @@ response = LlmConductor.generate(
|
|
|
217
251
|
)
|
|
218
252
|
```
|
|
219
253
|
|
|
254
|
+
**Gemini Format:**
|
|
255
|
+
```ruby
|
|
256
|
+
response = LlmConductor.generate(
|
|
257
|
+
model: 'gemini-2.0-flash',
|
|
258
|
+
vendor: :gemini,
|
|
259
|
+
prompt: [
|
|
260
|
+
{ type: 'text', text: 'What is in this image? Describe it in detail.' },
|
|
261
|
+
{ type: 'image_url', image_url: { url: 'https://example.com/image.jpg' } }
|
|
262
|
+
]
|
|
263
|
+
)
|
|
264
|
+
```
|
|
265
|
+
|
|
220
266
|
## Text-Only Requests (Backward Compatible)
|
|
221
267
|
|
|
222
268
|
The client still supports regular text-only requests:
|
|
@@ -236,6 +282,10 @@ response = LlmConductor.generate(
|
|
|
236
282
|
- Maximum file size depends on the model
|
|
237
283
|
- Use HTTPS URLs when possible
|
|
238
284
|
|
|
285
|
+
**Provider-Specific Notes:**
|
|
286
|
+
- **Gemini**: URLs are automatically fetched and base64-encoded by the client before sending to the API
|
|
287
|
+
- **Claude, GPT, OpenRouter, Z.ai**: URLs are sent directly to the API (no preprocessing required)
|
|
288
|
+
|
|
239
289
|
## Error Handling
|
|
240
290
|
|
|
241
291
|
```ruby
|
|
@@ -300,6 +350,12 @@ export OPENROUTER_API_KEY='your-key'
|
|
|
300
350
|
ruby examples/openrouter_vision_usage.rb
|
|
301
351
|
```
|
|
302
352
|
|
|
353
|
+
For Gemini:
|
|
354
|
+
```bash
|
|
355
|
+
export GEMINI_API_KEY='your-key'
|
|
356
|
+
ruby examples/gemini_vision_usage.rb
|
|
357
|
+
```
|
|
358
|
+
|
|
303
359
|
For Z.ai:
|
|
304
360
|
```bash
|
|
305
361
|
export ZAI_API_KEY='your-key'
|
|
@@ -357,6 +413,7 @@ For production:
|
|
|
357
413
|
|
|
358
414
|
- `examples/claude_vision_usage.rb` - Complete Claude vision examples with Claude Sonnet 4
|
|
359
415
|
- `examples/gpt_vision_usage.rb` - Complete GPT vision examples with GPT-4o
|
|
416
|
+
- `examples/gemini_vision_usage.rb` - Complete Gemini vision examples with Gemini 2.0 Flash
|
|
360
417
|
- `examples/openrouter_vision_usage.rb` - Complete OpenRouter vision examples
|
|
361
418
|
- `examples/zai_usage.rb` - Complete Z.ai GLM-4.5V examples including vision and text
|
|
362
419
|
|
|
@@ -365,6 +422,7 @@ For production:
|
|
|
365
422
|
- [OpenRouter Documentation](https://openrouter.ai/docs)
|
|
366
423
|
- [OpenAI Vision API Reference](https://platform.openai.com/docs/guides/vision)
|
|
367
424
|
- [Anthropic Claude Vision](https://docs.anthropic.com/claude/docs/vision)
|
|
425
|
+
- [Google Gemini API Documentation](https://ai.google.dev/docs)
|
|
368
426
|
- [Z.ai API Platform](https://api.z.ai/)
|
|
369
427
|
- [GLM-4.5V Documentation](https://bigmodel.cn/)
|
|
370
428
|
|
data/examples/gemini_usage.rb
CHANGED
|
@@ -4,7 +4,7 @@ require_relative '../lib/llm_conductor'
|
|
|
4
4
|
|
|
5
5
|
# Configure Gemini API key
|
|
6
6
|
LlmConductor.configure do |config|
|
|
7
|
-
config.
|
|
7
|
+
config.gemini(api_key: ENV['GEMINI_API_KEY'] || 'your_gemini_api_key_here')
|
|
8
8
|
end
|
|
9
9
|
|
|
10
10
|
# Example usage
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../lib/llm_conductor'
|
|
4
|
+
|
|
5
|
+
# Register the Gemini provider. A placeholder key is used when GEMINI_API_KEY
# is not set, so the script still loads (requests will then be rejected).
LlmConductor.configure do |settings|
  settings.gemini(api_key: ENV.fetch('GEMINI_API_KEY', 'your_gemini_api_key_here'))
end

model_name = 'gemini-2.0-flash'
heavy_rule = '=' * 80
light_rule = '-' * 40
boardwalk_url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg'
placeholder_url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/3/3f/Placeholder_view_vector.svg/681px-Placeholder_view_vector.svg.png'

# Prints the heading of one example. The very first example follows the script
# banner directly, so it omits the leading full-width rule.
heading = lambda do |title, rule: true|
  puts heavy_rule if rule
  puts title
  puts light_rule
end

# Prints the selected response fields, then the generated text and a blank line.
report = lambda do |result, vendor: false, tokens: true|
  puts "Model: #{result.model}"
  puts "Vendor: #{result.metadata[:vendor]}" if vendor
  if tokens
    puts "Input tokens: #{result.input_tokens}"
    puts "Output tokens: #{result.output_tokens}"
  end
  puts "\nResponse:"
  puts result.output
  puts
end

puts heavy_rule
puts 'Google Gemini Vision Examples'
puts heavy_rule
puts

# Example 1: Single image analysis (simple format)
heading.call('Example 1: Single Image Analysis', rule: false)
report.call(
  LlmConductor.generate(
    model: model_name,
    vendor: :gemini,
    prompt: {
      text: 'What is in this image? Describe it in detail.',
      images: boardwalk_url
    }
  ),
  vendor: true
)

# Example 2: Multiple images comparison
heading.call('Example 2: Multiple Images Comparison')
report.call(
  LlmConductor.generate(
    model: model_name,
    vendor: :gemini,
    prompt: {
      text: 'Compare these images. What are the main differences?',
      images: [boardwalk_url, placeholder_url]
    }
  )
)

# Example 3: Raw format - an explicit, ordered list of typed content parts
heading.call('Example 3: Raw Format (Gemini-specific)')
report.call(
  LlmConductor.generate(
    model: model_name,
    vendor: :gemini,
    prompt: [
      { type: 'text', text: 'Analyze this nature scene:' },
      { type: 'image_url', image_url: { url: boardwalk_url } },
      { type: 'text', text: 'What time of day do you think this photo was taken?' }
    ]
  )
)

# Example 4: Image with specific analysis request
heading.call('Example 4: Specific Analysis Request')
report.call(
  LlmConductor.generate(
    model: model_name,
    vendor: :gemini,
    prompt: {
      text: 'Count the number of distinct colors visible in this image and list them.',
      images: boardwalk_url
    }
  ),
  tokens: false
)

# Example 5: Error handling - the image URL below is not expected to resolve
heading.call('Example 5: Error Handling')
begin
  outcome = LlmConductor.generate(
    model: model_name,
    vendor: :gemini,
    prompt: {
      text: 'What is in this image?',
      images: 'https://example.com/nonexistent-image.jpg'
    }
  )

  if outcome.success?
    puts 'Success! Response:'
    puts outcome.output
  else
    puts "Request failed: #{outcome.metadata[:error]}"
  end
rescue StandardError => e
  puts "Error occurred: #{e.message}"
end
puts

# Example 6: Text-only request (backward compatibility)
heading.call('Example 6: Text-Only Request (No Images)')
report.call(
  LlmConductor.generate(
    model: model_name,
    vendor: :gemini,
    prompt: 'Explain how neural networks work in 3 sentences.'
  )
)

# Example 7: Image with hash format (URL specified explicitly)
heading.call('Example 7: Image Hash Format')
report.call(
  LlmConductor.generate(
    model: model_name,
    vendor: :gemini,
    prompt: {
      text: 'Describe the mood and atmosphere of this image.',
      images: [{ url: boardwalk_url }]
    }
  ),
  tokens: false
)

puts heavy_rule
puts 'Examples completed!'
puts heavy_rule
|
|
@@ -1,17 +1,27 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'gemini-ai'
|
|
4
|
+
require 'base64'
|
|
5
|
+
require 'net/http'
|
|
6
|
+
require 'uri'
|
|
7
|
+
require_relative 'concerns/vision_support'
|
|
4
8
|
|
|
5
9
|
module LlmConductor
|
|
6
10
|
module Clients
|
|
7
11
|
# Google Gemini client implementation for accessing Gemini models via Google AI API
|
|
12
|
+
# Supports both text-only and multimodal (vision) requests
|
|
8
13
|
class GeminiClient < BaseClient
|
|
14
|
+
include Concerns::VisionSupport
|
|
15
|
+
|
|
9
16
|
private
|
|
10
17
|
|
|
11
18
|
def generate_content(prompt)
|
|
19
|
+
content = format_content(prompt)
|
|
20
|
+
parts = build_parts_for_gemini(content)
|
|
21
|
+
|
|
12
22
|
payload = {
|
|
13
23
|
contents: [
|
|
14
|
-
{ parts:
|
|
24
|
+
{ parts: }
|
|
15
25
|
]
|
|
16
26
|
}
|
|
17
27
|
|
|
@@ -19,6 +29,100 @@ module LlmConductor
|
|
|
19
29
|
response.dig('candidates', 0, 'content', 'parts', 0, 'text')
|
|
20
30
|
end
|
|
21
31
|
|
|
32
|
+
# Translate formatted prompt content into the +parts+ array of a Gemini request.
#
# @param content [String, Array] formatted content: plain text, or a list of
#   typed content parts
# @return [Array<Hash>] parts in Gemini format
def build_parts_for_gemini(content)
  # A list is converted element by element.
  return content.map { |item| convert_to_gemini_part(item) } if content.is_a?(Array)

  # A String is used as-is; any other object is stringified into one text part.
  text = content.is_a?(String) ? content : content.to_s
  [{ text: text }]
end
|
|
46
|
+
|
|
47
|
+
# Convert one formatted content part into a Gemini request part.
#
# @param part [Hash, String] content part carrying a +:type+ key, or a bare
#   string that is treated as text
# @return [Hash] Gemini-formatted part
def convert_to_gemini_part(part)
  # A bare string inside a raw prompt array is taken as a text part; indexing
  # a String with a Symbol would otherwise raise TypeError.
  return { text: part } if part.is_a?(String)

  case part[:type]
  when 'text' then { text: part[:text] }
  when 'image_url' then convert_image_url_to_inline_data(part)
  else part # 'inline_data' and unrecognised parts are passed through untouched
  end
end
|
|
62
|
+
|
|
63
|
+
# Convert an +image_url+ part into Gemini's +inline_data+ part by downloading
# the image and embedding it as base64.
#
# @param part [Hash] part whose URL is stored under +[:image_url][:url]+
# @return [Hash] +{ inline_data: { mime_type:, data: } }+
# @raise [ArgumentError] if the part carries no URL
def convert_image_url_to_inline_data(part)
  url = part.dig(:image_url, :url)
  # Fail early with a descriptive message instead of letting a nil or blank
  # URL surface as an opaque URI/HTTP error further down.
  raise ArgumentError, 'image_url part is missing a :url' if url.nil? || url.to_s.strip.empty?

  {
    inline_data: {
      mime_type: detect_mime_type(url),
      data: fetch_and_encode_image(url)
    }
  }
end
|
|
75
|
+
|
|
76
|
+
# Download an image over HTTP(S) and return its bytes base64-encoded, ready to
# be embedded as Gemini +inline_data+.
#
# @param url [String] image URL (http or https)
# @return [String] Base64-encoded image data (strict encoding, no line breaks)
# @raise [StandardError] on an invalid or non-HTTP(S) URL, a non-2xx response,
#   an empty body, or any network error; the message always names the URL
def fetch_and_encode_image(url)
  uri = URI.parse(url)
  # URI::HTTPS inherits from URI::HTTP, so this admits http and https only and
  # keeps other schemes (ftp:, file:, ...) away from Net::HTTP.
  unless uri.is_a?(URI::HTTP) && !uri.host.to_s.empty?
    raise ArgumentError, "unsupported URL scheme #{uri.scheme.inspect} or missing host; expected http or https"
  end

  response = fetch_image_from_uri(uri)
  raise StandardError, "HTTP #{response.code}" unless response.is_a?(Net::HTTPSuccess)

  body = response.body
  # A successful response without content cannot be a usable image.
  raise StandardError, 'empty response body' if body.nil? || body.empty?

  Base64.strict_encode64(body)
rescue StandardError => e
  # Re-raise with the URL for context; Ruby keeps the original error as #cause.
  raise StandardError, "Error fetching image from #{url}: #{e.message}"
end
|
|
90
|
+
|
|
91
|
+
# Issue a single GET request for the given URI.
#
# @param uri [URI] parsed URI of the image
# @return [Net::HTTPResponse] the response as returned by Net::HTTP#request
def fetch_image_from_uri(uri)
  create_http_client(uri).request(Net::HTTP::Get.new(uri.request_uri))
end
|
|
99
|
+
|
|
100
|
+
# Build a Net::HTTP client for the URI's host and port, enabling TLS for https.
#
# @param uri [URI] parsed URI
# @return [Net::HTTP] configured (not yet started) HTTP client
def create_http_client(uri)
  Net::HTTP.new(uri.host, uri.port).tap do |http|
    next unless uri.scheme == 'https'

    http.use_ssl = true
    # Verify the server certificate. VERIFY_NONE would accept any certificate
    # and leave the download open to man-in-the-middle tampering.
    http.verify_mode = OpenSSL::SSL::VERIFY_PEER
  end
end
|
|
111
|
+
|
|
112
|
+
# Guess an image MIME type from the file extension in the URL path.
# The query string is ignored and matching is case-insensitive.
#
# @param url [String] image URL
# @return [String] MIME type (e.g. 'image/jpeg', 'image/png'); falls back to
#   'image/jpeg' when the extension is missing or unrecognised
def detect_mime_type(url)
  # URI#path can be nil (e.g. for opaque URIs such as "mailto:"), and
  # File.extname(nil) raises TypeError, so coerce to a String first.
  case File.extname(URI.parse(url).path.to_s).downcase
  when '.jpg', '.jpeg' then 'image/jpeg'
  when '.png' then 'image/png'
  when '.gif' then 'image/gif'
  when '.webp' then 'image/webp'
  when '.heic' then 'image/heic'
  when '.heif' then 'image/heif'
  else 'image/jpeg' # Default to jpeg
  end
end
|
|
125
|
+
|
|
22
126
|
def client
|
|
23
127
|
@client ||= begin
|
|
24
128
|
config = LlmConductor.configuration.provider_config(:gemini)
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: llm_conductor
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.3.0
|
|
4
|
+
version: 1.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ben Zheng
|
|
8
8
|
bindir: exe
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date: 2025-11-
|
|
10
|
+
date: 2025-11-13 00:00:00.000000000 Z
|
|
11
11
|
dependencies:
|
|
12
12
|
- !ruby/object:Gem::Dependency
|
|
13
13
|
name: activesupport
|
|
@@ -157,6 +157,7 @@ files:
|
|
|
157
157
|
- examples/claude_vision_usage.rb
|
|
158
158
|
- examples/data_builder_usage.rb
|
|
159
159
|
- examples/gemini_usage.rb
|
|
160
|
+
- examples/gemini_vision_usage.rb
|
|
160
161
|
- examples/gpt_vision_usage.rb
|
|
161
162
|
- examples/groq_usage.rb
|
|
162
163
|
- examples/openrouter_vision_usage.rb
|