llm_conductor 1.1.2 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +11 -1
- data/README.md +87 -3
- data/VISION_USAGE.md +146 -9
- data/examples/claude_vision_usage.rb +138 -0
- data/examples/gpt_vision_usage.rb +156 -0
- data/examples/zai_usage.rb +163 -0
- data/lib/llm_conductor/client_factory.rb +4 -1
- data/lib/llm_conductor/clients/anthropic_client.rb +28 -1
- data/lib/llm_conductor/clients/concerns/vision_support.rb +159 -0
- data/lib/llm_conductor/clients/gpt_client.rb +7 -1
- data/lib/llm_conductor/clients/openrouter_client.rb +4 -81
- data/lib/llm_conductor/clients/zai_client.rb +76 -0
- data/lib/llm_conductor/configuration.rb +17 -0
- data/lib/llm_conductor/prompt_manager.rb +1 -3
- data/lib/llm_conductor/version.rb +1 -1
- data/lib/llm_conductor.rb +5 -3
- metadata +7 -2
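
The most interesting structural change in this release is the shared vision concern: `clients/openrouter_client.rb` shrinks by 81 lines while `clients/concerns/vision_support.rb` gains 159, which suggests the image-handling logic was extracted so the Claude, GPT, OpenRouter, and Z.ai clients can reuse it. The concern's actual interface is not shown in this diff, so the sketch below is only an illustration of the kind of normalization such a concern would need, based on the `{ text:, images: }` prompt format documented in the README and VISION_USAGE changes further down; the `VisionSketch` module and `normalize` method names are hypothetical.

```ruby
# Hypothetical sketch only: the gem's real VisionSupport API is not shown in this diff.
# It illustrates normalizing the documented prompt formats into OpenAI-style content parts.
module VisionSketch
  def self.normalize(prompt)
    return [{ type: 'text', text: prompt }] if prompt.is_a?(String) # plain text prompt
    return prompt if prompt.is_a?(Array)                            # raw format passes through

    # { text:, images: } format: one text part plus one image_url part per image
    parts = [{ type: 'text', text: prompt[:text] }]
    Array(prompt[:images]).each do |image|
      url, detail = image.is_a?(Hash) ? [image[:url], image[:detail]] : [image, nil]
      image_url = { url: url }
      image_url[:detail] = detail if detail
      parts << { type: 'image_url', image_url: image_url }
    end
    parts
  end
end

# Example: a single image URL becomes a text part plus an image_url part.
p VisionSketch.normalize({ text: 'What is in this image?', images: 'https://example.com/image.jpg' })
```

Claude uses a different content-block shape (`type: 'image'` with a `source`), as the VISION_USAGE raw formats below show, so the real concern presumably emits provider-specific parts rather than only the OpenAI-style shape sketched here.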
checksums.yaml
CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: bce592da24b8bb09f9702361a8d2de5051092290dd3b263f0026ddb877a8717b
+  data.tar.gz: 364a233ac3b1490010d949e15f83a3c45a5750ed117674ae2498508884cc365a
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 3ea0a7fc5d5fe1f729e6eb76b9b81eb5b24aaad96ba59ef954637e00184eded4f6fd44c591ee3921f86dd3131403fc496a77b355bd59e60158849c2e3af44511
+  data.tar.gz: 322cfca7d9e8917761af1b5de1033d9c11f58fceaa8d79aa18feee6b65050c4ab123c340479d44adeafa42f85b25c9e406e17ffda0af0ed6f871cb3d4d7d682f
data/.rubocop.yml
CHANGED

@@ -29,11 +29,15 @@ Style/HashSyntax:
 Lint/ConstantDefinitionInBlock:
   Enabled: false

+Metrics/ClassLength:
+  Max: 120
+
 Metrics/MethodLength:
   Max: 15
   Exclude:
     - 'lib/llm_conductor/prompts.rb'
     - 'lib/llm_conductor/clients/openrouter_client.rb'
+    - 'lib/llm_conductor/clients/zai_client.rb'

 RSpec/ExampleLength:
   Enabled: false
@@ -91,19 +95,25 @@ Metrics/AbcSize:
   Exclude:
     - 'lib/llm_conductor/prompts.rb'
     - 'lib/llm_conductor/clients/openrouter_client.rb'
+    - 'lib/llm_conductor/clients/zai_client.rb'

 Metrics/CyclomaticComplexity:
   Exclude:
+    - 'lib/llm_conductor.rb'
     - 'lib/llm_conductor/prompts.rb'
     - 'lib/llm_conductor/clients/openrouter_client.rb'
+    - 'lib/llm_conductor/clients/zai_client.rb'

 Metrics/PerceivedComplexity:
   Exclude:
     - 'lib/llm_conductor/prompts.rb'
     - 'lib/llm_conductor/clients/openrouter_client.rb'
+    - 'lib/llm_conductor/clients/zai_client.rb'

 Layout/LineLength:
-  Max:
+  Max: 125
+  Exclude:
+    - 'examples/*.rb'

 # Performance cops (from .rubocop_todo.yml)
 Performance/RedundantEqualityComparisonBlock:
data/README.md
CHANGED

@@ -1,12 +1,12 @@
 # LLM Conductor

-A powerful Ruby gem from [Ekohe](https://ekohe.com) for orchestrating multiple Language Model providers with a unified, modern interface. LLM Conductor provides seamless integration with OpenAI GPT, Anthropic Claude, Google Gemini, Groq, Ollama, and
+A powerful Ruby gem from [Ekohe](https://ekohe.com) for orchestrating multiple Language Model providers with a unified, modern interface. LLM Conductor provides seamless integration with OpenAI GPT, Anthropic Claude, Google Gemini, Groq, Ollama, OpenRouter, and Z.ai (Zhipu AI) with advanced prompt management, data building patterns, vision/multimodal support, and comprehensive response handling.

 ## Features

-🚀 **Multi-Provider Support** - OpenAI GPT, Anthropic Claude, Google Gemini, Groq, Ollama, and
+🚀 **Multi-Provider Support** - OpenAI GPT, Anthropic Claude, Google Gemini, Groq, Ollama, OpenRouter, and Z.ai with automatic vendor detection
 🎯 **Unified Modern API** - Simple `LlmConductor.generate()` interface with rich Response objects
-🖼️ **Vision/Multimodal Support** - Send images alongside text prompts for vision-enabled models (OpenRouter)
+🖼️ **Vision/Multimodal Support** - Send images alongside text prompts for vision-enabled models (OpenRouter, Z.ai GLM-4.5V)
 📝 **Advanced Prompt Management** - Registrable prompt classes with inheritance and templating
 🏗️ **Data Builder Pattern** - Structured data preparation for complex LLM inputs
 ⚡ **Smart Configuration** - Rails-style configuration with environment variable support
@@ -120,6 +120,11 @@ LlmConductor.configure do |config|
     uri_base: 'https://openrouter.ai/api/v1' # Optional, this is the default
   )

+  config.zai(
+    api_key: ENV['ZAI_API_KEY'],
+    uri_base: 'https://api.z.ai/api/paas/v4' # Optional, this is the default
+  )
+
   # Optional: Configure custom logger
   config.logger = Logger.new($stdout) # Log to stdout
   config.logger = Logger.new('log/llm_conductor.log') # Log to file
@@ -160,6 +165,7 @@ The gem automatically detects these environment variables:
 - `GROQ_API_KEY` - Groq API key
 - `OLLAMA_ADDRESS` - Ollama server address
 - `OPENROUTER_API_KEY` - OpenRouter API key
+- `ZAI_API_KEY` - Z.ai (Zhipu AI) API key

 ## Supported Providers & Models

@@ -309,6 +315,81 @@ LlmConductor.configure do |config|
 end
 ```

+### Z.ai (Zhipu AI) - GLM Models with Vision Support
+
+Z.ai provides access to GLM (General Language Model) series including the powerful GLM-4.5V multimodal model with 64K context window and vision capabilities.
+
+**Text models:**
+- `glm-4-plus` - Enhanced text-only model
+- `glm-4` - Standard GLM-4 model
+
+**Vision-capable models:**
+- `glm-4.5v` - Latest multimodal model with 64K context ✅ **RECOMMENDED**
+- `glm-4v` - Previous generation vision model
+
+```ruby
+# Text-only request with GLM-4-plus
+response = LlmConductor.generate(
+  model: 'glm-4-plus',
+  vendor: :zai,
+  prompt: 'Explain quantum computing in simple terms'
+)
+
+# Vision request with GLM-4.5V - single image
+response = LlmConductor.generate(
+  model: 'glm-4.5v',
+  vendor: :zai,
+  prompt: {
+    text: 'What is in this image?',
+    images: 'https://example.com/image.jpg'
+  }
+)
+
+# Vision request with multiple images
+response = LlmConductor.generate(
+  model: 'glm-4.5v',
+  vendor: :zai,
+  prompt: {
+    text: 'Compare these images and identify differences',
+    images: [
+      'https://example.com/image1.jpg',
+      'https://example.com/image2.jpg'
+    ]
+  }
+)
+
+# Vision request with detail level
+response = LlmConductor.generate(
+  model: 'glm-4.5v',
+  vendor: :zai,
+  prompt: {
+    text: 'Analyze this document in detail',
+    images: [
+      { url: 'https://example.com/document.jpg', detail: 'high' }
+    ]
+  }
+)
+
+# Base64 encoded local images
+require 'base64'
+image_data = Base64.strict_encode64(File.read('path/to/image.jpg'))
+response = LlmConductor.generate(
+  model: 'glm-4.5v',
+  vendor: :zai,
+  prompt: {
+    text: 'What is in this image?',
+    images: "data:image/jpeg;base64,#{image_data}"
+  }
+)
+```
+
+**GLM-4.5V Features:**
+- 64K token context window
+- Multimodal understanding (text + images)
+- Document understanding and OCR
+- Image reasoning and analysis
+- Base64 image support for local files
+- OpenAI-compatible API format
+
 ### Vendor Detection

 The gem automatically detects the appropriate provider based on model names:
@@ -316,6 +397,7 @@ The gem automatically detects the appropriate provider based on model names:
 - **OpenAI**: Models starting with `gpt-` (e.g., `gpt-4`, `gpt-3.5-turbo`)
 - **Anthropic**: Models starting with `claude-` (e.g., `claude-3-5-sonnet-20241022`)
 - **Google Gemini**: Models starting with `gemini-` (e.g., `gemini-2.5-flash`, `gemini-2.0-flash`)
+- **Z.ai**: Models starting with `glm-` (e.g., `glm-4.5v`, `glm-4-plus`, `glm-4v`)
 - **Groq**: Models starting with `llama`, `mixtral`, `gemma`, or `qwen` (e.g., `llama-3.1-70b-versatile`, `mixtral-8x7b-32768`, `gemma-7b-it`, `qwen-2.5-72b-instruct`)
 - **Ollama**: All other models (e.g., `llama3.2`, `mistral`, `codellama`)
@@ -569,6 +651,8 @@ Check the `/examples` directory for comprehensive usage examples:
 - `rag_usage.rb` - RAG implementation examples
 - `gemini_usage.rb` - Google Gemini integration
 - `groq_usage.rb` - Groq integration with various models
+- `openrouter_vision_usage.rb` - OpenRouter vision/multimodal examples
+- `zai_usage.rb` - Z.ai GLM-4.5V vision and text examples

 ## Development
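
For readers skimming the README diff above, the vendor-detection rules reduce to a prefix match on the model name. The snippet below only restates those documented rules; it is not the gem's implementation (that lives in `lib/llm_conductor/client_factory.rb`, which this diff shows only as a 4-line change). The `:gemini`, `:groq`, and `:ollama` symbols, and the hyphenated `llama-` check used to keep Groq's `llama-3.1-...` separate from Ollama's `llama3.2`, are assumptions.

```ruby
# Illustration of the README's documented prefix rules; not the gem's actual code.
def detect_vendor(model)
  case model
  when /\Agpt-/ then :openai
  when /\Aclaude-/ then :anthropic
  when /\Agemini-/ then :gemini                    # assumed symbol
  when /\Aglm-/ then :zai
  when /\A(llama-|mixtral|gemma|qwen)/ then :groq  # assumption: hyphen keeps 'llama3.2' out
  else :ollama                                     # everything else falls back to Ollama
  end
end

detect_vendor('glm-4.5v')                 # => :zai
detect_vendor('claude-sonnet-4-20250514') # => :anthropic
detect_vendor('llama3.2')                 # => :ollama (no hyphen after 'llama')
```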
data/VISION_USAGE.md
CHANGED

@@ -1,9 +1,57 @@
 # Vision/Multimodal Usage Guide

-This guide explains how to use vision/multimodal capabilities with
+This guide explains how to use vision/multimodal capabilities with LLM Conductor. Vision support is available for Claude (Anthropic), GPT (OpenAI), OpenRouter, and Z.ai clients.

 ## Quick Start

+### Using Claude (Anthropic)
+
+```ruby
+require 'llm_conductor'
+
+# Configure
+LlmConductor.configure do |config|
+  config.anthropic(api_key: ENV['ANTHROPIC_API_KEY'])
+end
+
+# Analyze an image
+response = LlmConductor.generate(
+  model: 'claude-sonnet-4-20250514',
+  vendor: :anthropic,
+  prompt: {
+    text: 'What is in this image?',
+    images: 'https://example.com/image.jpg'
+  }
+)
+
+puts response.output
+```
+
+### Using GPT (OpenAI)
+
+```ruby
+require 'llm_conductor'
+
+# Configure
+LlmConductor.configure do |config|
+  config.openai(api_key: ENV['OPENAI_API_KEY'])
+end
+
+# Analyze an image
+response = LlmConductor.generate(
+  model: 'gpt-4o',
+  vendor: :openai,
+  prompt: {
+    text: 'What is in this image?',
+    images: 'https://example.com/image.jpg'
+  }
+)
+
+puts response.output
+```
+
+### Using OpenRouter
+
 ```ruby
 require 'llm_conductor'

@@ -25,8 +73,50 @@ response = LlmConductor.generate(
 puts response.output
 ```

+### Using Z.ai (Zhipu AI)
+
+```ruby
+require 'llm_conductor'
+
+# Configure
+LlmConductor.configure do |config|
+  config.zai(api_key: ENV['ZAI_API_KEY'])
+end
+
+# Analyze an image with GLM-4.5V
+response = LlmConductor.generate(
+  model: 'glm-4.5v',
+  vendor: :zai,
+  prompt: {
+    text: 'What is in this image?',
+    images: 'https://example.com/image.jpg'
+  }
+)
+
+puts response.output
+```
+
 ## Recommended Models

+### Claude Models (Anthropic)
+
+For vision tasks via Anthropic API:
+
+- **`claude-sonnet-4-20250514`** - Claude Sonnet 4 (latest, best for vision) ✅
+- **`claude-opus-4-20250514`** - Claude Opus 4 (maximum quality)
+- **`claude-opus-4-1-20250805`** - Claude Opus 4.1 (newest flagship model)
+
+### GPT Models (OpenAI)
+
+For vision tasks via OpenAI API:
+
+- **`gpt-4o`** - Latest GPT-4 Omni with advanced vision capabilities ✅
+- **`gpt-4o-mini`** - Fast, cost-effective vision model
+- **`gpt-4-turbo`** - Previous generation with vision support
+- **`gpt-4-vision-preview`** - Legacy vision model (deprecated)
+
+### OpenRouter Models
+
 For vision tasks via OpenRouter, these models work reliably:

 - **`openai/gpt-4o-mini`** - Fast, reliable, good balance of cost/quality ✅
@@ -34,6 +124,14 @@ For vision tasks via OpenRouter, these models work reliably:
 - **`anthropic/claude-3.5-sonnet`** - High quality analysis
 - **`openai/gpt-4o`** - Best quality (higher cost)

+### Z.ai Models (Zhipu AI)
+
+For vision tasks via Z.ai, these GLM models are recommended:
+
+- **`glm-4.5v`** - GLM-4.5V multimodal model (64K context window) ✅
+- **`glm-4-plus`** - Text-only model with enhanced capabilities
+- **`glm-4v`** - Previous generation vision model
+
 ## Usage Formats

 ### 1. Single Image (Simple Format)
@@ -68,12 +166,12 @@ response = LlmConductor.generate(

 ### 3. Image with Detail Level

-For high-resolution images, specify the detail level:
+For high-resolution images, specify the detail level (supported by GPT and OpenRouter):

 ```ruby
 response = LlmConductor.generate(
-  model: '
-  vendor: :
+  model: 'gpt-4o',
+  vendor: :openai,
   prompt: {
     text: 'Analyze this image in detail',
     images: [
@@ -83,19 +181,22 @@ response = LlmConductor.generate(
 )
 ```

-Detail levels:
+Detail levels (GPT and OpenRouter only):
 - `'high'` - Better for detailed analysis (uses more tokens)
 - `'low'` - Faster, cheaper (default if not specified)
 - `'auto'` - Let the model decide

+**Note:** Claude (Anthropic) and Z.ai don't support the `detail` parameter.
+
 ### 4. Raw Format (Advanced)

-For maximum control, use
+For maximum control, use provider-specific array formats:

+**GPT/OpenRouter Format:**
 ```ruby
 response = LlmConductor.generate(
-  model: '
-  vendor: :
+  model: 'gpt-4o',
+  vendor: :openai,
   prompt: [
     { type: 'text', text: 'What is in this image?' },
     { type: 'image_url', image_url: { url: 'https://example.com/image.jpg' } },
@@ -104,6 +205,18 @@ response = LlmConductor.generate(
 )
 ```

+**Claude Format:**
+```ruby
+response = LlmConductor.generate(
+  model: 'claude-sonnet-4-20250514',
+  vendor: :anthropic,
+  prompt: [
+    { type: 'image', source: { type: 'url', url: 'https://example.com/image.jpg' } },
+    { type: 'text', text: 'What is in this image? Describe it in detail.' }
+  ]
+)
+```
+
 ## Text-Only Requests (Backward Compatible)

 The client still supports regular text-only requests:
@@ -169,11 +282,30 @@ response = LlmConductor.generate(

 ### Run Examples

+For Claude:
+```bash
+export ANTHROPIC_API_KEY='your-key'
+ruby examples/claude_vision_usage.rb
+```
+
+For GPT:
+```bash
+export OPENAI_API_KEY='your-key'
+ruby examples/gpt_vision_usage.rb
+```
+
+For OpenRouter:
 ```bash
 export OPENROUTER_API_KEY='your-key'
 ruby examples/openrouter_vision_usage.rb
 ```

+For Z.ai:
+```bash
+export ZAI_API_KEY='your-key'
+ruby examples/zai_usage.rb
+```
+
 ## Token Counting

 Token counting for multimodal requests counts only the text portion. Image tokens vary by:
@@ -223,11 +355,16 @@ For production:

 ## Examples

-
+- `examples/claude_vision_usage.rb` - Complete Claude vision examples with Claude Sonnet 4
+- `examples/gpt_vision_usage.rb` - Complete GPT vision examples with GPT-4o
+- `examples/openrouter_vision_usage.rb` - Complete OpenRouter vision examples
+- `examples/zai_usage.rb` - Complete Z.ai GLM-4.5V examples including vision and text

 ## Further Reading

 - [OpenRouter Documentation](https://openrouter.ai/docs)
 - [OpenAI Vision API Reference](https://platform.openai.com/docs/guides/vision)
 - [Anthropic Claude Vision](https://docs.anthropic.com/claude/docs/vision)
+- [Z.ai API Platform](https://api.z.ai/)
+- [GLM-4.5V Documentation](https://bigmodel.cn/)
data/examples/claude_vision_usage.rb
ADDED

@@ -0,0 +1,138 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+
+require_relative '../lib/llm_conductor'
+
+# This example demonstrates using Claude Sonnet 4 vision capabilities
+# Set your Anthropic API key: export ANTHROPIC_API_KEY='your-key-here'
+
+puts '=' * 80
+puts 'Claude Sonnet 4 Vision Usage Examples'
+puts '=' * 80
+puts
+
+# Check for API key
+api_key = ENV['ANTHROPIC_API_KEY']
+if api_key.nil? || api_key.empty?
+  puts 'ERROR: ANTHROPIC_API_KEY environment variable is not set!'
+  puts
+  puts 'Please set your Anthropic API key:'
+  puts '  export ANTHROPIC_API_KEY="your-key-here"'
+  puts
+  puts 'You can get an API key from: https://console.anthropic.com/'
+  exit 1
+end
+
+# Configure the client
+LlmConductor.configure do |config|
+  config.anthropic(api_key:)
+end
+
+# Example 1: Single Image Analysis
+puts "\n1. Single Image Analysis"
+puts '-' * 80
+
+begin
+  response = LlmConductor.generate(
+    model: 'claude-sonnet-4-20250514',
+    vendor: :anthropic,
+    prompt: {
+      text: 'What is in this image? Please describe it in detail.',
+      images: 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg'
+    }
+  )
+
+  puts "Response: #{response.output}"
+  puts "Success: #{response.success?}"
+  puts "Tokens: #{response.input_tokens} input, #{response.output_tokens} output"
+  puts "Metadata: #{response.metadata.inspect}" if response.metadata && !response.metadata.empty?
+rescue StandardError => e
+  puts "ERROR: #{e.message}"
+  puts "Backtrace: #{e.backtrace.first(5).join("\n")}"
+end
+
+# Example 2: Multiple Images Comparison
+puts "\n2. Multiple Images Comparison"
+puts '-' * 80
+
+response = LlmConductor.generate(
+  model: 'claude-sonnet-4-20250514',
+  vendor: :anthropic,
+  prompt: {
+    text: 'Compare these two images. What are the main differences?',
+    images: [
+      'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/1024px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg',
+      'https://upload.wikimedia.org/wikipedia/commons/thumb/3/3f/Placeholder_view_vector.svg/1024px-Placeholder_view_vector.svg.png'
+    ]
+  }
+)
+
+puts "Response: #{response.output}"
+puts "Tokens: #{response.input_tokens} input, #{response.output_tokens} output"
+
+# Example 3: Image with Specific Question
+puts "\n3. Image with Specific Question"
+puts '-' * 80
+
+response = LlmConductor.generate(
+  model: 'claude-sonnet-4-20250514',
+  vendor: :anthropic,
+  prompt: {
+    text: 'Is there a wooden boardwalk visible in this image? If yes, describe its condition.',
+    images: 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/1024px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg'
+  }
+)
+
+puts "Response: #{response.output}"
+puts "Tokens: #{response.input_tokens} input, #{response.output_tokens} output"
+
+# Example 4: Raw Format (Advanced)
+puts "\n4. Raw Format (Advanced)"
+puts '-' * 80
+
+response = LlmConductor.generate(
+  model: 'claude-sonnet-4-20250514',
+  vendor: :anthropic,
+  prompt: [
+    { type: 'image',
+      source: { type: 'url',
+                url: 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/1024px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg' } },
+    { type: 'text', text: 'Describe the weather conditions in this image.' }
+  ]
+)
+
+puts "Response: #{response.output}"
+puts "Tokens: #{response.input_tokens} input, #{response.output_tokens} output"
+
+# Example 5: Text-Only Request (Backward Compatible)
+puts "\n5. Text-Only Request (Backward Compatible)"
+puts '-' * 80
+
+response = LlmConductor.generate(
+  model: 'claude-sonnet-4-20250514',
+  vendor: :anthropic,
+  prompt: 'What is the capital of France?'
+)
+
+puts "Response: #{response.output}"
+puts "Tokens: #{response.input_tokens} input, #{response.output_tokens} output"
+
+# Example 6: Image Analysis with Detailed Instructions
+puts "\n6. Image Analysis with Detailed Instructions"
+puts '-' * 80
+
+response = LlmConductor.generate(
+  model: 'claude-sonnet-4-20250514',
+  vendor: :anthropic,
+  prompt: {
+    text: 'Analyze this image and provide: 1) Main subjects, 2) Colors and lighting, 3) Mood or atmosphere, 4) Any notable details',
+    images: 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/1024px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg'
+  }
+)
+
+puts "Response: #{response.output}"
+puts "Tokens: #{response.input_tokens} input, #{response.output_tokens} output"
+
+puts "\n#{'=' * 80}"
+puts 'All examples completed successfully!'
+puts '=' * 80