llm_conductor 1.1.2 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +9 -1
- data/README.md +87 -3
- data/VISION_USAGE.md +47 -2
- data/examples/zai_usage.rb +163 -0
- data/lib/llm_conductor/client_factory.rb +4 -1
- data/lib/llm_conductor/clients/zai_client.rb +153 -0
- data/lib/llm_conductor/configuration.rb +17 -0
- data/lib/llm_conductor/prompt_manager.rb +1 -3
- data/lib/llm_conductor/version.rb +1 -1
- data/lib/llm_conductor.rb +5 -3
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c6ed179bb9142839bcc6feab8d06d61c27ff8279406bc7839f6d09ba14cb573f
+  data.tar.gz: a8ca32fecd9ac81326f7cefcf482f1b6a110b78ca2168c1c8ccbde5e034becb3
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 581da83914c51a3966010d03491c3f57be4ed393bb572f2fdc9d0205f8680f4891f2b058ecf7642ea7bf26bea452a976946b6198d0419afb2e771de3bc112aea
+  data.tar.gz: 00eb70033cb739b7236b759a30219eb5eb6b72db7bba6c7ee519b98cf186e799cbf4f8696acf237d19a8fbfcca97dd9a189ce4f3b4f8f3d8a7d9ff1729d7eb86
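The new checksums can be used to verify a locally downloaded copy of the release. A minimal verification sketch (not part of the gem; the local file names are assumptions for illustration):

```ruby
require 'digest'
require 'yaml'

# Compare a downloaded artifact against the hash recorded in checksums.yaml
checksums = YAML.load_file('checksums.yaml')
expected  = checksums['SHA256']['metadata.gz']
actual    = Digest::SHA256.file('metadata.gz').hexdigest

puts(actual == expected ? 'metadata.gz checksum OK' : 'checksum mismatch!')
```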
data/.rubocop.yml
CHANGED
@@ -29,11 +29,15 @@ Style/HashSyntax:
 Lint/ConstantDefinitionInBlock:
   Enabled: false

+Metrics/ClassLength:
+  Max: 120
+
 Metrics/MethodLength:
   Max: 15
   Exclude:
     - 'lib/llm_conductor/prompts.rb'
     - 'lib/llm_conductor/clients/openrouter_client.rb'
+    - 'lib/llm_conductor/clients/zai_client.rb'

 RSpec/ExampleLength:
   Enabled: false
@@ -91,19 +95,23 @@ Metrics/AbcSize:
   Exclude:
     - 'lib/llm_conductor/prompts.rb'
     - 'lib/llm_conductor/clients/openrouter_client.rb'
+    - 'lib/llm_conductor/clients/zai_client.rb'

 Metrics/CyclomaticComplexity:
   Exclude:
+    - 'lib/llm_conductor.rb'
     - 'lib/llm_conductor/prompts.rb'
     - 'lib/llm_conductor/clients/openrouter_client.rb'
+    - 'lib/llm_conductor/clients/zai_client.rb'

 Metrics/PerceivedComplexity:
   Exclude:
     - 'lib/llm_conductor/prompts.rb'
     - 'lib/llm_conductor/clients/openrouter_client.rb'
+    - 'lib/llm_conductor/clients/zai_client.rb'

 Layout/LineLength:
-  Max:
+  Max: 125

 # Performance cops (from .rubocop_todo.yml)
 Performance/RedundantEqualityComparisonBlock:
data/README.md
CHANGED
@@ -1,12 +1,12 @@
 # LLM Conductor

-A powerful Ruby gem from [Ekohe](https://ekohe.com) for orchestrating multiple Language Model providers with a unified, modern interface. LLM Conductor provides seamless integration with OpenAI GPT, Anthropic Claude, Google Gemini, Groq, Ollama, and
+A powerful Ruby gem from [Ekohe](https://ekohe.com) for orchestrating multiple Language Model providers with a unified, modern interface. LLM Conductor provides seamless integration with OpenAI GPT, Anthropic Claude, Google Gemini, Groq, Ollama, OpenRouter, and Z.ai (Zhipu AI) with advanced prompt management, data building patterns, vision/multimodal support, and comprehensive response handling.

 ## Features

-🚀 **Multi-Provider Support** - OpenAI GPT, Anthropic Claude, Google Gemini, Groq, Ollama, and
+🚀 **Multi-Provider Support** - OpenAI GPT, Anthropic Claude, Google Gemini, Groq, Ollama, OpenRouter, and Z.ai with automatic vendor detection
 🎯 **Unified Modern API** - Simple `LlmConductor.generate()` interface with rich Response objects
-🖼️ **Vision/Multimodal Support** - Send images alongside text prompts for vision-enabled models (OpenRouter)
+🖼️ **Vision/Multimodal Support** - Send images alongside text prompts for vision-enabled models (OpenRouter, Z.ai GLM-4.5V)
 📝 **Advanced Prompt Management** - Registrable prompt classes with inheritance and templating
 🏗️ **Data Builder Pattern** - Structured data preparation for complex LLM inputs
 ⚡ **Smart Configuration** - Rails-style configuration with environment variable support
@@ -120,6 +120,11 @@ LlmConductor.configure do |config|
     uri_base: 'https://openrouter.ai/api/v1' # Optional, this is the default
   )

+  config.zai(
+    api_key: ENV['ZAI_API_KEY'],
+    uri_base: 'https://api.z.ai/api/paas/v4' # Optional, this is the default
+  )
+
   # Optional: Configure custom logger
   config.logger = Logger.new($stdout) # Log to stdout
   config.logger = Logger.new('log/llm_conductor.log') # Log to file
@@ -160,6 +165,7 @@ The gem automatically detects these environment variables:
 - `GROQ_API_KEY` - Groq API key
 - `OLLAMA_ADDRESS` - Ollama server address
 - `OPENROUTER_API_KEY` - OpenRouter API key
+- `ZAI_API_KEY` - Z.ai (Zhipu AI) API key

 ## Supported Providers & Models

@@ -309,6 +315,81 @@ LlmConductor.configure do |config|
 end
 ```

+### Z.ai (Zhipu AI) - GLM Models with Vision Support
+Z.ai provides access to the GLM (General Language Model) series, including the powerful GLM-4.5V multimodal model with a 64K context window and vision capabilities.
+
+**Text models:**
+- `glm-4-plus` - Enhanced text-only model
+- `glm-4` - Standard GLM-4 model
+
+**Vision-capable models:**
+- `glm-4.5v` - Latest multimodal model with 64K context ✅ **RECOMMENDED**
+- `glm-4v` - Previous generation vision model
+
+```ruby
+# Text-only request with GLM-4-plus
+response = LlmConductor.generate(
+  model: 'glm-4-plus',
+  vendor: :zai,
+  prompt: 'Explain quantum computing in simple terms'
+)
+
+# Vision request with GLM-4.5V - single image
+response = LlmConductor.generate(
+  model: 'glm-4.5v',
+  vendor: :zai,
+  prompt: {
+    text: 'What is in this image?',
+    images: 'https://example.com/image.jpg'
+  }
+)
+
+# Vision request with multiple images
+response = LlmConductor.generate(
+  model: 'glm-4.5v',
+  vendor: :zai,
+  prompt: {
+    text: 'Compare these images and identify differences',
+    images: [
+      'https://example.com/image1.jpg',
+      'https://example.com/image2.jpg'
+    ]
+  }
+)
+
+# Vision request with detail level
+response = LlmConductor.generate(
+  model: 'glm-4.5v',
+  vendor: :zai,
+  prompt: {
+    text: 'Analyze this document in detail',
+    images: [
+      { url: 'https://example.com/document.jpg', detail: 'high' }
+    ]
+  }
+)
+
+# Base64 encoded local images
+require 'base64'
+image_data = Base64.strict_encode64(File.read('path/to/image.jpg'))
+response = LlmConductor.generate(
+  model: 'glm-4.5v',
+  vendor: :zai,
+  prompt: {
+    text: 'What is in this image?',
+    images: "data:image/jpeg;base64,#{image_data}"
+  }
+)
+```
+
+**GLM-4.5V Features:**
+- 64K token context window
+- Multimodal understanding (text + images)
+- Document understanding and OCR
+- Image reasoning and analysis
+- Base64 image support for local files
+- OpenAI-compatible API format
+
 ### Vendor Detection

 The gem automatically detects the appropriate provider based on model names:
@@ -316,6 +397,7 @@ The gem automatically detects the appropriate provider based on model names:
 - **OpenAI**: Models starting with `gpt-` (e.g., `gpt-4`, `gpt-3.5-turbo`)
 - **Anthropic**: Models starting with `claude-` (e.g., `claude-3-5-sonnet-20241022`)
 - **Google Gemini**: Models starting with `gemini-` (e.g., `gemini-2.5-flash`, `gemini-2.0-flash`)
+- **Z.ai**: Models starting with `glm-` (e.g., `glm-4.5v`, `glm-4-plus`, `glm-4v`)
 - **Groq**: Models starting with `llama`, `mixtral`, `gemma`, or `qwen` (e.g., `llama-3.1-70b-versatile`, `mixtral-8x7b-32768`, `gemma-7b-it`, `qwen-2.5-72b-instruct`)
 - **Ollama**: All other models (e.g., `llama3.2`, `mistral`, `codellama`)

@@ -569,6 +651,8 @@ Check the `/examples` directory for comprehensive usage examples:
 - `rag_usage.rb` - RAG implementation examples
 - `gemini_usage.rb` - Google Gemini integration
 - `groq_usage.rb` - Groq integration with various models
+- `openrouter_vision_usage.rb` - OpenRouter vision/multimodal examples
+- `zai_usage.rb` - Z.ai GLM-4.5V vision and text examples

 ## Development
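Putting the README's configuration and generation pieces together, a minimal end-to-end sketch based on the documented API (the image URL is a placeholder):

```ruby
require 'llm_conductor'

LlmConductor.configure do |config|
  config.zai(api_key: ENV['ZAI_API_KEY'])
end

# vendor: can also be omitted; the glm- prefix is auto-detected as :zai
response = LlmConductor.generate(
  model: 'glm-4.5v',
  vendor: :zai,
  prompt: { text: 'Describe this image.', images: 'https://example.com/photo.jpg' }
)

if response.success?
  puts response.output
  puts "Tokens used: #{response.total_tokens}"
else
  puts "Error: #{response.metadata[:error]}"
end
```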
data/VISION_USAGE.md
CHANGED
@@ -1,9 +1,11 @@
 # Vision/Multimodal Usage Guide

-This guide explains how to use vision/multimodal capabilities with the OpenRouter
+This guide explains how to use vision/multimodal capabilities with the OpenRouter and Z.ai clients in LLM Conductor.

 ## Quick Start

+### Using OpenRouter
+
 ```ruby
 require 'llm_conductor'

@@ -25,8 +27,33 @@ response = LlmConductor.generate(
 puts response.output
 ```

+### Using Z.ai (Zhipu AI)
+
+```ruby
+require 'llm_conductor'
+
+# Configure
+LlmConductor.configure do |config|
+  config.zai(api_key: ENV['ZAI_API_KEY'])
+end
+
+# Analyze an image with GLM-4.5V
+response = LlmConductor.generate(
+  model: 'glm-4.5v',
+  vendor: :zai,
+  prompt: {
+    text: 'What is in this image?',
+    images: 'https://example.com/image.jpg'
+  }
+)
+
+puts response.output
+```
+
 ## Recommended Models

+### OpenRouter Models
+
 For vision tasks via OpenRouter, these models work reliably:

 - **`openai/gpt-4o-mini`** - Fast, reliable, good balance of cost/quality ✅
@@ -34,6 +61,14 @@ For vision tasks via OpenRouter, these models work reliably:
 - **`anthropic/claude-3.5-sonnet`** - High quality analysis
 - **`openai/gpt-4o`** - Best quality (higher cost)

+### Z.ai Models (Zhipu AI)
+
+For vision tasks via Z.ai, these GLM models are recommended:
+
+- **`glm-4.5v`** - GLM-4.5V multimodal model (64K context window) ✅
+- **`glm-4-plus`** - Text-only model with enhanced capabilities
+- **`glm-4v`** - Previous generation vision model
+
 ## Usage Formats

 ### 1. Single Image (Simple Format)
@@ -169,11 +204,18 @@ response = LlmConductor.generate(

 ### Run Examples

+For OpenRouter:
 ```bash
 export OPENROUTER_API_KEY='your-key'
 ruby examples/openrouter_vision_usage.rb
 ```

+For Z.ai:
+```bash
+export ZAI_API_KEY='your-key'
+ruby examples/zai_usage.rb
+```
+
 ## Token Counting

 Token counting for multimodal requests counts only the text portion. Image tokens vary by:
@@ -223,11 +265,14 @@ For production:

 ## Examples

-
+- `examples/openrouter_vision_usage.rb` - Complete OpenRouter vision examples
+- `examples/zai_usage.rb` - Complete Z.ai GLM-4.5V examples including vision and text

 ## Further Reading

 - [OpenRouter Documentation](https://openrouter.ai/docs)
 - [OpenAI Vision API Reference](https://platform.openai.com/docs/guides/vision)
 - [Anthropic Claude Vision](https://docs.anthropic.com/claude/docs/vision)
+- [Z.ai API Platform](https://api.z.ai/)
+- [GLM-4.5V Documentation](https://bigmodel.cn/)
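The usage formats documented above reduce to three interchangeable shapes for the `images` field; a consolidated sketch (URLs are placeholders):

```ruby
# 1. Single image as a plain string (URL or data: URI)
images = 'https://example.com/a.jpg'

# 2. Multiple images as an array of strings
images = ['https://example.com/a.jpg', 'https://example.com/b.jpg']

# 3. Hash form with an optional per-image detail level
images = [{ url: 'https://example.com/a.jpg', detail: 'high' }]

# Any of the three plugs into the same prompt hash:
response = LlmConductor.generate(
  model: 'glm-4.5v',
  vendor: :zai,
  prompt: { text: 'Describe the image(s).', images: images }
)
```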
data/examples/zai_usage.rb
ADDED
@@ -0,0 +1,163 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+
+# Example of Z.ai GLM model usage including multimodal/vision capabilities
+require_relative '../lib/llm_conductor'
+
+# Configure Z.ai
+LlmConductor.configure do |config|
+  config.zai(
+    api_key: ENV['ZAI_API_KEY']
+  )
+end
+
+# Example 1: Simple text-only request with GLM-4-plus
+puts '=== Example 1: Text-only request with GLM-4-plus ==='
+response = LlmConductor.generate(
+  model: 'glm-4-plus',
+  vendor: :zai,
+  prompt: 'What is the capital of France? Please answer in one sentence.'
+)
+puts response.output
+puts "Tokens used: #{response.total_tokens}\n\n"
+
+# Example 2: Text request with GLM-4.5V (vision model, text-only mode)
+puts '=== Example 2: Text-only request with GLM-4.5V ==='
+response = LlmConductor.generate(
+  model: 'glm-4.5v',
+  vendor: :zai,
+  prompt: 'Explain the concept of machine learning in simple terms.'
+)
+puts response.output
+puts "Tokens used: #{response.total_tokens}\n\n"
+
+# Example 3: Vision request with a single image
+puts '=== Example 3: Single image analysis with GLM-4.5V ==='
+response = LlmConductor.generate(
+  model: 'glm-4.5v',
+  vendor: :zai,
+  prompt: {
+    text: 'What do you see in this image? Please describe it in detail.',
+    images: 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg'
+  }
+)
+puts response.output
+puts "Tokens used: #{response.total_tokens}\n\n"
+
+# Example 4: Vision request with multiple images
+puts '=== Example 4: Multiple images comparison with GLM-4.5V ==='
+response = LlmConductor.generate(
+  model: 'glm-4.5v',
+  vendor: :zai,
+  prompt: {
+    text: 'Compare these two images and describe the differences you observe.',
+    images: [
+      'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg',
+      'https://upload.wikimedia.org/wikipedia/commons/thumb/3/3f/Placeholder_view_vector.svg/681px-Placeholder_view_vector.svg.png'
+    ]
+  }
+)
+puts response.output
+puts "Tokens used: #{response.total_tokens}\n\n"
+
+# Example 5: Image with detail level specification
+puts '=== Example 5: Image with detail level ==='
+response = LlmConductor.generate(
+  model: 'glm-4.5v',
+  vendor: :zai,
+  prompt: {
+    text: 'Describe this image in detail, including colors, objects, and atmosphere.',
+    images: [
+      {
+        url: 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg',
+        detail: 'high'
+      }
+    ]
+  }
+)
+puts response.output
+puts "Tokens used: #{response.total_tokens}\n\n"
+
+# Example 6: Using raw array format (advanced)
+puts '=== Example 6: Raw array format ==='
+response = LlmConductor.generate(
+  model: 'glm-4.5v',
+  vendor: :zai,
+  prompt: [
+    { type: 'text', text: 'What objects can you identify in this image?' },
+    {
+      type: 'image_url',
+      image_url: {
+        url: 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg'
+      }
+    }
+  ]
+)
+puts response.output
+puts "Tokens used: #{response.total_tokens}\n\n"
+
+# Example 7: Base64 encoded image (for local images)
+puts '=== Example 7: Using base64 encoded image ==='
+# NOTE: In real usage, you would read and encode a local file
+# require 'base64'
+# image_data = Base64.strict_encode64(File.read('path/to/image.jpg'))
+# image_url = "data:image/jpeg;base64,#{image_data}"
+
+# For this example, we'll use a URL
+response = LlmConductor.generate(
+  model: 'glm-4.5v',
+  vendor: :zai,
+  prompt: {
+    text: 'Analyze this image and extract any text you can see.',
+    images: 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg'
+  }
+)
+puts response.output
+puts "Tokens used: #{response.total_tokens}\n\n"
+
+# Example 8: Error handling
+puts '=== Example 8: Error handling ==='
+begin
+  response = LlmConductor.generate(
+    model: 'glm-4.5v',
+    vendor: :zai,
+    prompt: {
+      text: 'Analyze this image',
+      images: 'invalid-url'
+    }
+  )
+
+  if response.success?
+    puts response.output
+  else
+    puts "Error: #{response.metadata[:error]}"
+  end
+rescue StandardError => e
+  puts "Exception: #{e.message}"
+end
+
+# Example 9: Document understanding (OCR)
+puts "\n=== Example 9: Document understanding ==="
+response = LlmConductor.generate(
+  model: 'glm-4.5v',
+  vendor: :zai,
+  prompt: {
+    text: 'Please read any text visible in this image and transcribe it.',
+    images: 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg'
+  }
+)
+puts response.output
+puts "Tokens used: #{response.total_tokens}\n\n"
+
+# Example 10: Complex reasoning with image
+puts '=== Example 10: Complex reasoning with image ==='
+response = LlmConductor.generate(
+  model: 'glm-4.5v',
+  vendor: :zai,
+  prompt: {
+    text: 'Based on this image, what time of day do you think it is? Explain your reasoning.',
+    images: 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg'
+  }
+)
+puts response.output
+puts "Tokens used: #{response.total_tokens}\n\n"
data/lib/llm_conductor/client_factory.rb
CHANGED
@@ -19,7 +19,8 @@ module LlmConductor
         ollama: Clients::OllamaClient,
         gemini: Clients::GeminiClient,
         google: Clients::GeminiClient,
-        groq: Clients::GroqClient
+        groq: Clients::GroqClient,
+        zai: Clients::ZaiClient
       }

       client_classes.fetch(vendor) do
@@ -35,6 +36,8 @@ module LlmConductor
         :openai
       when /^gemini/i
         :gemini
+      when /^glm/i
+        :zai
       when /^(llama|mixtral|gemma|qwen)/i
         :groq
       else
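With this wiring, GLM models reach the new client by either path: explicit vendor symbols resolve through the `client_classes` map, and model names matching `/^glm/i` fall through to `:zai` automatically. A short sketch of the observable behavior (hypothetical calls, assuming the detection logic shown above):

```ruby
# Explicit vendor selection resolves through the client_classes map
LlmConductor.generate(model: 'glm-4-plus', vendor: :zai, prompt: 'Hello')

# Without a vendor, 'glm-4.5v' matches /^glm/i and is routed to :zai
LlmConductor.generate(model: 'glm-4.5v', prompt: 'Hello')
```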
data/lib/llm_conductor/clients/zai_client.rb
ADDED
@@ -0,0 +1,153 @@
+# frozen_string_literal: true
+
+module LlmConductor
+  module Clients
+    # Z.ai client implementation for accessing GLM models including GLM-4.5V
+    # Supports both text-only and multimodal (vision) requests
+    #
+    # Note: Z.ai uses OpenAI-compatible API format but with /v4/ path instead of /v1/
+    # We use Faraday directly instead of the ruby-openai gem to properly handle the API path
+    class ZaiClient < BaseClient
+      private
+
+      # Override token calculation to handle multimodal content
+      def calculate_tokens(content)
+        case content
+        when String
+          super(content)
+        when Hash
+          # For multimodal content, count tokens only for text part
+          # Note: This is an approximation as images have variable token counts
+          text = content[:text] || content['text'] || ''
+          super(text)
+        when Array
+          # For pre-formatted arrays, extract and count text parts
+          text_parts = content.select { |part| part[:type] == 'text' || part['type'] == 'text' }
+                              .map { |part| part[:text] || part['text'] || '' }
+                              .join(' ')
+          super(text_parts)
+        else
+          super(content.to_s)
+        end
+      end
+
+      def generate_content(prompt)
+        content = format_content(prompt)
+
+        # Retry logic for transient errors (similar to OpenRouter)
+        max_retries = 3
+        retry_count = 0
+
+        begin
+          # Make direct HTTP request to Z.ai API since they use /v4/ instead of /v1/
+          response = http_client.post('chat/completions') do |req|
+            req.body = {
+              model:,
+              messages: [{ role: 'user', content: }]
+            }.to_json
+          end
+
+          # Response body is already parsed as Hash by Faraday's JSON middleware
+          response_data = response.body.is_a?(String) ? JSON.parse(response.body) : response.body
+          response_data.dig('choices', 0, 'message', 'content')
+        rescue Faraday::ServerError => e
+          retry_count += 1
+
+          # Log retry attempts if logger is configured
+          configuration.logger&.warn(
+            "Z.ai API error (attempt #{retry_count}/#{max_retries}): #{e.message}"
+          )
+
+          raise unless retry_count < max_retries
+
+          wait_time = 2**retry_count # Exponential backoff: 2, 4, 8 seconds
+          configuration.logger&.info("Retrying in #{wait_time}s...")
+          sleep(wait_time)
+          retry
+        end
+      end
+
+      # Format content based on whether it's a simple string or multimodal content
+      # @param prompt [String, Hash, Array] The prompt content
+      # @return [String, Array] Formatted content for the API
+      def format_content(prompt)
+        case prompt
+        when Hash
+          # Handle hash with text and/or images
+          format_multimodal_hash(prompt)
+        when Array
+          # Already formatted as array of content parts
+          prompt
+        else
+          # Simple string prompt
+          prompt.to_s
+        end
+      end
+
+      # Format a hash containing text and/or images into multimodal content array
+      # @param prompt_hash [Hash] Hash with :text and/or :images keys
+      # @return [Array] Array of content parts for the API
+      def format_multimodal_hash(prompt_hash)
+        content_parts = []
+
+        # Add text part if present
+        if prompt_hash[:text] || prompt_hash['text']
+          text = prompt_hash[:text] || prompt_hash['text']
+          content_parts << { type: 'text', text: }
+        end
+
+        # Add image parts if present
+        images = prompt_hash[:images] || prompt_hash['images'] || []
+        images = [images] unless images.is_a?(Array)
+
+        images.each do |image|
+          content_parts << format_image_part(image)
+        end
+
+        content_parts
+      end
+
+      # Format an image into the appropriate API structure
+      # @param image [String, Hash] Image URL or hash with url/detail keys
+      # @return [Hash] Formatted image part for the API
+      def format_image_part(image)
+        case image
+        when String
+          # Simple URL string or base64 data
+          { type: 'image_url', image_url: { url: image } }
+        when Hash
+          # Hash with url and optional detail level
+          {
+            type: 'image_url',
+            image_url: {
+              url: image[:url] || image['url'],
+              detail: image[:detail] || image['detail']
+            }.compact
+          }
+        end
+      end
+
+      # HTTP client for making requests to Z.ai API
+      # Z.ai uses /v4/ in their path, not /v1/ like OpenAI, so we use Faraday directly
+      def http_client
+        @http_client ||= begin
+          config = LlmConductor.configuration.provider_config(:zai)
+          base_url = config[:uri_base] || 'https://api.z.ai/api/paas/v4'
+
+          Faraday.new(url: base_url) do |f|
+            f.request :json
+            f.response :json
+            f.headers['Authorization'] = "Bearer #{config[:api_key]}"
+            f.headers['Content-Type'] = 'application/json'
+            f.adapter Faraday.default_adapter
+          end
+        end
+      end
+
+      # Legacy client method for compatibility (not used, but kept for reference)
+      def client
+        http_client
+      end
+    end
+  end
+end
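For reference, this is roughly the request body the client above builds for a text-plus-image prompt: `format_multimodal_hash` and `format_image_part` expand the prompt hash into the OpenAI-style content array, and `generate_content` posts it to `chat/completions` (the URL and text here are illustrative):

```ruby
# prompt: { text: 'What is in this image?', images: 'https://example.com/a.jpg' }
# becomes, once serialized with #to_json:
{
  model: 'glm-4.5v',
  messages: [
    {
      role: 'user',
      content: [
        { type: 'text', text: 'What is in this image?' },
        { type: 'image_url', image_url: { url: 'https://example.com/a.jpg' } }
      ]
    }
  ]
}
```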
data/lib/llm_conductor/configuration.rb
CHANGED
@@ -72,6 +72,14 @@ module LlmConductor
       }
     end

+    # Configure Z.ai provider
+    def zai(api_key: nil, **options)
+      @providers[:zai] = {
+        api_key: api_key || ENV['ZAI_API_KEY'],
+        **options
+      }
+    end
+
     # Get provider configuration
     def provider_config(provider)
       @providers[provider.to_sym] || {}
@@ -126,6 +134,14 @@ module LlmConductor
       groq(api_key: value)
     end

+    def zai_api_key
+      provider_config(:zai)[:api_key]
+    end
+
+    def zai_api_key=(value)
+      zai(api_key: value)
+    end
+
     private

     def setup_defaults_from_env
@@ -135,6 +151,7 @@ module LlmConductor
       openrouter if ENV['OPENROUTER_API_KEY']
       gemini if ENV['GEMINI_API_KEY']
       groq if ENV['GROQ_API_KEY']
+      zai if ENV['ZAI_API_KEY']
       ollama # Always configure Ollama with default URL
     end
   end
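These additions give three equivalent ways to supply the Z.ai key, all landing in `@providers[:zai]`; a sketch (the key value is a placeholder):

```ruby
# 1. Implicitly via the environment, picked up by setup_defaults_from_env
ENV['ZAI_API_KEY'] = 'your-zai-key'

LlmConductor.configure do |config|
  # 2. Explicitly, with optional overrides such as uri_base
  config.zai(api_key: 'your-zai-key', uri_base: 'https://api.z.ai/api/paas/v4')

  # 3. Via the attribute-style writer
  config.zai_api_key = 'your-zai-key'
end
```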
data/lib/llm_conductor/prompt_manager.rb
CHANGED
@@ -59,9 +59,7 @@ module LlmConductor
     def validate_prompt_class!(prompt_class)
       raise InvalidPromptClassError, 'Prompt must be a class' unless prompt_class.is_a?(Class)

-      unless prompt_class < Prompts::BasePrompt
-        raise InvalidPromptClassError, 'Prompt class must inherit from BasePrompt'
-      end
+      raise InvalidPromptClassError, 'Prompt class must inherit from BasePrompt' unless prompt_class < Prompts::BasePrompt

       return if prompt_class.instance_methods(false).include?(:render)
data/lib/llm_conductor.rb
CHANGED
@@ -14,10 +14,11 @@ require_relative 'llm_conductor/clients/groq_client'
 require_relative 'llm_conductor/clients/ollama_client'
 require_relative 'llm_conductor/clients/openrouter_client'
 require_relative 'llm_conductor/clients/gemini_client'
+require_relative 'llm_conductor/clients/zai_client'
 require_relative 'llm_conductor/client_factory'

 # LLM Conductor provides a unified interface for multiple Language Model providers
-# including OpenAI GPT, Anthropic Claude, Google Gemini, Groq, OpenRouter, and Ollama
+# including OpenAI GPT, Anthropic Claude, Google Gemini, Groq, OpenRouter, Z.ai, and Ollama
 # with built-in prompt templates, token counting, and extensible client architecture.
 module LlmConductor
   class Error < StandardError; end
@@ -63,16 +64,17 @@ module LlmConductor
       when :ollama then Clients::OllamaClient
       when :gemini, :google then Clients::GeminiClient
       when :groq then Clients::GroqClient
+      when :zai then Clients::ZaiClient
       else
         raise ArgumentError,
               "Unsupported vendor: #{vendor}. " \
-              'Supported vendors: anthropic, openai, openrouter, ollama, gemini, groq'
+              'Supported vendors: anthropic, openai, openrouter, ollama, gemini, groq, zai'
       end
     end
   end

   # List of supported vendors
-  SUPPORTED_VENDORS = %i[anthropic openai openrouter ollama gemini groq].freeze
+  SUPPORTED_VENDORS = %i[anthropic openai openrouter ollama gemini groq zai].freeze

   # List of supported prompt types
   SUPPORTED_PROMPT_TYPES = %i[
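The visible effect of the constant and error-message updates, sketched (assuming `generate` routes vendor symbols through the case statement above; `:foo` is a deliberately invalid vendor):

```ruby
LlmConductor::SUPPORTED_VENDORS.include?(:zai) # => true

# An unknown vendor still raises, with :zai now listed:
#   ArgumentError: Unsupported vendor: foo. Supported vendors:
#   anthropic, openai, openrouter, ollama, gemini, groq, zai
LlmConductor.generate(model: 'some-model', vendor: :foo, prompt: 'hi')
```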
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: llm_conductor
 version: !ruby/object:Gem::Version
-  version: 1.1.2
+  version: 1.2.0
 platform: ruby
 authors:
 - Ben Zheng
@@ -161,6 +161,7 @@ files:
 - examples/prompt_registration.rb
 - examples/rag_usage.rb
 - examples/simple_usage.rb
+- examples/zai_usage.rb
 - lib/llm_conductor.rb
 - lib/llm_conductor/client_factory.rb
 - lib/llm_conductor/clients/anthropic_client.rb
@@ -170,6 +171,7 @@ files:
 - lib/llm_conductor/clients/groq_client.rb
 - lib/llm_conductor/clients/ollama_client.rb
 - lib/llm_conductor/clients/openrouter_client.rb
+- lib/llm_conductor/clients/zai_client.rb
 - lib/llm_conductor/configuration.rb
 - lib/llm_conductor/data_builder.rb
 - lib/llm_conductor/prompt_manager.rb