smart_prompt 0.4.4 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -10
- data/README.cn.md +307 -64
- data/README.md +311 -64
- data/Rakefile +10 -1
- data/config/anthropic_config.yml +151 -0
- data/config/image_generation_config.yml +22 -0
- data/config/multimodal_config.yml +85 -0
- data/config/sensenova_config.yml +63 -0
- data/config/zhipu_config.yml +73 -0
- data/examples/anthropic_basic_chat.rb +143 -0
- data/examples/anthropic_example.rb +232 -0
- data/examples/anthropic_multimodal.rb +212 -0
- data/examples/anthropic_streaming.rb +312 -0
- data/examples/anthropic_tool_calling.rb +393 -0
- data/examples/automatic_cleanup_example.rb +109 -0
- data/examples/history_management_examples.rb +522 -0
- data/examples/image_generation_example.rb +130 -0
- data/examples/monitoring_example.rb +121 -0
- data/examples/multimodal_example.rb +63 -0
- data/examples/relevance_based_strategy_example.rb +87 -0
- data/examples/sensenova_example.rb +129 -0
- data/examples/stt_example.rb +287 -0
- data/examples/tts_example.rb +244 -0
- data/examples/video_generation_example.rb +189 -0
- data/examples/zhipu_example.rb +151 -0
- data/lib/smart_prompt/anthropic_adapter.rb +363 -281
- data/lib/smart_prompt/compression_engine.rb +201 -0
- data/lib/smart_prompt/context_strategy.rb +22 -0
- data/lib/smart_prompt/conversation.rb +81 -191
- data/lib/smart_prompt/engine.rb +36 -19
- data/lib/smart_prompt/history_manager.rb +596 -0
- data/lib/smart_prompt/hybrid_strategy.rb +222 -0
- data/lib/smart_prompt/image_generation_adapter.rb +297 -0
- data/lib/smart_prompt/lru_cache.rb +133 -0
- data/lib/smart_prompt/message.rb +57 -0
- data/lib/smart_prompt/multimodal_adapter.rb +277 -0
- data/lib/smart_prompt/openai_adapter.rb +1 -25
- data/lib/smart_prompt/persistence_layer.rb +197 -0
- data/lib/smart_prompt/relevance_based_strategy.rb +221 -0
- data/lib/smart_prompt/sensenova_adapter.rb +410 -0
- data/lib/smart_prompt/session.rb +140 -0
- data/lib/smart_prompt/sliding_window_strategy.rb +100 -0
- data/lib/smart_prompt/stt_adapter.rb +381 -0
- data/lib/smart_prompt/summary_based_strategy.rb +152 -0
- data/lib/smart_prompt/token_counter.rb +74 -0
- data/lib/smart_prompt/tts_adapter.rb +403 -0
- data/lib/smart_prompt/version.rb +1 -1
- data/lib/smart_prompt/video_generation_adapter.rb +330 -0
- data/lib/smart_prompt/worker.rb +25 -3
- data/lib/smart_prompt/zhipu_adapter.rb +616 -0
- data/lib/smart_prompt.rb +22 -2
- data/workers/history_management_examples.rb +407 -0
- data/workers/image_generation_workers.rb +119 -0
- data/workers/multimodal_workers.rb +110 -0
- data/workers/sensenova_workers.rb +62 -0
- data/workers/stt_workers.rb +195 -0
- data/workers/tts_workers.rb +388 -0
- data/workers/video_generation_workers.rb +264 -0
- data/workers/zhipu_workers.rb +113 -0
- metadata +84 -8
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
# TTS Example for SmartPrompt
# This example demonstrates how to use the new TTSAdapter
#
# Flow: write a temporary engine config, build the engine from it, run seven
# independent demos, and remove the temporary config when the script exits.
# Every demo rescues its own errors so that a failure in one demo does not
# prevent the following demos from running.

require 'yaml' # Hash#to_yaml (used below) is only defined once YAML is loaded
require_relative '../lib/smart_prompt'

# Temporary config file. It contains the API key in plain text, so it must not
# be left behind when the script aborts.
config_file = 'tts_config.yml'

# Configuration for TTS capabilities
config = {
  "adapters" => {
    "multimodal" => "MultimodalAdapter",
    "image_generation" => "ImageGenerationAdapter",
    "video_generation" => "VideoGenerationAdapter",
    "tts" => "TTSAdapter"
  },
  "llms" => {
    "qwen_vl" => {
      "adapter" => "multimodal",
      "url" => "https://api.siliconflow.cn/v1/",
      "api_key" => ENV["SILICONFLOW_API_KEY"],
      "model" => "Qwen/Qwen2.5-VL-7B-Instruct"
    },
    "image_gen" => {
      "adapter" => "image_generation",
      "url" => "https://api.siliconflow.cn/v1/",
      "api_key" => ENV["SILICONFLOW_API_KEY"],
      "model" => "stabilityai/stable-diffusion-xl-base-1.0"
    },
    "video_gen" => {
      "adapter" => "video_generation",
      "url" => "https://api.siliconflow.cn/v1/",
      "api_key" => ENV["SILICONFLOW_API_KEY"],
      "model" => "Wan-AI/Wan2.2-T2V-A14B"
    },
    "tts_service" => {
      "adapter" => "tts",
      "url" => "https://api.siliconflow.cn/v1/",
      "api_key" => ENV["SILICONFLOW_API_KEY"],
      "model" => "FunAudioLLM/CosyVoice2-0.5B"
    }
  },
  "default_llm" => "qwen_vl",
  "template_path" => "./templates",
  "worker_path" => "./workers",
  "logger_file" => "./logs/smart_prompt.log"
}

# Clean up on every exit path (normal end or an uncaught error such as a
# failing Engine.new), not only when the last line of the script is reached.
at_exit { File.delete(config_file) if File.exist?(config_file) }

# Write config to file
File.write(config_file, config.to_yaml)

# Initialize engine
engine = SmartPrompt::Engine.new(config_file)

puts "=== SmartPrompt TTS Demo ==="
if ENV["SILICONFLOW_API_KEY"].to_s.empty?
  puts "Note: SILICONFLOW_API_KEY is not set - the API calls below will fail."
end

# Example 1: Basic text-to-speech synthesis
puts "\n=== Example 1: Basic TTS Synthesis ==="
begin
  result = engine.call_worker(:tts_synthesizer, {
    text: "欢迎使用智能提示系统,这是一个文本转语音功能的演示。",
    voice: "alloy",
    speed: 1.0,
    response_format: "mp3",
    save_to_file: true,
    output_dir: "./generated_audio",
    filename_prefix: "basic_tts"
  })

  puts "TTS synthesis successful!"
  puts "Audio file: #{result[:audio_file][:file_path]}"
  puts "Text length: #{result[:audio_file][:text_length]} characters"
  puts "Voice: #{result[:audio_file][:voice]}"
rescue => e
  puts "Error in TTS synthesis: #{e.message}"
  puts "Note: This example requires a valid SILICONFLOW_API_KEY environment variable"
end

# Example 2: Multilingual TTS
puts "\n=== Example 2: Multilingual TTS ==="
begin
  # English text
  result_en = engine.call_worker(:multilingual_tts, {
    text: "Hello, this is a demonstration of text-to-speech functionality.",
    voice: "echo",
    save_to_file: true,
    output_dir: "./multilingual_audio",
    filename_prefix: "english_tts"
  })

  puts "English TTS successful!"
  puts "Detected language: #{result_en[:detected_language]}"
  puts "Audio file: #{result_en[:audio_file][:file_path]}"

  # Chinese text
  result_zh = engine.call_worker(:multilingual_tts, {
    text: "这是一个中文文本转语音的演示,支持多种语言。",
    voice: "nova",
    save_to_file: true,
    output_dir: "./multilingual_audio",
    filename_prefix: "chinese_tts"
  })

  puts "Chinese TTS successful!"
  puts "Detected language: #{result_zh[:detected_language]}"
  puts "Audio file: #{result_zh[:audio_file][:file_path]}"
rescue => e
  puts "Error in multilingual TTS: #{e.message}"
end

# Example 3: Voice selection demo
puts "\n=== Example 3: Voice Selection Demo ==="
begin
  result = engine.call_worker(:voice_selector, {
    text: "这是一个不同音色的演示,您可以听到不同声音的朗读效果。",
    save_to_file: true,
    output_dir: "./voice_demos"
  })

  puts "Voice selection demo successful!"
  puts "Available voices: #{result[:available_voices].keys.join(', ')}"
  puts "Selected voice: #{result[:selected_voice]}"
  puts "Audio file: #{result[:audio_file][:file_path]}"
rescue => e
  puts "Error in voice selection: #{e.message}"
end

# Example 4: Speed variation demo
puts "\n=== Example 4: Speed Variation Demo ==="
begin
  result = engine.call_worker(:speed_variation_tts, {
    text: "这是一个语速变化的演示,您可以听到不同语速的朗读效果。",
    voice: "alloy",
    speeds: [0.5, 0.75, 1.0, 1.5, 2.0],
    save_to_file: true,
    output_dir: "./speed_variations"
  })

  puts "Speed variation demo successful!"
  puts "Generated #{result[:speed_variations].size} audio files at different speeds"
  result[:speed_variations].each do |variation|
    puts " - Speed #{variation[:speed]}: #{variation[:audio_file][:file_path]}"
  end
rescue => e
  puts "Error in speed variation: #{e.message}"
end

# Example 5: Custom voice management
puts "\n=== Example 5: Custom Voice Management ==="
begin
  # List available voices
  result = engine.call_worker(:custom_voice_manager, {
    action: "list"
  })

  puts "Voice management demo successful!"
  puts "Predefined voices: #{result[:predefined_voices].keys.join(', ')}"
  puts "Custom voices: #{result[:custom_voices].size}"

  # Note: Creating custom voices requires reference audio files
  # Uncomment the following lines if you have reference audio files:
  #
  # result = engine.call_worker(:custom_voice_manager, {
  #   action: "create",
  #   name: "my_custom_voice",
  #   reference_audio_file: "./reference_audio.wav",
  #   description: "My custom voice created from reference audio"
  # })
  #
  # puts "Custom voice created: #{result[:voice_data][:voice_id]}"
rescue => e
  puts "Error in voice management: #{e.message}"
end

# Example 6: Batch TTS processing
puts "\n=== Example 6: Batch TTS Processing ==="
begin
  result = engine.call_worker(:batch_tts, {
    texts: [
      "这是第一条文本内容。",
      "这是第二条文本内容,用于批量处理演示。",
      "这是第三条文本内容,展示批量文本转语音功能。"
    ],
    voice: "alloy",
    save_to_file: true,
    output_dir: "./batch_audio"
  })

  puts "Batch TTS processing successful!"
  puts "Generated #{result[:batch_results].size} audio files"
  result[:batch_results].each do |batch_result|
    # :index is printed 1-based for display
    puts " - Text #{batch_result[:index] + 1}: #{batch_result[:audio_file][:file_path]}"
  end
rescue => e
  puts "Error in batch TTS: #{e.message}"
end

# Example 7: Direct adapter usage
puts "\n=== Example 7: Direct Adapter Usage ==="
begin
  # Get the adapter directly
  adapter = engine.llms["tts_service"]

  # Synthesize speech directly
  audio_data = adapter.synthesize_speech(
    "这是直接使用适配器的演示,不通过Worker。",
    voice: "echo",
    speed: 1.2,
    response_format: "mp3"
  )

  puts "Direct adapter usage successful!"
  puts "Generated audio data with format: #{audio_data[:format]}"
  puts "Text length: #{audio_data[:text_length]} characters"

  # Save to file; the timestamp keeps repeated runs from overwriting each other.
  # NOTE(review): assumes the adapter creates ./direct_audio when it is missing - confirm.
  output_path = "./direct_audio/direct_tts_#{Time.now.to_i}.mp3"
  result = adapter.synthesize_to_file(
    "这是直接保存到文件的演示。",
    output_path,
    voice: "nova",
    speed: 1.0
  )

  puts "Direct file synthesis successful!"
  puts "Audio file: #{result[:file_path]}"
rescue => e
  puts "Error in direct adapter usage: #{e.message}"
end

puts "\n=== All examples completed ==="
puts "\nImportant Notes:"
puts "1. TTS requires valid SILICONFLOW_API_KEY environment variable"
puts "2. Audio files are saved in various formats (mp3, wav, etc.)"
puts "3. Custom voice creation requires reference audio files"
puts "4. Multiple languages are supported (Chinese, English, Japanese, Korean)"
puts "5. Speed can be adjusted from 0.25x to 4.0x"

# Clean up: handled by the at_exit hook registered before the config was written.
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
# Video Generation Example for SmartPrompt
# This example demonstrates how to use the new VideoGenerationAdapter
#
# Flow: write a temporary engine config, build the engine from it, submit
# several video generation jobs (workers and direct adapter calls), and remove
# the temporary config when the script exits. Every demo rescues its own
# errors so that a failure in one demo does not stop the following demos.

require 'yaml' # Hash#to_yaml (used below) is only defined once YAML is loaded
require_relative '../lib/smart_prompt'

# Temporary config file. It contains the API key in plain text, so it must not
# be left behind when the script aborts.
config_file = 'video_generation_config.yml'

# Configuration for video generation capabilities
config = {
  "adapters" => {
    "multimodal" => "MultimodalAdapter",
    "image_generation" => "ImageGenerationAdapter",
    "video_generation" => "VideoGenerationAdapter"
  },
  "llms" => {
    "qwen_vl" => {
      "adapter" => "multimodal",
      "url" => "https://api.siliconflow.cn/v1/",
      "api_key" => ENV["SILICONFLOW_API_KEY"],
      "model" => "Qwen/Qwen2.5-VL-7B-Instruct"
    },
    "image_gen" => {
      "adapter" => "image_generation",
      "url" => "https://api.siliconflow.cn/v1/",
      "api_key" => ENV["SILICONFLOW_API_KEY"],
      "model" => "stabilityai/stable-diffusion-xl-base-1.0"
    },
    "video_gen" => {
      "adapter" => "video_generation",
      "url" => "https://api.siliconflow.cn/v1/",
      "api_key" => ENV["SILICONFLOW_API_KEY"],
      "model" => "Wan-AI/Wan2.2-T2V-A14B"
    }
  },
  "default_llm" => "qwen_vl",
  "template_path" => "./templates",
  "worker_path" => "./workers",
  "logger_file" => "./logs/smart_prompt.log"
}

# Clean up on every exit path (normal end or an uncaught error such as a
# failing Engine.new), not only when the last line of the script is reached.
at_exit { File.delete(config_file) if File.exist?(config_file) }

# Write config to file
File.write(config_file, config.to_yaml)

# Initialize engine
engine = SmartPrompt::Engine.new(config_file)

puts "=== SmartPrompt Video Generation Demo ==="
if ENV["SILICONFLOW_API_KEY"].to_s.empty?
  puts "Note: SILICONFLOW_API_KEY is not set - the API calls below will fail."
end

# Example 1: Simple text-to-video generation
puts "\n=== Example 1: Text-to-Video Generation ==="
begin
  result = engine.call_worker(:video_generator, {
    prompt: "A beautiful sunset over ocean waves, cinematic quality, slow motion",
    duration: 4,
    resolution: "720p",
    fps: 24,
    wait_for_completion: false, # Set to true to wait for completion
    download_to_file: false,    # Set to true to download video
    output_dir: "./generated_videos",
    filename_prefix: "sunset_video"
  })

  puts "Video generation job submitted successfully!"
  puts "Job ID: #{result[:video_data][:job_id]}"
  puts "Status: #{result[:video_data][:status]}"
  puts "Created at: #{result[:video_data][:created_at]}"
rescue => e
  puts "Error in video generation: #{e.message}"
  puts "Note: This example requires a valid SILICONFLOW_API_KEY environment variable"
  puts "Note: Video generation may take several minutes to complete"
end

# Example 2: Creative video generation with style
puts "\n=== Example 2: Creative Video Generation ==="
begin
  result = engine.call_worker(:creative_video_generator, {
    prompt: "A magical forest with glowing fairies and sparkling lights",
    video_style: "fantasy animation, Studio Ghibli style",
    duration: 4,
    resolution: "720p",
    fps: 24,
    wait_for_completion: false,
    download_to_file: false
  })

  puts "Creative video generation job submitted successfully!"
  puts "Job ID: #{result[:video_data][:job_id]}"
  puts "Status: #{result[:video_data][:status]}"
rescue => e
  puts "Error in creative video generation: #{e.message}"
end

# Example 3: Product video generation
puts "\n=== Example 3: Product Video Generation ==="
begin
  result = engine.call_worker(:product_video_generator, {
    prompt: "A modern smartphone rotating slowly on a marble surface",
    duration: 4,
    resolution: "720p",
    fps: 24,
    wait_for_completion: false,
    download_to_file: false
  })

  puts "Product video generation job submitted successfully!"
  puts "Job ID: #{result[:video_data][:job_id]}"
  puts "Status: #{result[:video_data][:status]}"
rescue => e
  puts "Error in product video generation: #{e.message}"
end

# Example 4: Check video status (if we have a job ID from previous examples)
# This requires a valid job_id from a previous video generation, so the call
# is only printed, not executed. Nothing here talks to the API, hence there is
# no error handling around it.
puts "\n=== Example 4: Video Status Check ==="
puts "To check video status, use:"
puts "result = engine.call_worker(:video_status_checker, {"
puts " job_id: 'YOUR_JOB_ID_HERE',"
puts " download_to_file: true"
puts "})"

# Example 5: Direct adapter usage (without worker)
puts "\n=== Example 5: Direct Adapter Usage ==="
begin
  # Get the adapter directly
  adapter = engine.llms["video_gen"]

  # Generate video directly
  video_data = adapter.generate_video(
    "A butterfly flying through a flower garden, nature documentary style",
    duration: 4,
    resolution: "720p",
    fps: 24
  )

  puts "Direct adapter usage successful!"
  puts "Job ID: #{video_data[:job_id]}"
  puts "Status: #{video_data[:status]}"

  # Example of checking status
  # status = adapter.check_video_status(video_data[:job_id])
  # puts "Current status: #{status[:status]}, Progress: #{status[:progress]}"
rescue => e
  puts "Error in direct adapter usage: #{e.message}"
end

# Example 6: Batch video generation
puts "\n=== Example 6: Batch Video Generation ==="
begin
  result = engine.call_worker(:batch_video_generator, {
    prompts: [
      "A cat playing with a ball of yarn",
      "A dog running through a field",
      "A bird flying in the sky"
    ],
    duration: 3,
    resolution: "720p",
    fps: 24,
    wait_for_completion: false
  })

  puts "Batch video generation submitted successfully!"
  puts "Generated #{result[:batch_results].size} video jobs"
  # The block parameter is deliberately not called `result`, so it does not
  # shadow the worker result above.
  result[:batch_results].each do |job|
    prompt = job[:prompt]
    # Show at most 51 characters; append the ellipsis only when text was cut.
    preview = prompt.length > 51 ? "#{prompt[0..50]}..." : prompt
    puts " - Prompt: #{preview}"
    puts " Job ID: #{job[:video_data][:job_id]}"
  end
rescue => e
  puts "Error in batch video generation: #{e.message}"
end

puts "\n=== All examples completed ==="
puts "\nImportant Notes:"
puts "1. Video generation is an asynchronous process"
puts "2. Jobs may take several minutes to complete"
puts "3. Use wait_for_completion: true to wait for completion"
puts "4. Use download_to_file: true to automatically download videos"
puts "5. Check status periodically using video_status_checker worker"

# Clean up: handled by the at_exit hook registered before the config was written.
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
# Zhipu AI (BigModel / GLM) Example for SmartPrompt
#
# Demonstrates every Zhipu model category through one ZhipuAIAdapter:
#   1. Text chat — sync + streaming
#   2. Image + text multimodal (vision)
#   3. Embedding model (embeddings)
#   4. Text-to-image (CogView)
#   5. Text-to-video (CogVideoX, async submit -> poll -> download)
#   6. Speech synthesis (GLM-TTS)
#   7. Speech recognition (GLM-ASR-2512)
#
# Requires a valid Zhipu API key in ZHIPUAI_API_KEY (https://open.bigmodel.cn/).
# Defaults use free-tier models so it works out-of-box once the key is set.
#
# Every demo rescues its own errors so that a failure in one demo does not
# stop the following demos.

require "yaml" # Hash#to_yaml (used below) is only defined once YAML is loaded
require_relative "../lib/smart_prompt"

api_key = ENV["ZHIPUAI_API_KEY"]
base = "https://open.bigmodel.cn/api/paas/v4"

# Temporary config file. It contains the API key in plain text, so it must not
# be left behind when the script aborts.
config_file = "zhipu_config.yml"

# One llm entry per model category; all of them go through the same adapter.
config = {
  "adapters" => { "zhipu" => "ZhipuAIAdapter" },
  "llms" => {
    "glm" => { "adapter" => "zhipu", "url" => base, "api_key" => api_key, "model" => "glm-4-flash" },
    "glm_vision" => { "adapter" => "zhipu", "url" => base, "api_key" => api_key, "model" => "glm-4v-flash" },
    "embedding" => { "adapter" => "zhipu", "url" => base, "api_key" => api_key, "model" => "embedding-3", "dimensions" => 1024 },
    "cogview" => { "adapter" => "zhipu", "url" => base, "api_key" => api_key, "model" => "cogview-3-flash" },
    "cogvideo" => { "adapter" => "zhipu", "url" => base, "api_key" => api_key, "model" => "cogvideox-flash" },
    "glm_tts" => { "adapter" => "zhipu", "url" => base, "api_key" => api_key, "model" => "glm-tts" },
    "glm_asr" => { "adapter" => "zhipu", "url" => base, "api_key" => api_key, "model" => "glm-asr-2512" },
  },
  "default_llm" => "glm",
  "template_path" => "./templates",
  "worker_path" => "./workers",
  "logger_file" => "./logs/smart_prompt.log",
}

# Clean up on every exit path (normal end or an uncaught error such as a
# failing Engine.new), not only when the last line of the script is reached.
at_exit { File.delete(config_file) if File.exist?(config_file) }

File.write(config_file, config.to_yaml)
engine = SmartPrompt::Engine.new(config_file)

puts "=== SmartPrompt 智谱 GLM Demo ==="
# An exported-but-empty variable is as unusable as an unset one.
if api_key.to_s.empty?
  puts "Note: ZHIPUAI_API_KEY is not set — the API calls below will fail at the network layer."
end

# 1. Chat (sync)
puts "\n=== Example 1: 文本对话 (sync) ==="
begin
  result = engine.call_worker(:glm_chat, { prompt: "用一句话介绍智谱GLM。" })
  puts "Reply: #{result}"
rescue => e
  puts "Error: #{e.message}"
end

# 2. Chat (streaming)
puts "\n=== Example 2: 文本对话 (streaming) ==="
begin
  engine.call_worker_by_stream(:glm_chat, { prompt: "写两句关于春天的诗。" }) do |chunk, _|
    # Chunks without a content delta print as an empty string.
    print chunk.dig("choices", 0, "delta", "content").to_s
  end
  puts
rescue => e
  puts "Error: #{e.message}"
end

# 3. Multimodal vision
puts "\n=== Example 3: 图文多模态 ==="
begin
  result = engine.call_worker(:glm_vision, {
    image_url: "https://img1.baidu.com/it/u=1966616150,2146512490&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=282",
    question: "图片里有什么?",
  })
  puts "Vision result: #{result}"
rescue => e
  puts "Error: #{e.message}"
end

# 4. Embeddings (embedding-3)
puts "\n=== Example 4: 向量模型 ==="
begin
  vector = engine.call_worker(:glm_embed, { text: "智谱GLM大模型", length: 1024 })
  # The worker result is not guaranteed to be an Array here, so fall back to
  # printing it as-is instead of hiding errors behind an inline rescue.
  dim = vector.is_a?(Array) ? vector.size : vector
  preview = vector.respond_to?(:first) ? vector.first(5) : vector
  puts "Embedding dim: #{dim} (first 5: #{preview})"
rescue => e
  puts "Error: #{e.message}"
end

# 5. Text-to-image (CogView)
puts "\n=== Example 5: 文生图 ==="
begin
  result = engine.call_worker(:cogview_image, {
    prompt: "一只在书房里读书的猫,水墨画风格",
    size: "1024x1024",
    save_to_file: true,
    output_dir: "./generated_images",
    filename_prefix: "zhipu_cat",
  })
  if result.is_a?(Hash) && result[:images]
    puts "Generated #{result[:images].size} image(s); first URL: #{result[:images].first[:url]}"
    puts "Saved files: #{result[:saved_files]}"
  else
    puts "Result: #{result}"
  end
rescue => e
  puts "Error: #{e.message}"
end

# 6. Text-to-video (CogVideoX, async) — may take a minute or two.
puts "\n=== Example 6: 文生视频 (async) ==="
begin
  result = engine.call_worker(:cogvideo_video, {
    prompt: "一只猫在阳光下打盹",
    wait_for_completion: true,
    download_to_file: true,
    output_dir: "./generated_videos",
    timeout: 600,
  })
  # Same non-Hash guard as the image example, so an unexpected result is
  # printed as-is instead of raising while being indexed.
  if !result.is_a?(Hash)
    puts "Result: #{result}"
  elsif result[:video]
    puts "Video ready: #{result[:video][:video_url]}"
    puts "Downloaded: #{result[:downloaded_file]}" if result[:downloaded_file]
  else
    puts "Submitted task: #{result[:submitted]}"
  end
rescue => e
  puts "Error: #{e.message}"
end

# 7. TTS (GLM-TTS)
puts "\n=== Example 7: 语音合成 (TTS) ==="
begin
  info = engine.call_worker(:glm_tts, { text: "你好,这是智谱语音合成的测试。", output_path: "./generated_audio/zhipu_tts.wav" })
  puts "Audio saved: #{info[:file_path]}"
rescue => e
  puts "Error: #{e.message}"
end

# 8. ASR (GLM-ASR-2512) — needs a real audio file path.
# Defaults to the file requested from the TTS demo above.
puts "\n=== Example 8: 语音识别 (ASR) ==="
audio = ENV["ZHIPU_ASR_SAMPLE"] || "./generated_audio/zhipu_tts.wav"
if File.exist?(audio)
  begin
    result = engine.call_worker(:glm_asr, { audio_file: audio })
    puts "Transcription: #{result[:text]}"
  rescue => e
    puts "Error: #{e.message}"
  end
else
  puts "Skipped: set ZHIPU_ASR_SAMPLE to an audio file path (or run TTS first) to test ASR."
end

puts "\n=== All examples completed ==="

# Clean up: handled by the at_exit hook registered before the config was written.
|