smart_prompt 0.4.4 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -10
  3. data/README.cn.md +307 -64
  4. data/README.md +311 -64
  5. data/Rakefile +10 -1
  6. data/config/anthropic_config.yml +151 -0
  7. data/config/image_generation_config.yml +22 -0
  8. data/config/multimodal_config.yml +85 -0
  9. data/config/sensenova_config.yml +63 -0
  10. data/config/zhipu_config.yml +73 -0
  11. data/examples/anthropic_basic_chat.rb +143 -0
  12. data/examples/anthropic_example.rb +232 -0
  13. data/examples/anthropic_multimodal.rb +212 -0
  14. data/examples/anthropic_streaming.rb +312 -0
  15. data/examples/anthropic_tool_calling.rb +393 -0
  16. data/examples/automatic_cleanup_example.rb +109 -0
  17. data/examples/history_management_examples.rb +522 -0
  18. data/examples/image_generation_example.rb +130 -0
  19. data/examples/monitoring_example.rb +121 -0
  20. data/examples/multimodal_example.rb +63 -0
  21. data/examples/relevance_based_strategy_example.rb +87 -0
  22. data/examples/sensenova_example.rb +129 -0
  23. data/examples/stt_example.rb +287 -0
  24. data/examples/tts_example.rb +244 -0
  25. data/examples/video_generation_example.rb +189 -0
  26. data/examples/zhipu_example.rb +151 -0
  27. data/lib/smart_prompt/anthropic_adapter.rb +363 -281
  28. data/lib/smart_prompt/compression_engine.rb +201 -0
  29. data/lib/smart_prompt/context_strategy.rb +22 -0
  30. data/lib/smart_prompt/conversation.rb +81 -191
  31. data/lib/smart_prompt/engine.rb +36 -19
  32. data/lib/smart_prompt/history_manager.rb +596 -0
  33. data/lib/smart_prompt/hybrid_strategy.rb +222 -0
  34. data/lib/smart_prompt/image_generation_adapter.rb +297 -0
  35. data/lib/smart_prompt/lru_cache.rb +133 -0
  36. data/lib/smart_prompt/message.rb +57 -0
  37. data/lib/smart_prompt/multimodal_adapter.rb +277 -0
  38. data/lib/smart_prompt/openai_adapter.rb +1 -25
  39. data/lib/smart_prompt/persistence_layer.rb +197 -0
  40. data/lib/smart_prompt/relevance_based_strategy.rb +221 -0
  41. data/lib/smart_prompt/sensenova_adapter.rb +410 -0
  42. data/lib/smart_prompt/session.rb +140 -0
  43. data/lib/smart_prompt/sliding_window_strategy.rb +100 -0
  44. data/lib/smart_prompt/stt_adapter.rb +381 -0
  45. data/lib/smart_prompt/summary_based_strategy.rb +152 -0
  46. data/lib/smart_prompt/token_counter.rb +74 -0
  47. data/lib/smart_prompt/tts_adapter.rb +403 -0
  48. data/lib/smart_prompt/version.rb +1 -1
  49. data/lib/smart_prompt/video_generation_adapter.rb +330 -0
  50. data/lib/smart_prompt/worker.rb +25 -3
  51. data/lib/smart_prompt/zhipu_adapter.rb +616 -0
  52. data/lib/smart_prompt.rb +22 -2
  53. data/workers/history_management_examples.rb +407 -0
  54. data/workers/image_generation_workers.rb +119 -0
  55. data/workers/multimodal_workers.rb +110 -0
  56. data/workers/sensenova_workers.rb +62 -0
  57. data/workers/stt_workers.rb +195 -0
  58. data/workers/tts_workers.rb +388 -0
  59. data/workers/video_generation_workers.rb +264 -0
  60. data/workers/zhipu_workers.rb +113 -0
  61. metadata +84 -8
@@ -0,0 +1,244 @@
# TTS Example for SmartPrompt
# This example demonstrates how to use the new TTSAdapter
#
# Flow: build a config hash, write it to a YAML file, create an engine from
# that file, then run seven independent demos. Every demo has its own
# begin/rescue so that a failure in one is printed and the next one still runs.
#
# The YAML file contains the API key in plain text, so it is removed in an
# `ensure` clause: it is deleted even when something outside the per-demo
# rescues raises (for example while the engine is being created).

require 'yaml' # Hash#to_yaml is called below; do not depend on another file loading it
require_relative '../lib/smart_prompt'

# Path of the throw-away config file; referenced by the write, the engine and the cleanup.
config_path = 'tts_config.yml'

# Configuration for TTS capabilities
config = {
  "adapters" => {
    "multimodal" => "MultimodalAdapter",
    "image_generation" => "ImageGenerationAdapter",
    "video_generation" => "VideoGenerationAdapter",
    "tts" => "TTSAdapter"
  },
  "llms" => {
    "qwen_vl" => {
      "adapter" => "multimodal",
      "url" => "https://api.siliconflow.cn/v1/",
      "api_key" => ENV["SILICONFLOW_API_KEY"],
      "model" => "Qwen/Qwen2.5-VL-7B-Instruct"
    },
    "image_gen" => {
      "adapter" => "image_generation",
      "url" => "https://api.siliconflow.cn/v1/",
      "api_key" => ENV["SILICONFLOW_API_KEY"],
      "model" => "stabilityai/stable-diffusion-xl-base-1.0"
    },
    "video_gen" => {
      "adapter" => "video_generation",
      "url" => "https://api.siliconflow.cn/v1/",
      "api_key" => ENV["SILICONFLOW_API_KEY"],
      "model" => "Wan-AI/Wan2.2-T2V-A14B"
    },
    "tts_service" => {
      "adapter" => "tts",
      "url" => "https://api.siliconflow.cn/v1/",
      "api_key" => ENV["SILICONFLOW_API_KEY"],
      "model" => "FunAudioLLM/CosyVoice2-0.5B"
    }
  },
  "default_llm" => "qwen_vl",
  "template_path" => "./templates",
  "worker_path" => "./workers",
  "logger_file" => "./logs/smart_prompt.log"
}

begin
  # Write config to file
  File.write(config_path, config.to_yaml)

  # Initialize engine
  engine = SmartPrompt::Engine.new(config_path)

  puts "=== SmartPrompt TTS Demo ==="

  # Example 1: Basic text-to-speech synthesis
  puts "\n=== Example 1: Basic TTS Synthesis ==="
  begin
    result = engine.call_worker(:tts_synthesizer, {
      text: "欢迎使用智能提示系统,这是一个文本转语音功能的演示。",
      voice: "alloy",
      speed: 1.0,
      response_format: "mp3",
      save_to_file: true,
      output_dir: "./generated_audio",
      filename_prefix: "basic_tts"
    })

    puts "TTS synthesis successful!"
    puts "Audio file: #{result[:audio_file][:file_path]}"
    puts "Text length: #{result[:audio_file][:text_length]} characters"
    puts "Voice: #{result[:audio_file][:voice]}"
  rescue => e
    puts "Error in TTS synthesis: #{e.message}"
    puts "Note: This example requires a valid SILICONFLOW_API_KEY environment variable"
  end

  # Example 2: Multilingual TTS
  puts "\n=== Example 2: Multilingual TTS ==="
  begin
    # English text
    result_en = engine.call_worker(:multilingual_tts, {
      text: "Hello, this is a demonstration of text-to-speech functionality.",
      voice: "echo",
      save_to_file: true,
      output_dir: "./multilingual_audio",
      filename_prefix: "english_tts"
    })

    puts "English TTS successful!"
    puts "Detected language: #{result_en[:detected_language]}"
    puts "Audio file: #{result_en[:audio_file][:file_path]}"

    # Chinese text
    result_zh = engine.call_worker(:multilingual_tts, {
      text: "这是一个中文文本转语音的演示,支持多种语言。",
      voice: "nova",
      save_to_file: true,
      output_dir: "./multilingual_audio",
      filename_prefix: "chinese_tts"
    })

    puts "Chinese TTS successful!"
    puts "Detected language: #{result_zh[:detected_language]}"
    puts "Audio file: #{result_zh[:audio_file][:file_path]}"
  rescue => e
    puts "Error in multilingual TTS: #{e.message}"
  end

  # Example 3: Voice selection demo
  puts "\n=== Example 3: Voice Selection Demo ==="
  begin
    result = engine.call_worker(:voice_selector, {
      text: "这是一个不同音色的演示,您可以听到不同声音的朗读效果。",
      save_to_file: true,
      output_dir: "./voice_demos"
    })

    puts "Voice selection demo successful!"
    puts "Available voices: #{result[:available_voices].keys.join(', ')}"
    puts "Selected voice: #{result[:selected_voice]}"
    puts "Audio file: #{result[:audio_file][:file_path]}"
  rescue => e
    puts "Error in voice selection: #{e.message}"
  end

  # Example 4: Speed variation demo
  puts "\n=== Example 4: Speed Variation Demo ==="
  begin
    result = engine.call_worker(:speed_variation_tts, {
      text: "这是一个语速变化的演示,您可以听到不同语速的朗读效果。",
      voice: "alloy",
      speeds: [0.5, 0.75, 1.0, 1.5, 2.0],
      save_to_file: true,
      output_dir: "./speed_variations"
    })

    puts "Speed variation demo successful!"
    puts "Generated #{result[:speed_variations].size} audio files at different speeds"
    result[:speed_variations].each do |variation|
      puts " - Speed #{variation[:speed]}: #{variation[:audio_file][:file_path]}"
    end
  rescue => e
    puts "Error in speed variation: #{e.message}"
  end

  # Example 5: Custom voice management
  puts "\n=== Example 5: Custom Voice Management ==="
  begin
    # List available voices
    result = engine.call_worker(:custom_voice_manager, {
      action: "list"
    })

    puts "Voice management demo successful!"
    puts "Predefined voices: #{result[:predefined_voices].keys.join(', ')}"
    puts "Custom voices: #{result[:custom_voices].size}"

    # Note: Creating custom voices requires reference audio files
    # Uncomment the following lines if you have reference audio files:
    #
    # result = engine.call_worker(:custom_voice_manager, {
    #   action: "create",
    #   name: "my_custom_voice",
    #   reference_audio_file: "./reference_audio.wav",
    #   description: "My custom voice created from reference audio"
    # })
    #
    # puts "Custom voice created: #{result[:voice_data][:voice_id]}"
  rescue => e
    puts "Error in voice management: #{e.message}"
  end

  # Example 6: Batch TTS processing
  puts "\n=== Example 6: Batch TTS Processing ==="
  begin
    result = engine.call_worker(:batch_tts, {
      texts: [
        "这是第一条文本内容。",
        "这是第二条文本内容,用于批量处理演示。",
        "这是第三条文本内容,展示批量文本转语音功能。"
      ],
      voice: "alloy",
      save_to_file: true,
      output_dir: "./batch_audio"
    })

    puts "Batch TTS processing successful!"
    puts "Generated #{result[:batch_results].size} audio files"
    result[:batch_results].each do |batch_result|
      # :index is printed one-based for display
      puts " - Text #{batch_result[:index] + 1}: #{batch_result[:audio_file][:file_path]}"
    end
  rescue => e
    puts "Error in batch TTS: #{e.message}"
  end

  # Example 7: Direct adapter usage
  puts "\n=== Example 7: Direct Adapter Usage ==="
  begin
    # Get the adapter directly
    adapter = engine.llms["tts_service"]

    # Synthesize speech directly
    audio_data = adapter.synthesize_speech(
      "这是直接使用适配器的演示,不通过Worker。",
      voice: "echo",
      speed: 1.2,
      response_format: "mp3"
    )

    puts "Direct adapter usage successful!"
    puts "Generated audio data with format: #{audio_data[:format]}"
    puts "Text length: #{audio_data[:text_length]} characters"

    # Save to file; the timestamp keeps repeated runs from reusing one file name
    output_path = "./direct_audio/direct_tts_#{Time.now.to_i}.mp3"
    result = adapter.synthesize_to_file(
      "这是直接保存到文件的演示。",
      output_path,
      voice: "nova",
      speed: 1.0
    )

    puts "Direct file synthesis successful!"
    puts "Audio file: #{result[:file_path]}"
  rescue => e
    puts "Error in direct adapter usage: #{e.message}"
  end

  puts "\n=== All examples completed ==="
  puts "\nImportant Notes:"
  puts "1. TTS requires valid SILICONFLOW_API_KEY environment variable"
  puts "2. Audio files are saved in various formats (mp3, wav, etc.)"
  puts "3. Custom voice creation requires reference audio files"
  puts "4. Multiple languages are supported (Chinese, English, Japanese, Korean)"
  puts "5. Speed can be adjusted from 0.25x to 4.0x"
ensure
  # Clean up: always remove the config file, because it contains the API key
  File.delete(config_path) if File.exist?(config_path)
end
@@ -0,0 +1,189 @@
# Video Generation Example for SmartPrompt
# This example demonstrates how to use the new VideoGenerationAdapter
#
# Flow: build a config hash, write it to a YAML file, create an engine from
# that file, then run six independent demos. Every demo has its own
# begin/rescue so that a failure in one is printed and the next one still runs.
#
# The YAML file contains the API key in plain text, so it is removed in an
# `ensure` clause: it is deleted even when something outside the per-demo
# rescues raises (for example while the engine is being created).

require 'yaml' # Hash#to_yaml is called below; do not depend on another file loading it
require_relative '../lib/smart_prompt'

# Path of the throw-away config file; referenced by the write, the engine and the cleanup.
config_path = 'video_generation_config.yml'

# Configuration for video generation capabilities
config = {
  "adapters" => {
    "multimodal" => "MultimodalAdapter",
    "image_generation" => "ImageGenerationAdapter",
    "video_generation" => "VideoGenerationAdapter"
  },
  "llms" => {
    "qwen_vl" => {
      "adapter" => "multimodal",
      "url" => "https://api.siliconflow.cn/v1/",
      "api_key" => ENV["SILICONFLOW_API_KEY"],
      "model" => "Qwen/Qwen2.5-VL-7B-Instruct"
    },
    "image_gen" => {
      "adapter" => "image_generation",
      "url" => "https://api.siliconflow.cn/v1/",
      "api_key" => ENV["SILICONFLOW_API_KEY"],
      "model" => "stabilityai/stable-diffusion-xl-base-1.0"
    },
    "video_gen" => {
      "adapter" => "video_generation",
      "url" => "https://api.siliconflow.cn/v1/",
      "api_key" => ENV["SILICONFLOW_API_KEY"],
      "model" => "Wan-AI/Wan2.2-T2V-A14B"
    }
  },
  "default_llm" => "qwen_vl",
  "template_path" => "./templates",
  "worker_path" => "./workers",
  "logger_file" => "./logs/smart_prompt.log"
}

begin
  # Write config to file
  File.write(config_path, config.to_yaml)

  # Initialize engine
  engine = SmartPrompt::Engine.new(config_path)

  puts "=== SmartPrompt Video Generation Demo ==="

  # Example 1: Simple text-to-video generation
  puts "\n=== Example 1: Text-to-Video Generation ==="
  begin
    result = engine.call_worker(:video_generator, {
      prompt: "A beautiful sunset over ocean waves, cinematic quality, slow motion",
      duration: 4,
      resolution: "720p",
      fps: 24,
      wait_for_completion: false, # Set to true to wait for completion
      download_to_file: false, # Set to true to download video
      output_dir: "./generated_videos",
      filename_prefix: "sunset_video"
    })

    puts "Video generation job submitted successfully!"
    puts "Job ID: #{result[:video_data][:job_id]}"
    puts "Status: #{result[:video_data][:status]}"
    puts "Created at: #{result[:video_data][:created_at]}"
  rescue => e
    puts "Error in video generation: #{e.message}"
    puts "Note: This example requires a valid SILICONFLOW_API_KEY environment variable"
    puts "Note: Video generation may take several minutes to complete"
  end

  # Example 2: Creative video generation with style
  puts "\n=== Example 2: Creative Video Generation ==="
  begin
    result = engine.call_worker(:creative_video_generator, {
      prompt: "A magical forest with glowing fairies and sparkling lights",
      video_style: "fantasy animation, Studio Ghibli style",
      duration: 4,
      resolution: "720p",
      fps: 24,
      wait_for_completion: false,
      download_to_file: false
    })

    puts "Creative video generation job submitted successfully!"
    puts "Job ID: #{result[:video_data][:job_id]}"
    puts "Status: #{result[:video_data][:status]}"
  rescue => e
    puts "Error in creative video generation: #{e.message}"
  end

  # Example 3: Product video generation
  puts "\n=== Example 3: Product Video Generation ==="
  begin
    result = engine.call_worker(:product_video_generator, {
      prompt: "A modern smartphone rotating slowly on a marble surface",
      duration: 4,
      resolution: "720p",
      fps: 24,
      wait_for_completion: false,
      download_to_file: false
    })

    puts "Product video generation job submitted successfully!"
    puts "Job ID: #{result[:video_data][:job_id]}"
    puts "Status: #{result[:video_data][:status]}"
  rescue => e
    puts "Error in product video generation: #{e.message}"
  end

  # Example 4: Check video status (if we have a job ID from previous examples)
  puts "\n=== Example 4: Video Status Check ==="
  begin
    # This example requires a valid job_id from a previous video generation
    # For demonstration, we'll show the method but skip execution
    puts "To check video status, use:"
    puts "result = engine.call_worker(:video_status_checker, {"
    puts " job_id: 'YOUR_JOB_ID_HERE',"
    puts " download_to_file: true"
    puts "})"
  rescue => e
    puts "Error in status check: #{e.message}"
  end

  # Example 5: Direct adapter usage (without worker)
  puts "\n=== Example 5: Direct Adapter Usage ==="
  begin
    # Get the adapter directly
    adapter = engine.llms["video_gen"]

    # Generate video directly
    video_data = adapter.generate_video(
      "A butterfly flying through a flower garden, nature documentary style",
      duration: 4,
      resolution: "720p",
      fps: 24
    )

    puts "Direct adapter usage successful!"
    puts "Job ID: #{video_data[:job_id]}"
    puts "Status: #{video_data[:status]}"

    # Example of checking status
    # status = adapter.check_video_status(video_data[:job_id])
    # puts "Current status: #{status[:status]}, Progress: #{status[:progress]}"
  rescue => e
    puts "Error in direct adapter usage: #{e.message}"
  end

  # Example 6: Batch video generation
  puts "\n=== Example 6: Batch Video Generation ==="
  begin
    result = engine.call_worker(:batch_video_generator, {
      prompts: [
        "A cat playing with a ball of yarn",
        "A dog running through a field",
        "A bird flying in the sky"
      ],
      duration: 3,
      resolution: "720p",
      fps: 24,
      wait_for_completion: false
    })

    puts "Batch video generation submitted successfully!"
    puts "Generated #{result[:batch_results].size} video jobs"
    # The block parameter has its own name so it does not shadow the outer `result`.
    result[:batch_results].each do |batch_result|
      # Only the first 51 characters of the prompt are shown, followed by "..."
      puts " - Prompt: #{batch_result[:prompt][0..50]}..."
      puts " Job ID: #{batch_result[:video_data][:job_id]}"
    end
  rescue => e
    puts "Error in batch video generation: #{e.message}"
  end

  puts "\n=== All examples completed ==="
  puts "\nImportant Notes:"
  puts "1. Video generation is an asynchronous process"
  puts "2. Jobs may take several minutes to complete"
  puts "3. Use wait_for_completion: true to wait for completion"
  puts "4. Use download_to_file: true to automatically download videos"
  puts "5. Check status periodically using video_status_checker worker"
ensure
  # Clean up: always remove the config file, because it contains the API key
  File.delete(config_path) if File.exist?(config_path)
end
@@ -0,0 +1,151 @@
# Zhipu AI (BigModel / GLM) Example for SmartPrompt
#
# Demonstrates every Zhipu model category through one ZhipuAIAdapter:
#   1. Text chat — sync + streaming
#   2. Image + text multimodal (vision)
#   3. Embedding model (embeddings)
#   4. Text-to-image (CogView)
#   5. Text-to-video (CogVideoX, async submit -> poll -> download)
#   6. Speech synthesis (GLM-TTS)
#   7. Speech recognition (GLM-ASR-2512)
#
# Requires a valid Zhipu API key in ZHIPUAI_API_KEY (https://open.bigmodel.cn/).
# Defaults use free-tier models so it works out-of-box once the key is set.
#
# The generated YAML config contains the API key in plain text, so it is removed
# in an `ensure` clause: it is deleted even when something outside the
# per-example rescues raises (for example while the engine is being created).

require "yaml" # Hash#to_yaml is called below; do not depend on another file loading it
require_relative "../lib/smart_prompt"

api_key = ENV["ZHIPUAI_API_KEY"]
base = "https://open.bigmodel.cn/api/paas/v4"

# Path of the throw-away config file; referenced by the write, the engine and the cleanup.
config_path = "zhipu_config.yml"

# One adapter class, seven named model entries that differ only in "model"
# (plus "dimensions" for the embedding entry).
config = {
  "adapters" => { "zhipu" => "ZhipuAIAdapter" },
  "llms" => {
    "glm" => { "adapter" => "zhipu", "url" => base, "api_key" => api_key, "model" => "glm-4-flash" },
    "glm_vision" => { "adapter" => "zhipu", "url" => base, "api_key" => api_key, "model" => "glm-4v-flash" },
    "embedding" => { "adapter" => "zhipu", "url" => base, "api_key" => api_key, "model" => "embedding-3", "dimensions" => 1024 },
    "cogview" => { "adapter" => "zhipu", "url" => base, "api_key" => api_key, "model" => "cogview-3-flash" },
    "cogvideo" => { "adapter" => "zhipu", "url" => base, "api_key" => api_key, "model" => "cogvideox-flash" },
    "glm_tts" => { "adapter" => "zhipu", "url" => base, "api_key" => api_key, "model" => "glm-tts" },
    "glm_asr" => { "adapter" => "zhipu", "url" => base, "api_key" => api_key, "model" => "glm-asr-2512" },
  },
  "default_llm" => "glm",
  "template_path" => "./templates",
  "worker_path" => "./workers",
  "logger_file" => "./logs/smart_prompt.log",
}

begin
  File.write(config_path, config.to_yaml)
  engine = SmartPrompt::Engine.new(config_path)

  puts "=== SmartPrompt 智谱 GLM Demo ==="
  unless api_key
    puts "Note: ZHIPUAI_API_KEY is not set — the API calls below will fail at the network layer."
  end

  # 1. Chat (sync)
  puts "\n=== Example 1: 文本对话 (sync) ==="
  begin
    result = engine.call_worker(:glm_chat, { prompt: "用一句话介绍智谱GLM。" })
    puts "Reply: #{result}"
  rescue => e
    puts "Error: #{e.message}"
  end

  # 2. Chat (streaming)
  puts "\n=== Example 2: 文本对话 (streaming) ==="
  begin
    engine.call_worker_by_stream(:glm_chat, { prompt: "写两句关于春天的诗。" }) do |chunk, _|
      # A chunk without delta content digs to nil; to_s turns that into "" so nothing is printed
      print chunk.dig("choices", 0, "delta", "content").to_s
    end
    puts
  rescue => e
    puts "Error: #{e.message}"
  end

  # 3. Multimodal vision
  puts "\n=== Example 3: 图文多模态 ==="
  begin
    result = engine.call_worker(:glm_vision, {
      image_url: "https://img1.baidu.com/it/u=1966616150,2146512490&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=282",
      question: "图片里有什么?",
    })
    puts "Vision result: #{result}"
  rescue => e
    puts "Error: #{e.message}"
  end

  # 4. Embeddings (embedding-3)
  puts "\n=== Example 4: 向量模型 ==="
  begin
    vector = engine.call_worker(:glm_embed, { text: "智谱GLM大模型", length: 1024 })
    # An Array result reports its size; anything else is echoed unchanged.
    dim = vector.is_a?(Array) ? vector.size : vector
    # Ask the object whether it supports #first instead of using an inline
    # `rescue` modifier, which would hide every StandardError raised here.
    preview = vector.respond_to?(:first) ? vector.first(5) : vector
    puts "Embedding dim: #{dim} (first 5: #{preview})"
  rescue => e
    puts "Error: #{e.message}"
  end

  # 5. Text-to-image (CogView)
  puts "\n=== Example 5: 文生图 ==="
  begin
    result = engine.call_worker(:cogview_image, {
      prompt: "一只在书房里读书的猫,水墨画风格",
      size: "1024x1024",
      save_to_file: true,
      output_dir: "./generated_images",
      filename_prefix: "zhipu_cat",
    })
    if result.is_a?(Hash) && result[:images]
      puts "Generated #{result[:images].size} image(s); first URL: #{result[:images].first[:url]}"
      puts "Saved files: #{result[:saved_files]}"
    else
      puts "Result: #{result}"
    end
  rescue => e
    puts "Error: #{e.message}"
  end

  # 6. Text-to-video (CogVideoX, async) — may take a minute or two.
  puts "\n=== Example 6: 文生视频 (async) ==="
  begin
    result = engine.call_worker(:cogvideo_video, {
      prompt: "一只猫在阳光下打盹",
      wait_for_completion: true,
      download_to_file: true,
      output_dir: "./generated_videos",
      timeout: 600,
    })
    if result[:video]
      puts "Video ready: #{result[:video][:video_url]}"
      puts "Downloaded: #{result[:downloaded_file]}" if result[:downloaded_file]
    else
      puts "Submitted task: #{result[:submitted]}"
    end
  rescue => e
    puts "Error: #{e.message}"
  end

  # 7. TTS (GLM-TTS)
  puts "\n=== Example 7: 语音合成 (TTS) ==="
  begin
    info = engine.call_worker(:glm_tts, { text: "你好,这是智谱语音合成的测试。", output_path: "./generated_audio/zhipu_tts.wav" })
    puts "Audio saved: #{info[:file_path]}"
  rescue => e
    puts "Error: #{e.message}"
  end

  # 8. ASR (GLM-ASR-2512) — needs a real audio file path.
  # Falls back to the file requested from the TTS example when no sample is configured.
  puts "\n=== Example 8: 语音识别 (ASR) ==="
  audio = ENV["ZHIPU_ASR_SAMPLE"] || "./generated_audio/zhipu_tts.wav"
  if File.exist?(audio)
    begin
      result = engine.call_worker(:glm_asr, { audio_file: audio })
      puts "Transcription: #{result[:text]}"
    rescue => e
      puts "Error: #{e.message}"
    end
  else
    puts "Skipped: set ZHIPU_ASR_SAMPLE to an audio file path (or run TTS first) to test ASR."
  end

  puts "\n=== All examples completed ==="
ensure
  # Always remove the config file, because it contains the API key
  File.delete(config_path) if File.exist?(config_path)
end