smart_prompt 0.4.4 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/README.cn.md +305 -11
  4. data/README.md +309 -11
  5. data/Rakefile +10 -1
  6. data/config/anthropic_config.yml +151 -0
  7. data/config/image_generation_config.yml +22 -0
  8. data/config/multimodal_config.yml +85 -0
  9. data/config/sensenova_config.yml +63 -0
  10. data/config/zhipu_config.yml +73 -0
  11. data/docs/ANTHROPIC_EXAMPLES.md +559 -0
  12. data/docs/CONVERSATION_INTEGRATION_SUMMARY.md +155 -0
  13. data/docs/HISTORY_EXAMPLES_README.md +533 -0
  14. data/docs/HISTORY_MANAGEMENT_GUIDE.md +797 -0
  15. data/docs/MONITORING_GUIDE.md +278 -0
  16. data/docs/MULTIMODAL_README.md +265 -0
  17. data/docs/RELEVANCE_BASED_STRATEGY_IMPLEMENTATION.md +124 -0
  18. data/docs/STT_README.md +302 -0
  19. data/docs/TTS_README.md +303 -0
  20. data/docs/VIDEO_GENERATION_README.md +246 -0
  21. data/docs/delete_files_list.md +124 -0
  22. data/examples/anthropic_basic_chat.rb +143 -0
  23. data/examples/anthropic_example.rb +232 -0
  24. data/examples/anthropic_multimodal.rb +212 -0
  25. data/examples/anthropic_streaming.rb +312 -0
  26. data/examples/anthropic_tool_calling.rb +393 -0
  27. data/examples/automatic_cleanup_example.rb +109 -0
  28. data/examples/history_management_examples.rb +522 -0
  29. data/examples/image_generation_example.rb +130 -0
  30. data/examples/monitoring_example.rb +121 -0
  31. data/examples/multimodal_example.rb +63 -0
  32. data/examples/relevance_based_strategy_example.rb +87 -0
  33. data/examples/sensenova_example.rb +129 -0
  34. data/examples/stt_example.rb +287 -0
  35. data/examples/tts_example.rb +244 -0
  36. data/examples/video_generation_example.rb +189 -0
  37. data/examples/zhipu_example.rb +151 -0
  38. data/lib/smart_prompt/anthropic_adapter.rb +407 -298
  39. data/lib/smart_prompt/compression_engine.rb +201 -0
  40. data/lib/smart_prompt/context_strategy.rb +22 -0
  41. data/lib/smart_prompt/conversation.rb +47 -4
  42. data/lib/smart_prompt/engine.rb +29 -2
  43. data/lib/smart_prompt/history_manager.rb +596 -0
  44. data/lib/smart_prompt/hybrid_strategy.rb +222 -0
  45. data/lib/smart_prompt/image_generation_adapter.rb +297 -0
  46. data/lib/smart_prompt/lru_cache.rb +133 -0
  47. data/lib/smart_prompt/message.rb +57 -0
  48. data/lib/smart_prompt/multimodal_adapter.rb +277 -0
  49. data/lib/smart_prompt/persistence_layer.rb +197 -0
  50. data/lib/smart_prompt/relevance_based_strategy.rb +221 -0
  51. data/lib/smart_prompt/sensenova_adapter.rb +410 -0
  52. data/lib/smart_prompt/session.rb +140 -0
  53. data/lib/smart_prompt/sliding_window_strategy.rb +100 -0
  54. data/lib/smart_prompt/stt_adapter.rb +381 -0
  55. data/lib/smart_prompt/summary_based_strategy.rb +152 -0
  56. data/lib/smart_prompt/token_counter.rb +74 -0
  57. data/lib/smart_prompt/tts_adapter.rb +403 -0
  58. data/lib/smart_prompt/version.rb +1 -1
  59. data/lib/smart_prompt/video_generation_adapter.rb +330 -0
  60. data/lib/smart_prompt/worker.rb +28 -3
  61. data/lib/smart_prompt/zhipu_adapter.rb +616 -0
  62. data/lib/smart_prompt.rb +21 -0
  63. data/workers/history_management_examples.rb +407 -0
  64. data/workers/image_generation_workers.rb +119 -0
  65. data/workers/multimodal_workers.rb +110 -0
  66. data/workers/sensenova_workers.rb +62 -0
  67. data/workers/stt_workers.rb +195 -0
  68. data/workers/tts_workers.rb +388 -0
  69. data/workers/video_generation_workers.rb +264 -0
  70. data/workers/zhipu_workers.rb +113 -0
  71. metadata +88 -1
@@ -0,0 +1,287 @@
1
+ # STT Example for SmartPrompt
2
+ # This example demonstrates how to use the new STTAdapter
3
+
4
+ require_relative '../lib/smart_prompt'
5
+
6
+ # Configuration for STT capabilities
7
+ config = {
8
+ "adapters" => {
9
+ "multimodal" => "MultimodalAdapter",
10
+ "image_generation" => "ImageGenerationAdapter",
11
+ "video_generation" => "VideoGenerationAdapter",
12
+ "tts" => "TTSAdapter",
13
+ "stt" => "STTAdapter"
14
+ },
15
+ "llms" => {
16
+ "qwen_vl" => {
17
+ "adapter" => "multimodal",
18
+ "url" => "https://api.siliconflow.cn/v1/",
19
+ "api_key" => ENV["SILICONFLOW_API_KEY"],
20
+ "model" => "Qwen/Qwen2.5-VL-7B-Instruct"
21
+ },
22
+ "image_gen" => {
23
+ "adapter" => "image_generation",
24
+ "url" => "https://api.siliconflow.cn/v1/",
25
+ "api_key" => ENV["SILICONFLOW_API_KEY"],
26
+ "model" => "stabilityai/stable-diffusion-xl-base-1.0"
27
+ },
28
+ "video_gen" => {
29
+ "adapter" => "video_generation",
30
+ "url" => "https://api.siliconflow.cn/v1/",
31
+ "api_key" => ENV["SILICONFLOW_API_KEY"],
32
+ "model" => "Wan-AI/Wan2.2-T2V-A14B"
33
+ },
34
+ "tts_service" => {
35
+ "adapter" => "tts",
36
+ "url" => "https://api.siliconflow.cn/v1/",
37
+ "api_key" => ENV["SILICONFLOW_API_KEY"],
38
+ "model" => "FunAudioLLM/CosyVoice2-0.5B"
39
+ },
40
+ "stt_service" => {
41
+ "adapter" => "stt",
42
+ "url" => "https://api.siliconflow.cn/v1/",
43
+ "api_key" => ENV["SILICONFLOW_API_KEY"],
44
+ "model" => "FunAudioLLM/CosyVoice2-0.5B"
45
+ }
46
+ },
47
+ "default_llm" => "qwen_vl",
48
+ "template_path" => "./templates",
49
+ "worker_path" => "./workers",
50
+ "logger_file" => "./logs/smart_prompt.log"
51
+ }
52
+
53
+ # Write config to file: the hash is serialized to stt_config.yml in the current directory (NOTE(review): this includes the api_key values in plain text, and the file is only removed by the File.delete at the very end of the script)
54
+ File.write('stt_config.yml', config.to_yaml)
55
+
56
+ # Initialize engine from the YAML file written above
57
+ engine = SmartPrompt::Engine.new('stt_config.yml')
58
+
59
+ puts "=== SmartPrompt STT Demo ==="
60
+
61
+ # Example 1: Basic speech-to-text transcription of a local file through the :stt_transcriber worker
62
+ puts "\n=== Example 1: Basic STT Transcription ==="
63
+ begin
64
+ # Note: This example requires an actual audio file
65
+ # Replace with a real audio file path for testing
66
+ audio_file_path = "./test_audio.wav"
67
+
68
+ if File.exist?(audio_file_path) # the worker is only called when the sample file is present
69
+ result = engine.call_worker(:stt_transcriber, {
70
+ audio_file: audio_file_path,
71
+ language: "zh",
72
+ response_format: "json"
73
+ }) # read below as result[:transcription] with :text, :language, :duration and :file_size
74
+
75
+ puts "STT transcription successful!"
76
+ puts "Transcribed text: #{result[:transcription][:text]}"
77
+ puts "Language: #{result[:transcription][:language]}"
78
+ puts "Duration: #{result[:transcription][:duration]} seconds"
79
+ puts "File size: #{result[:transcription][:file_size]} bytes"
80
+ else
81
+ puts "Audio file not found: #{audio_file_path}"
82
+ puts "Please create a test audio file to run this example"
83
+ end
84
+
85
+ rescue => e # any StandardError is reported and the script continues with the next example
86
+ puts "Error in STT transcription: #{e.message}"
87
+ puts "Note: This example requires a valid SILICONFLOW_API_KEY environment variable"
88
+ end
89
+
90
+ # Example 2: URL-based transcription through the :stt_url_transcriber worker (no local file check; the worker is always called)
91
+ puts "\n=== Example 2: URL-based STT Transcription ==="
92
+ begin
93
+ # Note: Replace with a real audio URL for testing (the placeholder below is example.com)
94
+ audio_url = "https://example.com/audio.wav"
95
+
96
+ result = engine.call_worker(:stt_url_transcriber, {
97
+ audio_url: audio_url,
98
+ language: "en",
99
+ response_format: "text"
100
+ }) # read below as result[:transcription] with :text and :audio_url
101
+
102
+ puts "URL-based STT transcription successful!"
103
+ puts "Transcribed text: #{result[:transcription][:text]}"
104
+ puts "Audio URL: #{result[:transcription][:audio_url]}"
105
+
106
+ rescue => e # any StandardError is reported and the script continues with the next example
107
+ puts "Error in URL-based STT: #{e.message}"
108
+ puts "Note: This requires a valid audio URL"
109
+ end
110
+
111
+ # Example 3: Batch transcription of several local files through the :batch_stt worker
112
+ puts "\n=== Example 3: Batch STT Processing ==="
113
+ begin
114
+ # Note: Replace with real audio files for testing
115
+ audio_files = ["./audio1.wav", "./audio2.wav", "./audio3.wav"]
116
+ existing_files = audio_files.select { |f| File.exist?(f) } # only files that are actually on disk are submitted
117
+
118
+ if existing_files.any?
119
+ result = engine.call_worker(:batch_stt, {
120
+ audio_files: existing_files,
121
+ language: "zh"
122
+ }) # read below as result[:batch_result] with :total_files, :successful, :failed and :results
123
+
124
+ puts "Batch STT processing successful!"
125
+ puts "Total files: #{result[:batch_result][:total_files]}"
126
+ puts "Successful: #{result[:batch_result][:successful]}"
127
+ puts "Failed: #{result[:batch_result][:failed]}"
128
+
129
+ result[:batch_result][:results].each do |file_result| # one entry per file, carrying :success, :file and either :transcription or :error
130
+ if file_result[:success]
131
+ puts " - #{File.basename(file_result[:file])}: #{file_result[:transcription][:text].length} characters"
132
+ else
133
+ puts " - #{File.basename(file_result[:file])}: ERROR - #{file_result[:error]}"
134
+ end
135
+ end
136
+ else
137
+ puts "No audio files found for batch processing"
138
+ puts "Please create test audio files to run this example"
139
+ end
140
+
141
+ rescue => e # any StandardError is reported and the script continues with the next example
142
+ puts "Error in batch STT: #{e.message}"
143
+ end
144
+
145
+ # Example 4: Audio file information for a local file through the :audio_info worker
146
+ puts "\n=== Example 4: Audio File Information ==="
147
+ begin
148
+ audio_file_path = "./test_audio.wav"
149
+
150
+ if File.exist?(audio_file_path) # the worker is only called when the sample file is present
151
+ result = engine.call_worker(:audio_info, {
152
+ audio_file: audio_file_path
153
+ }) # read below as result[:audio_info] with :file_name, :file_size, :format, :estimated_duration and :supported
154
+
155
+ puts "Audio file information retrieved!"
156
+ puts "File name: #{result[:audio_info][:file_name]}"
157
+ puts "File size: #{result[:audio_info][:file_size]} bytes"
158
+ puts "Format: #{result[:audio_info][:format]}"
159
+ puts "Estimated duration: #{result[:audio_info][:estimated_duration]} seconds"
160
+ puts "Supported: #{result[:audio_info][:supported]}"
161
+ else
162
+ puts "Audio file not found: #{audio_file_path}"
163
+ end
164
+
165
+ rescue => e # any StandardError is reported and the script continues with the next example
166
+ puts "Error getting audio info: #{e.message}"
167
+ end
168
+
169
+ # Example 5: Language detection on plain text through the :language_detector worker (no audio file involved)
170
+ puts "\n=== Example 5: Language Detection ==="
171
+ begin
172
+ # Test with Chinese text
173
+ result = engine.call_worker(:language_detector, {
174
+ text: "这是一个中文文本,用于语言检测演示。"
175
+ }) # read below as result[:text] and result[:detected_language]
176
+
177
+ puts "Language detection successful!"
178
+ puts "Text: #{result[:text]}"
179
+ puts "Detected language: #{result[:detected_language]}"
180
+
181
+ # Test with English text (second, independent worker call)
182
+ result_en = engine.call_worker(:language_detector, {
183
+ text: "This is an English text for language detection demonstration."
184
+ })
185
+
186
+ puts "English text detected as: #{result_en[:detected_language]}"
187
+
188
+ rescue => e # a failure in either call skips the rest of this example
189
+ puts "Error in language detection: #{e.message}"
190
+ end
191
+
192
+ # Example 6: Multi-language STT on a local file through the :multilingual_stt worker (no language is passed in)
193
+ puts "\n=== Example 6: Multi-language STT ==="
194
+ begin
195
+ audio_file_path = "./test_audio.wav"
196
+
197
+ if File.exist?(audio_file_path) # the worker is only called when the sample file is present
198
+ result = engine.call_worker(:multilingual_stt, {
199
+ audio_file: audio_file_path
200
+ }) # read below as :detected_language, :initial_transcription and optionally :improved_transcription
201
+
202
+ puts "Multi-language STT successful!"
203
+ puts "Detected language: #{result[:detected_language]}"
204
+ puts "Initial transcription: #{result[:initial_transcription][:text]}"
205
+
206
+ if result[:improved_transcription] # printed only when the worker returned an improved transcription
207
+ puts "Improved transcription: #{result[:improved_transcription][:text]}"
208
+ end
209
+ else
210
+ puts "Audio file not found: #{audio_file_path}"
211
+ end
212
+
213
+ rescue => e # any StandardError is reported and the script continues with the next example
214
+ puts "Error in multi-language STT: #{e.message}"
215
+ end
216
+
217
+ # Example 7: Format conversion - one local file requested in several response formats through the :stt_format_converter worker
218
+ puts "\n=== Example 7: STT Format Conversion ==="
219
+ begin
220
+ audio_file_path = "./test_audio.wav"
221
+
222
+ if File.exist?(audio_file_path) # the worker is only called when the sample file is present
223
+ result = engine.call_worker(:stt_format_converter, {
224
+ audio_file: audio_file_path,
225
+ formats: ["json", "text", "srt", "vtt"]
226
+ })
227
+
228
+ puts "Format conversion successful!"
229
+ result[:format_results].each do |format, transcription| # iterated as (format, transcription) pairs; presumably a hash keyed by format name - confirm against the worker
230
+ puts " - #{format.upcase}: #{transcription[:text].length} characters"
231
+ end
232
+ else
233
+ puts "Audio file not found: #{audio_file_path}"
234
+ end
235
+
236
+ rescue => e # any StandardError is reported and the script continues with the next example
237
+ puts "Error in format conversion: #{e.message}"
238
+ end
239
+
240
+ # Example 8: Direct adapter usage - calls transcribe_audio, get_audio_info and detect_language on the adapter without going through a worker
241
+ puts "\n=== Example 8: Direct Adapter Usage ==="
242
+ begin
243
+ # Get the adapter directly (looked up under the stt_service key used in the config)
244
+ adapter = engine.llms["stt_service"]
245
+
246
+ audio_file_path = "./test_audio.wav"
247
+
248
+ if File.exist?(audio_file_path) # adapter calls are only made when the sample file is present
249
+ # Transcribe audio directly
250
+ transcription_data = adapter.transcribe_audio(
251
+ audio_file_path,
252
+ language: "zh",
253
+ temperature: 0.0,
254
+ response_format: "json"
255
+ ) # read below with :text, :language and :duration
256
+
257
+ puts "Direct adapter usage successful!"
258
+ puts "Transcribed text: #{transcription_data[:text]}"
259
+ puts "Language: #{transcription_data[:language]}"
260
+ puts "Duration: #{transcription_data[:duration]} seconds"
261
+
262
+ # Get audio information (read below with :format and :file_size)
263
+ audio_info = adapter.get_audio_info(audio_file_path)
264
+ puts "Audio info - Format: #{audio_info[:format]}, Size: #{audio_info[:file_size]} bytes"
265
+
266
+ # Detect language of the text transcribed above
267
+ detected_language = adapter.detect_language(transcription_data[:text])
268
+ puts "Detected language: #{detected_language}"
269
+
270
+ else
271
+ puts "Audio file not found: #{audio_file_path}"
272
+ end
273
+
274
+ rescue => e # any StandardError is reported; this is the last example before the closing notes
275
+ puts "Error in direct adapter usage: #{e.message}"
276
+ end
277
+
278
+ puts "\n=== All examples completed ===" # closing banner followed by usage notes
279
+ puts "\nImportant Notes:" # NOTE(review): the limits, formats and languages printed below are stated by this example only; confirm against the STT adapter and provider documentation
280
+ puts "1. STT requires valid SILICONFLOW_API_KEY environment variable"
281
+ puts "2. Audio files must be in supported formats (mp3, wav, webm, etc.)"
282
+ puts "3. Maximum file size: 25MB"
283
+ puts "4. Supported languages: Chinese, English, Japanese, Korean"
284
+ puts "5. Response formats: json, text, srt, vtt"
285
+
286
+ # Clean up: remove the temporary stt_config.yml written at the start (only reached when the script runs to this line)
287
+ File.delete('stt_config.yml') if File.exist?('stt_config.yml')
@@ -0,0 +1,244 @@
1
+ # TTS Example for SmartPrompt
2
+ # This example demonstrates how to use the new TTSAdapter
3
+
4
+ require_relative '../lib/smart_prompt'
5
+
6
+ # Configuration for TTS capabilities: adapter registry, one LLM entry per service (all SiliconFlow endpoints keyed by SILICONFLOW_API_KEY), and engine paths
7
+ config = {
8
+ "adapters" => { # adapter key => adapter class name (presumably resolved by the SmartPrompt engine; confirm)
9
+ "multimodal" => "MultimodalAdapter",
10
+ "image_generation" => "ImageGenerationAdapter",
11
+ "video_generation" => "VideoGenerationAdapter",
12
+ "tts" => "TTSAdapter"
13
+ },
14
+ "llms" => { # each entry names its adapter key, endpoint URL, API key and model
15
+ "qwen_vl" => {
16
+ "adapter" => "multimodal",
17
+ "url" => "https://api.siliconflow.cn/v1/",
18
+ "api_key" => ENV["SILICONFLOW_API_KEY"],
19
+ "model" => "Qwen/Qwen2.5-VL-7B-Instruct"
20
+ },
21
+ "image_gen" => {
22
+ "adapter" => "image_generation",
23
+ "url" => "https://api.siliconflow.cn/v1/",
24
+ "api_key" => ENV["SILICONFLOW_API_KEY"],
25
+ "model" => "stabilityai/stable-diffusion-xl-base-1.0"
26
+ },
27
+ "video_gen" => {
28
+ "adapter" => "video_generation",
29
+ "url" => "https://api.siliconflow.cn/v1/",
30
+ "api_key" => ENV["SILICONFLOW_API_KEY"],
31
+ "model" => "Wan-AI/Wan2.2-T2V-A14B"
32
+ },
33
+ "tts_service" => { # the entry the TTS examples rely on; also read directly via engine.llms in Example 7
34
+ "adapter" => "tts",
35
+ "url" => "https://api.siliconflow.cn/v1/",
36
+ "api_key" => ENV["SILICONFLOW_API_KEY"], # nil when the environment variable is unset
37
+ "model" => "FunAudioLLM/CosyVoice2-0.5B"
38
+ }
39
+ },
40
+ "default_llm" => "qwen_vl",
41
+ "template_path" => "./templates",
42
+ "worker_path" => "./workers",
43
+ "logger_file" => "./logs/smart_prompt.log"
44
+ }
45
+
46
+ # Write config to file: the hash is serialized to tts_config.yml in the current directory (NOTE(review): this includes the api_key values in plain text, and the file is only removed by the File.delete at the very end of the script)
47
+ File.write('tts_config.yml', config.to_yaml)
48
+
49
+ # Initialize engine from the YAML file written above
50
+ engine = SmartPrompt::Engine.new('tts_config.yml')
51
+
52
+ puts "=== SmartPrompt TTS Demo ==="
53
+
54
+ # Example 1: Basic text-to-speech synthesis through the :tts_synthesizer worker, asking it to save the audio under ./generated_audio
55
+ puts "\n=== Example 1: Basic TTS Synthesis ==="
56
+ begin
57
+ result = engine.call_worker(:tts_synthesizer, {
58
+ text: "欢迎使用智能提示系统,这是一个文本转语音功能的演示。",
59
+ voice: "alloy",
60
+ speed: 1.0,
61
+ response_format: "mp3",
62
+ save_to_file: true,
63
+ output_dir: "./generated_audio",
64
+ filename_prefix: "basic_tts"
65
+ }) # read below as result[:audio_file] with :file_path, :text_length and :voice
66
+
67
+ puts "TTS synthesis successful!"
68
+ puts "Audio file: #{result[:audio_file][:file_path]}"
69
+ puts "Text length: #{result[:audio_file][:text_length]} characters"
70
+ puts "Voice: #{result[:audio_file][:voice]}"
71
+
72
+ rescue => e # any StandardError is reported and the script continues with the next example
73
+ puts "Error in TTS synthesis: #{e.message}"
74
+ puts "Note: This example requires a valid SILICONFLOW_API_KEY environment variable"
75
+ end
76
+
77
+ # Example 2: Multilingual TTS - the :multilingual_tts worker is called once with English and once with Chinese text
78
+ puts "\n=== Example 2: Multilingual TTS ==="
79
+ begin
80
+ # English text
81
+ result_en = engine.call_worker(:multilingual_tts, {
82
+ text: "Hello, this is a demonstration of text-to-speech functionality.",
83
+ voice: "echo",
84
+ save_to_file: true,
85
+ output_dir: "./multilingual_audio",
86
+ filename_prefix: "english_tts"
87
+ }) # read below as :detected_language and :audio_file (with :file_path)
88
+
89
+ puts "English TTS successful!"
90
+ puts "Detected language: #{result_en[:detected_language]}"
91
+ puts "Audio file: #{result_en[:audio_file][:file_path]}"
92
+
93
+ # Chinese text (same output directory, different voice and filename prefix)
94
+ result_zh = engine.call_worker(:multilingual_tts, {
95
+ text: "这是一个中文文本转语音的演示,支持多种语言。",
96
+ voice: "nova",
97
+ save_to_file: true,
98
+ output_dir: "./multilingual_audio",
99
+ filename_prefix: "chinese_tts"
100
+ })
101
+
102
+ puts "Chinese TTS successful!"
103
+ puts "Detected language: #{result_zh[:detected_language]}"
104
+ puts "Audio file: #{result_zh[:audio_file][:file_path]}"
105
+
106
+ rescue => e # a failure in the English call also skips the Chinese call
107
+ puts "Error in multilingual TTS: #{e.message}"
108
+ end
109
+
110
+ # Example 3: Voice selection demo through the :voice_selector worker (no voice is passed in; the selected one is read from the result)
111
+ puts "\n=== Example 3: Voice Selection Demo ==="
112
+ begin
113
+ result = engine.call_worker(:voice_selector, {
114
+ text: "这是一个不同音色的演示,您可以听到不同声音的朗读效果。",
115
+ save_to_file: true,
116
+ output_dir: "./voice_demos"
117
+ }) # read below as :available_voices (responds to keys), :selected_voice and :audio_file
118
+
119
+ puts "Voice selection demo successful!"
120
+ puts "Available voices: #{result[:available_voices].keys.join(', ')}"
121
+ puts "Selected voice: #{result[:selected_voice]}"
122
+ puts "Audio file: #{result[:audio_file][:file_path]}"
123
+
124
+ rescue => e # any StandardError is reported and the script continues with the next example
125
+ puts "Error in voice selection: #{e.message}"
126
+ end
127
+
128
+ # Example 4: Speed variation demo - the :speed_variation_tts worker is given one text and a list of five speeds
129
+ puts "\n=== Example 4: Speed Variation Demo ==="
130
+ begin
131
+ result = engine.call_worker(:speed_variation_tts, {
132
+ text: "这是一个语速变化的演示,您可以听到不同语速的朗读效果。",
133
+ voice: "alloy",
134
+ speeds: [0.5, 0.75, 1.0, 1.5, 2.0],
135
+ save_to_file: true,
136
+ output_dir: "./speed_variations"
137
+ }) # read below as result[:speed_variations], a collection of entries with :speed and :audio_file
138
+
139
+ puts "Speed variation demo successful!"
140
+ puts "Generated #{result[:speed_variations].size} audio files at different speeds"
141
+ result[:speed_variations].each do |variation|
142
+ puts " - Speed #{variation[:speed]}: #{variation[:audio_file][:file_path]}"
143
+ end
144
+
145
+ rescue => e # any StandardError is reported and the script continues with the next example
146
+ puts "Error in speed variation: #{e.message}"
147
+ end
148
+
149
+ # Example 5: Custom voice management - only the list action of the :custom_voice_manager worker is executed; the create action is left commented out
150
+ puts "\n=== Example 5: Custom Voice Management ==="
151
+ begin
152
+ # List available voices
153
+ result = engine.call_worker(:custom_voice_manager, {
154
+ action: "list"
155
+ }) # read below as :predefined_voices (responds to keys) and :custom_voices (responds to size)
156
+
157
+ puts "Voice management demo successful!"
158
+ puts "Predefined voices: #{result[:predefined_voices].keys.join(', ')}"
159
+ puts "Custom voices: #{result[:custom_voices].size}"
160
+
161
+ # Note: Creating custom voices requires reference audio files
162
+ # Uncomment the following lines if you have reference audio files:
163
+ #
164
+ # result = engine.call_worker(:custom_voice_manager, {
165
+ # action: "create",
166
+ # name: "my_custom_voice",
167
+ # reference_audio_file: "./reference_audio.wav",
168
+ # description: "My custom voice created from reference audio"
169
+ # })
170
+ #
171
+ # puts "Custom voice created: #{result[:voice_data][:voice_id]}"
172
+
173
+ rescue => e # any StandardError is reported and the script continues with the next example
174
+ puts "Error in voice management: #{e.message}"
175
+ end
176
+
177
+ # Example 6: Batch TTS processing - three texts are sent to the :batch_tts worker in a single call
178
+ puts "\n=== Example 6: Batch TTS Processing ==="
179
+ begin
180
+ result = engine.call_worker(:batch_tts, {
181
+ texts: [
182
+ "这是第一条文本内容。",
183
+ "这是第二条文本内容,用于批量处理演示。",
184
+ "这是第三条文本内容,展示批量文本转语音功能。"
185
+ ],
186
+ voice: "alloy",
187
+ save_to_file: true,
188
+ output_dir: "./batch_audio"
189
+ }) # read below as result[:batch_results], a collection of entries with :index and :audio_file
190
+
191
+ puts "Batch TTS processing successful!"
192
+ puts "Generated #{result[:batch_results].size} audio files"
193
+ result[:batch_results].each do |batch_result|
194
+ puts " - Text #{batch_result[:index] + 1}: #{batch_result[:audio_file][:file_path]}" # :index is shown plus one; presumably zero-based - confirm against the worker
195
+ end
196
+
197
+ rescue => e # any StandardError is reported and the script continues with the next example
198
+ puts "Error in batch TTS: #{e.message}"
199
+ end
200
+
201
+ # Example 7: Direct adapter usage - calls synthesize_speech and synthesize_to_file on the adapter without going through a worker
202
+ puts "\n=== Example 7: Direct Adapter Usage ==="
203
+ begin
204
+ # Get the adapter directly (looked up under the tts_service key used in the config)
205
+ adapter = engine.llms["tts_service"]
206
+
207
+ # Synthesize speech directly (the result is read below with :format and :text_length)
208
+ audio_data = adapter.synthesize_speech(
209
+ "这是直接使用适配器的演示,不通过Worker。",
210
+ voice: "echo",
211
+ speed: 1.2,
212
+ response_format: "mp3"
213
+ )
214
+
215
+ puts "Direct adapter usage successful!"
216
+ puts "Generated audio data with format: #{audio_data[:format]}"
217
+ puts "Text length: #{audio_data[:text_length]} characters"
218
+
219
+ # Save to file (NOTE(review): ./direct_audio is not created anywhere in this script - confirm that synthesize_to_file creates missing directories)
220
+ output_path = "./direct_audio/direct_tts_#{Time.now.to_i}.mp3"
221
+ result = adapter.synthesize_to_file(
222
+ "这是直接保存到文件的演示。",
223
+ output_path,
224
+ voice: "nova",
225
+ speed: 1.0
226
+ )
227
+
228
+ puts "Direct file synthesis successful!"
229
+ puts "Audio file: #{result[:file_path]}"
230
+
231
+ rescue => e # any StandardError is reported; this is the last example before the closing notes
232
+ puts "Error in direct adapter usage: #{e.message}"
233
+ end
234
+
235
+ puts "\n=== All examples completed ===" # closing banner followed by usage notes
236
+ puts "\nImportant Notes:" # NOTE(review): the formats, languages and speed range printed below are stated by this example only; confirm against the TTS adapter and provider documentation
237
+ puts "1. TTS requires valid SILICONFLOW_API_KEY environment variable"
238
+ puts "2. Audio files are saved in various formats (mp3, wav, etc.)"
239
+ puts "3. Custom voice creation requires reference audio files"
240
+ puts "4. Multiple languages are supported (Chinese, English, Japanese, Korean)"
241
+ puts "5. Speed can be adjusted from 0.25x to 4.0x"
242
+
243
+ # Clean up: remove the temporary tts_config.yml written at the start (only reached when the script runs to this line)
244
+ File.delete('tts_config.yml') if File.exist?('tts_config.yml')