smart_prompt 0.4.4 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/README.cn.md +305 -11
- data/README.md +309 -11
- data/Rakefile +10 -1
- data/config/anthropic_config.yml +151 -0
- data/config/image_generation_config.yml +22 -0
- data/config/multimodal_config.yml +85 -0
- data/config/sensenova_config.yml +63 -0
- data/config/zhipu_config.yml +73 -0
- data/docs/ANTHROPIC_EXAMPLES.md +559 -0
- data/docs/CONVERSATION_INTEGRATION_SUMMARY.md +155 -0
- data/docs/HISTORY_EXAMPLES_README.md +533 -0
- data/docs/HISTORY_MANAGEMENT_GUIDE.md +797 -0
- data/docs/MONITORING_GUIDE.md +278 -0
- data/docs/MULTIMODAL_README.md +265 -0
- data/docs/RELEVANCE_BASED_STRATEGY_IMPLEMENTATION.md +124 -0
- data/docs/STT_README.md +302 -0
- data/docs/TTS_README.md +303 -0
- data/docs/VIDEO_GENERATION_README.md +246 -0
- data/docs/delete_files_list.md +124 -0
- data/examples/anthropic_basic_chat.rb +143 -0
- data/examples/anthropic_example.rb +232 -0
- data/examples/anthropic_multimodal.rb +212 -0
- data/examples/anthropic_streaming.rb +312 -0
- data/examples/anthropic_tool_calling.rb +393 -0
- data/examples/automatic_cleanup_example.rb +109 -0
- data/examples/history_management_examples.rb +522 -0
- data/examples/image_generation_example.rb +130 -0
- data/examples/monitoring_example.rb +121 -0
- data/examples/multimodal_example.rb +63 -0
- data/examples/relevance_based_strategy_example.rb +87 -0
- data/examples/sensenova_example.rb +129 -0
- data/examples/stt_example.rb +287 -0
- data/examples/tts_example.rb +244 -0
- data/examples/video_generation_example.rb +189 -0
- data/examples/zhipu_example.rb +151 -0
- data/lib/smart_prompt/anthropic_adapter.rb +407 -298
- data/lib/smart_prompt/compression_engine.rb +201 -0
- data/lib/smart_prompt/context_strategy.rb +22 -0
- data/lib/smart_prompt/conversation.rb +47 -4
- data/lib/smart_prompt/engine.rb +29 -2
- data/lib/smart_prompt/history_manager.rb +596 -0
- data/lib/smart_prompt/hybrid_strategy.rb +222 -0
- data/lib/smart_prompt/image_generation_adapter.rb +297 -0
- data/lib/smart_prompt/lru_cache.rb +133 -0
- data/lib/smart_prompt/message.rb +57 -0
- data/lib/smart_prompt/multimodal_adapter.rb +277 -0
- data/lib/smart_prompt/persistence_layer.rb +197 -0
- data/lib/smart_prompt/relevance_based_strategy.rb +221 -0
- data/lib/smart_prompt/sensenova_adapter.rb +410 -0
- data/lib/smart_prompt/session.rb +140 -0
- data/lib/smart_prompt/sliding_window_strategy.rb +100 -0
- data/lib/smart_prompt/stt_adapter.rb +381 -0
- data/lib/smart_prompt/summary_based_strategy.rb +152 -0
- data/lib/smart_prompt/token_counter.rb +74 -0
- data/lib/smart_prompt/tts_adapter.rb +403 -0
- data/lib/smart_prompt/version.rb +1 -1
- data/lib/smart_prompt/video_generation_adapter.rb +330 -0
- data/lib/smart_prompt/worker.rb +28 -3
- data/lib/smart_prompt/zhipu_adapter.rb +616 -0
- data/lib/smart_prompt.rb +21 -0
- data/workers/history_management_examples.rb +407 -0
- data/workers/image_generation_workers.rb +119 -0
- data/workers/multimodal_workers.rb +110 -0
- data/workers/sensenova_workers.rb +62 -0
- data/workers/stt_workers.rb +195 -0
- data/workers/tts_workers.rb +388 -0
- data/workers/video_generation_workers.rb +264 -0
- data/workers/zhipu_workers.rb +113 -0
- metadata +88 -1
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
# STT Example for SmartPrompt
|
|
2
|
+
# This example demonstrates how to use the new STTAdapter
|
|
3
|
+
|
|
4
|
+
require_relative '../lib/smart_prompt'
|
|
5
|
+
|
|
6
|
+
# Configuration for STT capabilities
|
|
7
|
+
config = {
|
|
8
|
+
"adapters" => {
|
|
9
|
+
"multimodal" => "MultimodalAdapter",
|
|
10
|
+
"image_generation" => "ImageGenerationAdapter",
|
|
11
|
+
"video_generation" => "VideoGenerationAdapter",
|
|
12
|
+
"tts" => "TTSAdapter",
|
|
13
|
+
"stt" => "STTAdapter"
|
|
14
|
+
},
|
|
15
|
+
"llms" => {
|
|
16
|
+
"qwen_vl" => {
|
|
17
|
+
"adapter" => "multimodal",
|
|
18
|
+
"url" => "https://api.siliconflow.cn/v1/",
|
|
19
|
+
"api_key" => ENV["SILICONFLOW_API_KEY"],
|
|
20
|
+
"model" => "Qwen/Qwen2.5-VL-7B-Instruct"
|
|
21
|
+
},
|
|
22
|
+
"image_gen" => {
|
|
23
|
+
"adapter" => "image_generation",
|
|
24
|
+
"url" => "https://api.siliconflow.cn/v1/",
|
|
25
|
+
"api_key" => ENV["SILICONFLOW_API_KEY"],
|
|
26
|
+
"model" => "stabilityai/stable-diffusion-xl-base-1.0"
|
|
27
|
+
},
|
|
28
|
+
"video_gen" => {
|
|
29
|
+
"adapter" => "video_generation",
|
|
30
|
+
"url" => "https://api.siliconflow.cn/v1/",
|
|
31
|
+
"api_key" => ENV["SILICONFLOW_API_KEY"],
|
|
32
|
+
"model" => "Wan-AI/Wan2.2-T2V-A14B"
|
|
33
|
+
},
|
|
34
|
+
"tts_service" => {
|
|
35
|
+
"adapter" => "tts",
|
|
36
|
+
"url" => "https://api.siliconflow.cn/v1/",
|
|
37
|
+
"api_key" => ENV["SILICONFLOW_API_KEY"],
|
|
38
|
+
"model" => "FunAudioLLM/CosyVoice2-0.5B"
|
|
39
|
+
},
|
|
40
|
+
"stt_service" => {
|
|
41
|
+
"adapter" => "stt",
|
|
42
|
+
"url" => "https://api.siliconflow.cn/v1/",
|
|
43
|
+
"api_key" => ENV["SILICONFLOW_API_KEY"],
|
|
44
|
+
"model" => "FunAudioLLM/CosyVoice2-0.5B" # NOTE(review): same model as "tts_service" above; presumably a speech-recognition model is intended for STT — confirm against the provider's model list
|
|
45
|
+
}
|
|
46
|
+
},
|
|
47
|
+
"default_llm" => "qwen_vl",
|
|
48
|
+
"template_path" => "./templates",
|
|
49
|
+
"worker_path" => "./workers",
|
|
50
|
+
"logger_file" => "./logs/smart_prompt.log"
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
# Write the config to a temporary YAML file in the current directory (deleted at the end of this script)
|
|
54
|
+
File.write('stt_config.yml', config.to_yaml)
|
|
55
|
+
|
|
56
|
+
# Initialize engine
|
|
57
|
+
engine = SmartPrompt::Engine.new('stt_config.yml')
|
|
58
|
+
|
|
59
|
+
puts "=== SmartPrompt STT Demo ==="
|
|
60
|
+
|
|
61
|
+
# Example 1: Basic speech-to-text transcription
|
|
62
|
+
puts "\n=== Example 1: Basic STT Transcription ==="
|
|
63
|
+
begin
|
|
64
|
+
# Note: This example requires an actual audio file
|
|
65
|
+
# Replace with a real audio file path for testing
|
|
66
|
+
audio_file_path = "./test_audio.wav"
|
|
67
|
+
|
|
68
|
+
if File.exist?(audio_file_path)
|
|
69
|
+
result = engine.call_worker(:stt_transcriber, {
|
|
70
|
+
audio_file: audio_file_path,
|
|
71
|
+
language: "zh",
|
|
72
|
+
response_format: "json"
|
|
73
|
+
})
|
|
74
|
+
|
|
75
|
+
puts "STT transcription successful!"
|
|
76
|
+
puts "Transcribed text: #{result[:transcription][:text]}"
|
|
77
|
+
puts "Language: #{result[:transcription][:language]}"
|
|
78
|
+
puts "Duration: #{result[:transcription][:duration]} seconds"
|
|
79
|
+
puts "File size: #{result[:transcription][:file_size]} bytes"
|
|
80
|
+
else
|
|
81
|
+
puts "Audio file not found: #{audio_file_path}"
|
|
82
|
+
puts "Please create a test audio file to run this example"
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
rescue => e
|
|
86
|
+
puts "Error in STT transcription: #{e.message}"
|
|
87
|
+
puts "Note: This example requires a valid SILICONFLOW_API_KEY environment variable"
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
# Example 2: URL-based transcription
|
|
91
|
+
puts "\n=== Example 2: URL-based STT Transcription ==="
|
|
92
|
+
begin
|
|
93
|
+
# Note: Replace with a real audio URL for testing
|
|
94
|
+
audio_url = "https://example.com/audio.wav"
|
|
95
|
+
|
|
96
|
+
result = engine.call_worker(:stt_url_transcriber, {
|
|
97
|
+
audio_url: audio_url,
|
|
98
|
+
language: "en",
|
|
99
|
+
response_format: "text"
|
|
100
|
+
})
|
|
101
|
+
|
|
102
|
+
puts "URL-based STT transcription successful!"
|
|
103
|
+
puts "Transcribed text: #{result[:transcription][:text]}"
|
|
104
|
+
puts "Audio URL: #{result[:transcription][:audio_url]}"
|
|
105
|
+
|
|
106
|
+
rescue => e
|
|
107
|
+
puts "Error in URL-based STT: #{e.message}"
|
|
108
|
+
puts "Note: This requires a valid audio URL"
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
# Example 3: Batch transcription
|
|
112
|
+
puts "\n=== Example 3: Batch STT Processing ==="
|
|
113
|
+
begin
|
|
114
|
+
# Note: Replace with real audio files for testing
|
|
115
|
+
audio_files = ["./audio1.wav", "./audio2.wav", "./audio3.wav"]
|
|
116
|
+
existing_files = audio_files.select { |f| File.exist?(f) }
|
|
117
|
+
|
|
118
|
+
if existing_files.any?
|
|
119
|
+
result = engine.call_worker(:batch_stt, {
|
|
120
|
+
audio_files: existing_files,
|
|
121
|
+
language: "zh"
|
|
122
|
+
})
|
|
123
|
+
|
|
124
|
+
puts "Batch STT processing successful!"
|
|
125
|
+
puts "Total files: #{result[:batch_result][:total_files]}"
|
|
126
|
+
puts "Successful: #{result[:batch_result][:successful]}"
|
|
127
|
+
puts "Failed: #{result[:batch_result][:failed]}"
|
|
128
|
+
|
|
129
|
+
result[:batch_result][:results].each do |file_result|
|
|
130
|
+
if file_result[:success]
|
|
131
|
+
puts " - #{File.basename(file_result[:file])}: #{file_result[:transcription][:text].length} characters"
|
|
132
|
+
else
|
|
133
|
+
puts " - #{File.basename(file_result[:file])}: ERROR - #{file_result[:error]}"
|
|
134
|
+
end
|
|
135
|
+
end
|
|
136
|
+
else
|
|
137
|
+
puts "No audio files found for batch processing"
|
|
138
|
+
puts "Please create test audio files to run this example"
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
rescue => e
|
|
142
|
+
puts "Error in batch STT: #{e.message}"
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
# Example 4: Audio file information
|
|
146
|
+
puts "\n=== Example 4: Audio File Information ==="
|
|
147
|
+
begin
|
|
148
|
+
audio_file_path = "./test_audio.wav"
|
|
149
|
+
|
|
150
|
+
if File.exist?(audio_file_path)
|
|
151
|
+
result = engine.call_worker(:audio_info, {
|
|
152
|
+
audio_file: audio_file_path
|
|
153
|
+
})
|
|
154
|
+
|
|
155
|
+
puts "Audio file information retrieved!"
|
|
156
|
+
puts "File name: #{result[:audio_info][:file_name]}"
|
|
157
|
+
puts "File size: #{result[:audio_info][:file_size]} bytes"
|
|
158
|
+
puts "Format: #{result[:audio_info][:format]}"
|
|
159
|
+
puts "Estimated duration: #{result[:audio_info][:estimated_duration]} seconds"
|
|
160
|
+
puts "Supported: #{result[:audio_info][:supported]}"
|
|
161
|
+
else
|
|
162
|
+
puts "Audio file not found: #{audio_file_path}"
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
rescue => e
|
|
166
|
+
puts "Error getting audio info: #{e.message}"
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
# Example 5: Language detection
|
|
170
|
+
puts "\n=== Example 5: Language Detection ==="
|
|
171
|
+
begin
|
|
172
|
+
# Test with Chinese text
|
|
173
|
+
result = engine.call_worker(:language_detector, {
|
|
174
|
+
text: "这是一个中文文本,用于语言检测演示。"
|
|
175
|
+
})
|
|
176
|
+
|
|
177
|
+
puts "Language detection successful!"
|
|
178
|
+
puts "Text: #{result[:text]}"
|
|
179
|
+
puts "Detected language: #{result[:detected_language]}"
|
|
180
|
+
|
|
181
|
+
# Test with English text
|
|
182
|
+
result_en = engine.call_worker(:language_detector, {
|
|
183
|
+
text: "This is an English text for language detection demonstration."
|
|
184
|
+
})
|
|
185
|
+
|
|
186
|
+
puts "English text detected as: #{result_en[:detected_language]}"
|
|
187
|
+
|
|
188
|
+
rescue => e
|
|
189
|
+
puts "Error in language detection: #{e.message}"
|
|
190
|
+
end
|
|
191
|
+
|
|
192
|
+
# Example 6: Multi-language STT
|
|
193
|
+
puts "\n=== Example 6: Multi-language STT ==="
|
|
194
|
+
begin
|
|
195
|
+
audio_file_path = "./test_audio.wav"
|
|
196
|
+
|
|
197
|
+
if File.exist?(audio_file_path)
|
|
198
|
+
result = engine.call_worker(:multilingual_stt, {
|
|
199
|
+
audio_file: audio_file_path
|
|
200
|
+
})
|
|
201
|
+
|
|
202
|
+
puts "Multi-language STT successful!"
|
|
203
|
+
puts "Detected language: #{result[:detected_language]}"
|
|
204
|
+
puts "Initial transcription: #{result[:initial_transcription][:text]}"
|
|
205
|
+
|
|
206
|
+
if result[:improved_transcription]
|
|
207
|
+
puts "Improved transcription: #{result[:improved_transcription][:text]}"
|
|
208
|
+
end
|
|
209
|
+
else
|
|
210
|
+
puts "Audio file not found: #{audio_file_path}"
|
|
211
|
+
end
|
|
212
|
+
|
|
213
|
+
rescue => e
|
|
214
|
+
puts "Error in multi-language STT: #{e.message}"
|
|
215
|
+
end
|
|
216
|
+
|
|
217
|
+
# Example 7: Format conversion
|
|
218
|
+
puts "\n=== Example 7: STT Format Conversion ==="
|
|
219
|
+
begin
|
|
220
|
+
audio_file_path = "./test_audio.wav"
|
|
221
|
+
|
|
222
|
+
if File.exist?(audio_file_path)
|
|
223
|
+
result = engine.call_worker(:stt_format_converter, {
|
|
224
|
+
audio_file: audio_file_path,
|
|
225
|
+
formats: ["json", "text", "srt", "vtt"]
|
|
226
|
+
})
|
|
227
|
+
|
|
228
|
+
puts "Format conversion successful!"
|
|
229
|
+
result[:format_results].each do |format, transcription|
|
|
230
|
+
puts " - #{format.upcase}: #{transcription[:text].length} characters"
|
|
231
|
+
end
|
|
232
|
+
else
|
|
233
|
+
puts "Audio file not found: #{audio_file_path}"
|
|
234
|
+
end
|
|
235
|
+
|
|
236
|
+
rescue => e
|
|
237
|
+
puts "Error in format conversion: #{e.message}"
|
|
238
|
+
end
|
|
239
|
+
|
|
240
|
+
# Example 8: Direct adapter usage
|
|
241
|
+
puts "\n=== Example 8: Direct Adapter Usage ==="
|
|
242
|
+
begin
|
|
243
|
+
# Get the adapter directly
|
|
244
|
+
adapter = engine.llms["stt_service"]
|
|
245
|
+
|
|
246
|
+
audio_file_path = "./test_audio.wav"
|
|
247
|
+
|
|
248
|
+
if File.exist?(audio_file_path)
|
|
249
|
+
# Transcribe audio directly
|
|
250
|
+
transcription_data = adapter.transcribe_audio(
|
|
251
|
+
audio_file_path,
|
|
252
|
+
language: "zh",
|
|
253
|
+
temperature: 0.0,
|
|
254
|
+
response_format: "json"
|
|
255
|
+
)
|
|
256
|
+
|
|
257
|
+
puts "Direct adapter usage successful!"
|
|
258
|
+
puts "Transcribed text: #{transcription_data[:text]}"
|
|
259
|
+
puts "Language: #{transcription_data[:language]}"
|
|
260
|
+
puts "Duration: #{transcription_data[:duration]} seconds"
|
|
261
|
+
|
|
262
|
+
# Get audio information
|
|
263
|
+
audio_info = adapter.get_audio_info(audio_file_path)
|
|
264
|
+
puts "Audio info - Format: #{audio_info[:format]}, Size: #{audio_info[:file_size]} bytes"
|
|
265
|
+
|
|
266
|
+
# Detect language
|
|
267
|
+
detected_language = adapter.detect_language(transcription_data[:text])
|
|
268
|
+
puts "Detected language: #{detected_language}"
|
|
269
|
+
|
|
270
|
+
else
|
|
271
|
+
puts "Audio file not found: #{audio_file_path}"
|
|
272
|
+
end
|
|
273
|
+
|
|
274
|
+
rescue => e
|
|
275
|
+
puts "Error in direct adapter usage: #{e.message}"
|
|
276
|
+
end
|
|
277
|
+
|
|
278
|
+
puts "\n=== All examples completed ==="
|
|
279
|
+
puts "\nImportant Notes:"
|
|
280
|
+
puts "1. STT requires valid SILICONFLOW_API_KEY environment variable"
|
|
281
|
+
puts "2. Audio files must be in supported formats (mp3, wav, webm, etc.)"
|
|
282
|
+
puts "3. Maximum file size: 25MB"
|
|
283
|
+
puts "4. Supported languages: Chinese, English, Japanese, Korean"
|
|
284
|
+
puts "5. Response formats: json, text, srt, vtt"
|
|
285
|
+
|
|
286
|
+
# Clean up: delete the temporary stt_config.yml written at the start of this script
|
|
287
|
+
File.delete('stt_config.yml') if File.exist?('stt_config.yml')
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
# TTS Example for SmartPrompt
|
|
2
|
+
# This example demonstrates how to use the new TTSAdapter
|
|
3
|
+
|
|
4
|
+
require_relative '../lib/smart_prompt'
|
|
5
|
+
|
|
6
|
+
# Configuration for TTS capabilities
|
|
7
|
+
config = {
|
|
8
|
+
"adapters" => {
|
|
9
|
+
"multimodal" => "MultimodalAdapter",
|
|
10
|
+
"image_generation" => "ImageGenerationAdapter",
|
|
11
|
+
"video_generation" => "VideoGenerationAdapter",
|
|
12
|
+
"tts" => "TTSAdapter"
|
|
13
|
+
},
|
|
14
|
+
"llms" => {
|
|
15
|
+
"qwen_vl" => {
|
|
16
|
+
"adapter" => "multimodal",
|
|
17
|
+
"url" => "https://api.siliconflow.cn/v1/",
|
|
18
|
+
"api_key" => ENV["SILICONFLOW_API_KEY"],
|
|
19
|
+
"model" => "Qwen/Qwen2.5-VL-7B-Instruct"
|
|
20
|
+
},
|
|
21
|
+
"image_gen" => {
|
|
22
|
+
"adapter" => "image_generation",
|
|
23
|
+
"url" => "https://api.siliconflow.cn/v1/",
|
|
24
|
+
"api_key" => ENV["SILICONFLOW_API_KEY"],
|
|
25
|
+
"model" => "stabilityai/stable-diffusion-xl-base-1.0"
|
|
26
|
+
},
|
|
27
|
+
"video_gen" => {
|
|
28
|
+
"adapter" => "video_generation",
|
|
29
|
+
"url" => "https://api.siliconflow.cn/v1/",
|
|
30
|
+
"api_key" => ENV["SILICONFLOW_API_KEY"],
|
|
31
|
+
"model" => "Wan-AI/Wan2.2-T2V-A14B"
|
|
32
|
+
},
|
|
33
|
+
"tts_service" => {
|
|
34
|
+
"adapter" => "tts",
|
|
35
|
+
"url" => "https://api.siliconflow.cn/v1/",
|
|
36
|
+
"api_key" => ENV["SILICONFLOW_API_KEY"],
|
|
37
|
+
"model" => "FunAudioLLM/CosyVoice2-0.5B"
|
|
38
|
+
}
|
|
39
|
+
},
|
|
40
|
+
"default_llm" => "qwen_vl",
|
|
41
|
+
"template_path" => "./templates",
|
|
42
|
+
"worker_path" => "./workers",
|
|
43
|
+
"logger_file" => "./logs/smart_prompt.log"
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
# Write the config to a temporary YAML file in the current directory (deleted at the end of this script)
|
|
47
|
+
File.write('tts_config.yml', config.to_yaml)
|
|
48
|
+
|
|
49
|
+
# Initialize engine
|
|
50
|
+
engine = SmartPrompt::Engine.new('tts_config.yml')
|
|
51
|
+
|
|
52
|
+
puts "=== SmartPrompt TTS Demo ==="
|
|
53
|
+
|
|
54
|
+
# Example 1: Basic text-to-speech synthesis
|
|
55
|
+
puts "\n=== Example 1: Basic TTS Synthesis ==="
|
|
56
|
+
begin
|
|
57
|
+
result = engine.call_worker(:tts_synthesizer, {
|
|
58
|
+
text: "欢迎使用智能提示系统,这是一个文本转语音功能的演示。",
|
|
59
|
+
voice: "alloy",
|
|
60
|
+
speed: 1.0,
|
|
61
|
+
response_format: "mp3",
|
|
62
|
+
save_to_file: true,
|
|
63
|
+
output_dir: "./generated_audio",
|
|
64
|
+
filename_prefix: "basic_tts"
|
|
65
|
+
})
|
|
66
|
+
|
|
67
|
+
puts "TTS synthesis successful!"
|
|
68
|
+
puts "Audio file: #{result[:audio_file][:file_path]}"
|
|
69
|
+
puts "Text length: #{result[:audio_file][:text_length]} characters"
|
|
70
|
+
puts "Voice: #{result[:audio_file][:voice]}"
|
|
71
|
+
|
|
72
|
+
rescue => e
|
|
73
|
+
puts "Error in TTS synthesis: #{e.message}"
|
|
74
|
+
puts "Note: This example requires a valid SILICONFLOW_API_KEY environment variable"
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
# Example 2: Multilingual TTS
|
|
78
|
+
puts "\n=== Example 2: Multilingual TTS ==="
|
|
79
|
+
begin
|
|
80
|
+
# English text
|
|
81
|
+
result_en = engine.call_worker(:multilingual_tts, {
|
|
82
|
+
text: "Hello, this is a demonstration of text-to-speech functionality.",
|
|
83
|
+
voice: "echo",
|
|
84
|
+
save_to_file: true,
|
|
85
|
+
output_dir: "./multilingual_audio",
|
|
86
|
+
filename_prefix: "english_tts"
|
|
87
|
+
})
|
|
88
|
+
|
|
89
|
+
puts "English TTS successful!"
|
|
90
|
+
puts "Detected language: #{result_en[:detected_language]}"
|
|
91
|
+
puts "Audio file: #{result_en[:audio_file][:file_path]}"
|
|
92
|
+
|
|
93
|
+
# Chinese text
|
|
94
|
+
result_zh = engine.call_worker(:multilingual_tts, {
|
|
95
|
+
text: "这是一个中文文本转语音的演示,支持多种语言。",
|
|
96
|
+
voice: "nova",
|
|
97
|
+
save_to_file: true,
|
|
98
|
+
output_dir: "./multilingual_audio",
|
|
99
|
+
filename_prefix: "chinese_tts"
|
|
100
|
+
})
|
|
101
|
+
|
|
102
|
+
puts "Chinese TTS successful!"
|
|
103
|
+
puts "Detected language: #{result_zh[:detected_language]}"
|
|
104
|
+
puts "Audio file: #{result_zh[:audio_file][:file_path]}"
|
|
105
|
+
|
|
106
|
+
rescue => e
|
|
107
|
+
puts "Error in multilingual TTS: #{e.message}"
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
# Example 3: Voice selection demo
|
|
111
|
+
puts "\n=== Example 3: Voice Selection Demo ==="
|
|
112
|
+
begin
|
|
113
|
+
result = engine.call_worker(:voice_selector, {
|
|
114
|
+
text: "这是一个不同音色的演示,您可以听到不同声音的朗读效果。",
|
|
115
|
+
save_to_file: true,
|
|
116
|
+
output_dir: "./voice_demos"
|
|
117
|
+
})
|
|
118
|
+
|
|
119
|
+
puts "Voice selection demo successful!"
|
|
120
|
+
puts "Available voices: #{result[:available_voices].keys.join(', ')}"
|
|
121
|
+
puts "Selected voice: #{result[:selected_voice]}"
|
|
122
|
+
puts "Audio file: #{result[:audio_file][:file_path]}"
|
|
123
|
+
|
|
124
|
+
rescue => e
|
|
125
|
+
puts "Error in voice selection: #{e.message}"
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
# Example 4: Speed variation demo
|
|
129
|
+
puts "\n=== Example 4: Speed Variation Demo ==="
|
|
130
|
+
begin
|
|
131
|
+
result = engine.call_worker(:speed_variation_tts, {
|
|
132
|
+
text: "这是一个语速变化的演示,您可以听到不同语速的朗读效果。",
|
|
133
|
+
voice: "alloy",
|
|
134
|
+
speeds: [0.5, 0.75, 1.0, 1.5, 2.0],
|
|
135
|
+
save_to_file: true,
|
|
136
|
+
output_dir: "./speed_variations"
|
|
137
|
+
})
|
|
138
|
+
|
|
139
|
+
puts "Speed variation demo successful!"
|
|
140
|
+
puts "Generated #{result[:speed_variations].size} audio files at different speeds"
|
|
141
|
+
result[:speed_variations].each do |variation|
|
|
142
|
+
puts " - Speed #{variation[:speed]}: #{variation[:audio_file][:file_path]}"
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
rescue => e
|
|
146
|
+
puts "Error in speed variation: #{e.message}"
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
# Example 5: Custom voice management
|
|
150
|
+
puts "\n=== Example 5: Custom Voice Management ==="
|
|
151
|
+
begin
|
|
152
|
+
# List available voices
|
|
153
|
+
result = engine.call_worker(:custom_voice_manager, {
|
|
154
|
+
action: "list"
|
|
155
|
+
})
|
|
156
|
+
|
|
157
|
+
puts "Voice management demo successful!"
|
|
158
|
+
puts "Predefined voices: #{result[:predefined_voices].keys.join(', ')}"
|
|
159
|
+
puts "Custom voices: #{result[:custom_voices].size}"
|
|
160
|
+
|
|
161
|
+
# Note: Creating custom voices requires reference audio files
|
|
162
|
+
# Uncomment the following lines if you have reference audio files:
|
|
163
|
+
#
|
|
164
|
+
# result = engine.call_worker(:custom_voice_manager, {
|
|
165
|
+
# action: "create",
|
|
166
|
+
# name: "my_custom_voice",
|
|
167
|
+
# reference_audio_file: "./reference_audio.wav",
|
|
168
|
+
# description: "My custom voice created from reference audio"
|
|
169
|
+
# })
|
|
170
|
+
#
|
|
171
|
+
# puts "Custom voice created: #{result[:voice_data][:voice_id]}"
|
|
172
|
+
|
|
173
|
+
rescue => e
|
|
174
|
+
puts "Error in voice management: #{e.message}"
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
# Example 6: Batch TTS processing
|
|
178
|
+
puts "\n=== Example 6: Batch TTS Processing ==="
|
|
179
|
+
begin
|
|
180
|
+
result = engine.call_worker(:batch_tts, {
|
|
181
|
+
texts: [
|
|
182
|
+
"这是第一条文本内容。",
|
|
183
|
+
"这是第二条文本内容,用于批量处理演示。",
|
|
184
|
+
"这是第三条文本内容,展示批量文本转语音功能。"
|
|
185
|
+
],
|
|
186
|
+
voice: "alloy",
|
|
187
|
+
save_to_file: true,
|
|
188
|
+
output_dir: "./batch_audio"
|
|
189
|
+
})
|
|
190
|
+
|
|
191
|
+
puts "Batch TTS processing successful!"
|
|
192
|
+
puts "Generated #{result[:batch_results].size} audio files"
|
|
193
|
+
result[:batch_results].each do |batch_result|
|
|
194
|
+
puts " - Text #{batch_result[:index] + 1}: #{batch_result[:audio_file][:file_path]}"
|
|
195
|
+
end
|
|
196
|
+
|
|
197
|
+
rescue => e
|
|
198
|
+
puts "Error in batch TTS: #{e.message}"
|
|
199
|
+
end
|
|
200
|
+
|
|
201
|
+
# Example 7: Direct adapter usage
|
|
202
|
+
puts "\n=== Example 7: Direct Adapter Usage ==="
|
|
203
|
+
begin
|
|
204
|
+
# Get the adapter directly
|
|
205
|
+
adapter = engine.llms["tts_service"]
|
|
206
|
+
|
|
207
|
+
# Synthesize speech directly
|
|
208
|
+
audio_data = adapter.synthesize_speech(
|
|
209
|
+
"这是直接使用适配器的演示,不通过Worker。",
|
|
210
|
+
voice: "echo",
|
|
211
|
+
speed: 1.2,
|
|
212
|
+
response_format: "mp3"
|
|
213
|
+
)
|
|
214
|
+
|
|
215
|
+
puts "Direct adapter usage successful!"
|
|
216
|
+
puts "Generated audio data with format: #{audio_data[:format]}"
|
|
217
|
+
puts "Text length: #{audio_data[:text_length]} characters"
|
|
218
|
+
|
|
219
|
+
# Save to file
|
|
220
|
+
output_path = "./direct_audio/direct_tts_#{Time.now.to_i}.mp3"
|
|
221
|
+
result = adapter.synthesize_to_file(
|
|
222
|
+
"这是直接保存到文件的演示。",
|
|
223
|
+
output_path,
|
|
224
|
+
voice: "nova",
|
|
225
|
+
speed: 1.0
|
|
226
|
+
)
|
|
227
|
+
|
|
228
|
+
puts "Direct file synthesis successful!"
|
|
229
|
+
puts "Audio file: #{result[:file_path]}"
|
|
230
|
+
|
|
231
|
+
rescue => e
|
|
232
|
+
puts "Error in direct adapter usage: #{e.message}"
|
|
233
|
+
end
|
|
234
|
+
|
|
235
|
+
puts "\n=== All examples completed ==="
|
|
236
|
+
puts "\nImportant Notes:"
|
|
237
|
+
puts "1. TTS requires valid SILICONFLOW_API_KEY environment variable"
|
|
238
|
+
puts "2. Audio files are saved in various formats (mp3, wav, etc.)"
|
|
239
|
+
puts "3. Custom voice creation requires reference audio files"
|
|
240
|
+
puts "4. Multiple languages are supported (Chinese, English, Japanese, Korean)"
|
|
241
|
+
puts "5. Speed can be adjusted from 0.25x to 4.0x"
|
|
242
|
+
|
|
243
|
+
# Clean up: delete the temporary tts_config.yml written at the start of this script
|
|
244
|
+
File.delete('tts_config.yml') if File.exist?('tts_config.yml')
|