smart_prompt 0.4.4 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -10
  3. data/README.cn.md +307 -64
  4. data/README.md +311 -64
  5. data/Rakefile +10 -1
  6. data/config/anthropic_config.yml +151 -0
  7. data/config/image_generation_config.yml +22 -0
  8. data/config/multimodal_config.yml +85 -0
  9. data/config/sensenova_config.yml +63 -0
  10. data/config/zhipu_config.yml +73 -0
  11. data/examples/anthropic_basic_chat.rb +143 -0
  12. data/examples/anthropic_example.rb +232 -0
  13. data/examples/anthropic_multimodal.rb +212 -0
  14. data/examples/anthropic_streaming.rb +312 -0
  15. data/examples/anthropic_tool_calling.rb +393 -0
  16. data/examples/automatic_cleanup_example.rb +109 -0
  17. data/examples/history_management_examples.rb +522 -0
  18. data/examples/image_generation_example.rb +130 -0
  19. data/examples/monitoring_example.rb +121 -0
  20. data/examples/multimodal_example.rb +63 -0
  21. data/examples/relevance_based_strategy_example.rb +87 -0
  22. data/examples/sensenova_example.rb +129 -0
  23. data/examples/stt_example.rb +287 -0
  24. data/examples/tts_example.rb +244 -0
  25. data/examples/video_generation_example.rb +189 -0
  26. data/examples/zhipu_example.rb +151 -0
  27. data/lib/smart_prompt/anthropic_adapter.rb +363 -281
  28. data/lib/smart_prompt/compression_engine.rb +201 -0
  29. data/lib/smart_prompt/context_strategy.rb +22 -0
  30. data/lib/smart_prompt/conversation.rb +81 -191
  31. data/lib/smart_prompt/engine.rb +36 -19
  32. data/lib/smart_prompt/history_manager.rb +596 -0
  33. data/lib/smart_prompt/hybrid_strategy.rb +222 -0
  34. data/lib/smart_prompt/image_generation_adapter.rb +297 -0
  35. data/lib/smart_prompt/lru_cache.rb +133 -0
  36. data/lib/smart_prompt/message.rb +57 -0
  37. data/lib/smart_prompt/multimodal_adapter.rb +277 -0
  38. data/lib/smart_prompt/openai_adapter.rb +1 -25
  39. data/lib/smart_prompt/persistence_layer.rb +197 -0
  40. data/lib/smart_prompt/relevance_based_strategy.rb +221 -0
  41. data/lib/smart_prompt/sensenova_adapter.rb +410 -0
  42. data/lib/smart_prompt/session.rb +140 -0
  43. data/lib/smart_prompt/sliding_window_strategy.rb +100 -0
  44. data/lib/smart_prompt/stt_adapter.rb +381 -0
  45. data/lib/smart_prompt/summary_based_strategy.rb +152 -0
  46. data/lib/smart_prompt/token_counter.rb +74 -0
  47. data/lib/smart_prompt/tts_adapter.rb +403 -0
  48. data/lib/smart_prompt/version.rb +1 -1
  49. data/lib/smart_prompt/video_generation_adapter.rb +330 -0
  50. data/lib/smart_prompt/worker.rb +25 -3
  51. data/lib/smart_prompt/zhipu_adapter.rb +616 -0
  52. data/lib/smart_prompt.rb +22 -2
  53. data/workers/history_management_examples.rb +407 -0
  54. data/workers/image_generation_workers.rb +119 -0
  55. data/workers/multimodal_workers.rb +110 -0
  56. data/workers/sensenova_workers.rb +62 -0
  57. data/workers/stt_workers.rb +195 -0
  58. data/workers/tts_workers.rb +388 -0
  59. data/workers/video_generation_workers.rb +264 -0
  60. data/workers/zhipu_workers.rb +113 -0
  61. metadata +84 -8
@@ -0,0 +1,195 @@
1
+ # STT Workers for SmartPrompt
2
+ # These workers demonstrate the new speech-to-text capabilities
3
+
4
+ # Basic speech-to-text worker
5
# Worker :stt_transcriber -- transcribes one audio file through the
# "stt_service" adapter.
#
# Reads from params:
#   :audio_file       passed as the positional argument to transcribe_audio
#   :model            forwarded as the model: keyword (may be nil)
#   :language         forwarded unchanged
#   :prompt           forwarded unchanged
#   :temperature      forwarded, 0.0 when absent
#   :response_format  forwarded, "json" when absent
#
# Evaluates to { transcription: <value returned by transcribe_audio> }.
SmartPrompt.define_worker :stt_transcriber do
  use "stt_service"
  model "FunAudioLLM/CosyVoice2-0.5B"

  source_audio = params[:audio_file]

  # NOTE(review): the model: keyword comes from params[:model], not from the
  # `model` declaration above -- confirm how the adapter treats a nil model.
  transcribe_options = {
    model: params[:model],
    language: params[:language],
    prompt: params[:prompt],
    temperature: params[:temperature] || 0.0,
    response_format: params[:response_format] || "json"
  }

  # The adapter is called directly instead of going through a prompt.
  stt_adapter = engine.llms["stt_service"]

  { transcription: stt_adapter.transcribe_audio(source_audio, **transcribe_options) }
end
33
+
34
+ # URL-based speech-to-text worker
35
# Worker :stt_url_transcriber -- transcribes audio addressed by a URL through
# the "stt_service" adapter.
#
# Reads from params: :audio_url (positional argument), :model, :language,
# :prompt, :temperature (0.0 when absent) and :response_format ("json" when
# absent).
#
# Evaluates to { transcription: <value returned by transcribe_audio_url> }.
SmartPrompt.define_worker :stt_url_transcriber do
  use "stt_service"
  model "FunAudioLLM/CosyVoice2-0.5B"

  remote_audio = params[:audio_url]

  # Keyword arguments forwarded to the adapter as-is.
  request_options = {
    model: params[:model],
    language: params[:language],
    prompt: params[:prompt],
    temperature: params[:temperature] || 0.0,
    response_format: params[:response_format] || "json"
  }

  url_transcript = engine.llms["stt_service"].transcribe_audio_url(remote_audio, **request_options)

  { transcription: url_transcript }
end
62
+
63
+ # Batch speech-to-text worker
64
# Worker :batch_stt -- transcribes several audio files in one adapter call.
#
# Reads from params:
#   :audio_files  collection handed to transcribe_batch
#   :audio_file   single-file fallback, wrapped in a one-element array
#   :model, :language, :prompt  forwarded unchanged
#   :temperature  forwarded, 0.0 when absent
#
# Evaluates to { batch_result: <value returned by transcribe_batch> }, or to
# { error: "..." } when neither :audio_files nor :audio_file is supplied.
SmartPrompt.define_worker :batch_stt do
  use "stt_service"
  model "FunAudioLLM/CosyVoice2-0.5B"

  single_file = params[:audio_file]
  # Fall back to the single file only when it is actually present, so a
  # missing input is never forwarded to the adapter as [nil].
  audio_files = params[:audio_files] || (single_file ? [single_file] : nil)

  if audio_files
    adapter = engine.llms["stt_service"]

    batch_result = adapter.transcribe_batch(
      audio_files,
      model: params[:model],
      language: params[:language],
      prompt: params[:prompt],
      temperature: params[:temperature] || 0.0
    )

    { batch_result: batch_result }
  else
    # Same error-hash convention as the :language_detector worker.
    { error: "Either audio_files or audio_file parameter is required" }
  end
end
84
+
85
+ # Audio file information worker
86
# Worker :audio_info -- asks the "stt_service" adapter for information about
# the file given in params[:audio_file].
#
# Evaluates to { audio_info: <value returned by get_audio_info> }.
SmartPrompt.define_worker :audio_info do
  use "stt_service"

  stt_adapter = engine.llms["stt_service"]

  { audio_info: stt_adapter.get_audio_info(params[:audio_file]) }
end
96
+
97
+ # Language detection worker
98
# Worker :language_detector -- detects the language of an audio file or of a
# piece of text, using the "stt_service" adapter.
#
# Input selection (first match wins):
#   params[:audio_file]  the file is transcribed first (with :model, :language
#                        and :temperature, 0.0 when absent) and the language is
#                        detected from the transcription's [:text]
#   params[:text]        the language is detected from the text directly
#
# Evaluates to
#   { transcription:, detected_language: }  for audio input,
#   { text:, detected_language: }           for text input,
#   { error: "..." }                        when neither is supplied.
SmartPrompt.define_worker :language_detector do
  use "stt_service"

  stt = engine.llms["stt_service"]
  audio_path = params[:audio_file]

  if audio_path
    transcript = stt.transcribe_audio(
      audio_path,
      model: params[:model],
      language: params[:language],
      temperature: params[:temperature] || 0.0
    )

    # assumes the transcription responds to [:text] -- TODO confirm against the adapter
    language = stt.detect_language(transcript[:text])

    { transcription: transcript, detected_language: language }
  elsif params[:text]
    language = stt.detect_language(params[:text])

    { text: params[:text], detected_language: language }
  else
    { error: "Either audio_file or text parameter is required" }
  end
end
131
+
132
+ # Multi-language STT worker
133
# Worker :multilingual_stt -- two-pass transcription.
#
# Pass 1 transcribes params[:audio_file] with no language hint, then the
# adapter's detect_language is run on the resulting [:text]. When a language
# is detected and it is not "en", the file is transcribed a second time with
# that language passed explicitly.
#
# Reads from params: :audio_file, :model, :temperature (0.0 when absent).
#
# Evaluates to
#   { transcription:, detected_language: }  when no second pass was made,
#   { initial_transcription:, improved_transcription:, detected_language: }
#                                           when it was.
SmartPrompt.define_worker :multilingual_stt do
  use "stt_service"
  model "FunAudioLLM/CosyVoice2-0.5B"

  stt = engine.llms["stt_service"]

  # Pass 1: no language: keyword at all.
  first_pass = stt.transcribe_audio(
    params[:audio_file],
    model: params[:model],
    temperature: params[:temperature] || 0.0
  )

  language = stt.detect_language(first_pass[:text])

  if !language || language == "en"
    # Nothing detected, or English: the first pass is the final answer.
    { transcription: first_pass, detected_language: language }
  else
    # Pass 2: repeat with the detected language supplied.
    second_pass = stt.transcribe_audio(
      params[:audio_file],
      model: params[:model],
      language: language,
      temperature: params[:temperature] || 0.0
    )

    {
      initial_transcription: first_pass,
      improved_transcription: second_pass,
      detected_language: language
    }
  end
end
170
+
171
+ # Format conversion worker
172
# Worker :stt_format_converter -- transcribes the same audio file once per
# requested response format.
#
# Reads from params:
#   :formats      formats to produce; %w[json text srt vtt] when absent
#   :audio_file, :model, :language  forwarded unchanged on every call
#   :temperature  forwarded, 0.0 when absent
#
# Evaluates to { format_results: { <format> => <transcribe_audio result> } }.
SmartPrompt.define_worker :stt_format_converter do
  use "stt_service"
  model "FunAudioLLM/CosyVoice2-0.5B"

  stt = engine.llms["stt_service"]
  requested_formats = params[:formats] || %w[json text srt vtt]

  # One adapter call per format, keyed by the format value itself.
  by_format = requested_formats.each_with_object({}) do |output_format, collected|
    collected[output_format] = stt.transcribe_audio(
      params[:audio_file],
      model: params[:model],
      language: params[:language],
      temperature: params[:temperature] || 0.0,
      response_format: output_format
    )
  end

  { format_results: by_format }
end
@@ -0,0 +1,388 @@
1
+ # TTS Workers for SmartPrompt
2
+ # These workers demonstrate the new text-to-speech capabilities
3
+
4
+ # Basic text-to-speech worker
5
# Worker :tts_synthesizer -- converts params[:text] to speech through the
# "tts_service" adapter.
#
# Reads from params:
#   :text             text to synthesize
#   :voice            "alloy" when absent
#   :speed            1.0 when absent
#   :response_format  "mp3" when absent; also used as the file extension
#   :language, :model forwarded unchanged
#   :save_to_file     truthy => write to a file instead of returning data
#   :output_dir       "./generated_audio" when absent
#   :filename_prefix  "tts_audio" when absent
#
# Evaluates to { audio_file: <synthesize_to_file result> } when saving, or to
# { audio_data: <synthesize_speech result> } otherwise.
SmartPrompt.define_worker :tts_synthesizer do
  use "tts_service"
  model "FunAudioLLM/CosyVoice2-0.5B"

  spoken_text = params[:text]
  audio_format = params[:response_format] || "mp3"

  # Keyword arguments shared by both adapter entry points.
  synth_options = {
    voice: params[:voice] || "alloy",
    model: params[:model],
    speed: params[:speed] || 1.0,
    response_format: audio_format,
    language: params[:language]
  }

  tts = engine.llms["tts_service"]

  if params[:save_to_file]
    target_dir = params[:output_dir] || "./generated_audio"
    prefix = params[:filename_prefix] || "tts_audio"
    # File name: <prefix>_<unix seconds>.<format>
    target_path = File.join(target_dir, "#{prefix}_#{Time.now.to_i}.#{audio_format}")

    { audio_file: tts.synthesize_to_file(spoken_text, target_path, **synth_options) }
  else
    { audio_data: tts.synthesize_speech(spoken_text, **synth_options) }
  end
end
52
+
53
+ # Multi-language TTS worker
54
# Worker :multilingual_tts -- text-to-speech with a script-based language
# guess when params[:language] is not given.
#
# Language guess (only when :language is absent), first match wins:
#   Hiragana/Katakana present  => "ja"
#   Hangul syllables present   => "ko"
#   Han ideographs present     => "zh"
#   otherwise                  => "en"
# Kana and Hangul are tested BEFORE Han because Japanese text routinely
# contains kanji (and Korean may contain hanja); testing Han first would
# label such text "zh".
#
# Reads from params: :text, :voice ("alloy" when absent), :speed (1.0 when
# absent), :response_format ("mp3" when absent), :language, :model,
# :save_to_file, :output_dir ("./multilingual_audio" when absent),
# :filename_prefix ("multilingual_tts" when absent).
#
# Evaluates to { audio_file:, detected_language: } when saving, or to
# { audio_data:, detected_language: } otherwise. :detected_language carries
# the language actually used, whether supplied or guessed.
SmartPrompt.define_worker :multilingual_tts do
  use "tts_service"
  model "FunAudioLLM/CosyVoice2-0.5B"

  tts_params = {
    text: params[:text],
    voice: params[:voice] || "alloy",
    speed: params[:speed] || 1.0,
    response_format: params[:response_format] || "mp3",
    language: params[:language]
  }

  unless tts_params[:language]
    # to_s keeps a nil (or other non-String) text from raising; it simply
    # falls through to "en".
    sample = params[:text].to_s

    tts_params[:language] =
      if sample.match?(/[\u3040-\u309f\u30a0-\u30ff]/)
        "ja"
      elsif sample.match?(/[\uac00-\ud7af]/)
        "ko"
      elsif sample.match?(/[\u4e00-\u9fff]/)
        "zh"
      else
        "en"
      end
  end

  adapter = engine.llms["tts_service"]

  if params[:save_to_file]
    output_dir = params[:output_dir] || "./multilingual_audio"
    filename_prefix = params[:filename_prefix] || "multilingual_tts"
    # File name: <prefix>_<language>_<unix seconds>.<format>
    output_path = File.join(
      output_dir,
      "#{filename_prefix}_#{tts_params[:language]}_#{Time.now.to_i}.#{tts_params[:response_format]}"
    )

    result = adapter.synthesize_to_file(
      tts_params[:text],
      output_path,
      voice: tts_params[:voice],
      model: params[:model],
      speed: tts_params[:speed],
      response_format: tts_params[:response_format],
      language: tts_params[:language]
    )

    {
      audio_file: result,
      detected_language: tts_params[:language]
    }
  else
    audio_data = adapter.synthesize_speech(
      tts_params[:text],
      voice: tts_params[:voice],
      model: params[:model],
      speed: tts_params[:speed],
      response_format: tts_params[:response_format],
      language: tts_params[:language]
    )

    {
      audio_data: audio_data,
      detected_language: tts_params[:language]
    }
  end
end
118
+
119
+ # Voice selection worker
120
# Worker :voice_selector -- reports the adapter's available voices and, when
# params[:text] is given, also synthesizes that text with the chosen voice.
#
# Reads from params:
#   :text             presence of this key (not :voice) triggers synthesis
#   :voice            "alloy" when absent
#   :speed            1.0 when absent
#   :response_format  "mp3" when absent; also used as the file extension
#   :model            forwarded unchanged
#   :save_to_file     truthy => write to a file instead of returning data
#   :output_dir       "./voice_samples" when absent
#   :filename_prefix  "voice_sample" when absent
#
# Evaluates to
#   { available_voices: }                                 without :text,
#   { available_voices:, selected_voice:, audio_file: }   when saving,
#   { available_voices:, selected_voice:, audio_data: }   otherwise.
SmartPrompt.define_worker :voice_selector do
  use "tts_service"

  tts = engine.llms["tts_service"]
  # Fetched up front: every result shape includes the voice list.
  voices = tts.available_voices
  sample_text = params[:text]

  if sample_text
    chosen_voice = params[:voice] || "alloy"
    audio_format = params[:response_format] || "mp3"
    synth_options = {
      voice: chosen_voice,
      model: params[:model],
      speed: params[:speed] || 1.0,
      response_format: audio_format
    }
    summary = { available_voices: voices, selected_voice: chosen_voice }

    if params[:save_to_file]
      target_dir = params[:output_dir] || "./voice_samples"
      prefix = params[:filename_prefix] || "voice_sample"
      # File name: <prefix>_<voice>_<unix seconds>.<format>
      target_path = File.join(target_dir, "#{prefix}_#{chosen_voice}_#{Time.now.to_i}.#{audio_format}")

      summary.merge(audio_file: tts.synthesize_to_file(sample_text, target_path, **synth_options))
    else
      summary.merge(audio_data: tts.synthesize_speech(sample_text, **synth_options))
    end
  else
    { available_voices: voices }
  end
end
176
+
177
+ # Speed variation worker
178
# Worker :speed_variation_tts -- synthesizes the same text once per playback
# speed.
#
# Reads from params:
#   :text             text to synthesize
#   :speeds           speeds to render; [0.5, 0.75, 1.0, 1.25, 1.5, 2.0] when absent
#   :voice            "alloy" when absent
#   :response_format  "mp3" when absent; also used as the file extension
#   :model            forwarded unchanged
#   :save_to_file     truthy => write one file per speed
#   :output_dir       "./speed_variations" when absent
#   :filename_prefix  optional custom prefix (see file naming below)
#
# File naming when saving (speed tag = speed with "." replaced by "_"):
#   no custom prefix:  speed_<tag>_<unix seconds>.<format>
#   custom prefix:     <prefix>_<tag>_<unix seconds>.<format>
# The tag is always part of the name so that files rendered within the same
# second never share a path, even when the caller supplies a prefix.
#
# Evaluates to { speed_variations: [ { speed:, audio_file: } | { speed:, audio_data: }, ... ] }.
SmartPrompt.define_worker :speed_variation_tts do
  use "tts_service"
  model "FunAudioLLM/CosyVoice2-0.5B"

  # Loop-invariant lookups are done once.
  adapter = engine.llms["tts_service"]
  speeds = params[:speeds] || [0.5, 0.75, 1.0, 1.25, 1.5, 2.0]
  voice = params[:voice] || "alloy"
  response_format = params[:response_format] || "mp3"

  results = speeds.map do |speed|
    if params[:save_to_file]
      output_dir = params[:output_dir] || "./speed_variations"
      speed_tag = speed.to_s.tr(".", "_")
      custom_prefix = params[:filename_prefix]
      filename_prefix = custom_prefix ? "#{custom_prefix}_#{speed_tag}" : "speed_#{speed_tag}"
      output_path = File.join(output_dir, "#{filename_prefix}_#{Time.now.to_i}.#{response_format}")

      result = adapter.synthesize_to_file(
        params[:text],
        output_path,
        voice: voice,
        model: params[:model],
        speed: speed,
        response_format: response_format
      )

      { speed: speed, audio_file: result }
    else
      audio_data = adapter.synthesize_speech(
        params[:text],
        voice: voice,
        model: params[:model],
        speed: speed,
        response_format: response_format
      )

      { speed: speed, audio_data: audio_data }
    end
  end

  { speed_variations: results }
end
233
+
234
+ # Custom voice management worker
235
# Worker :custom_voice_manager -- dispatches on params[:action] to manage and
# use custom voices on the "tts_service" adapter.
#
# Actions (string-valued):
#   "list"        => { action: "list", custom_voices: <list_custom_voices> }
#   "create"      needs :reference_audio_file; also reads :name, :description
#                 => { action: "create", voice_data: <create_custom_voice> }
#   "delete"      needs :voice_id
#                 => { action: "delete", result: <delete_custom_voice> }
#   "synthesize"  needs :voice_id and :text; reads :speed (1.0 when absent),
#                 :response_format ("mp3" when absent), :model, :save_to_file,
#                 :output_dir ("./custom_voice_audio" when absent) and
#                 :filename_prefix ("custom_voice" when absent)
#                 => { action: "synthesize", voice_id:, audio_file: | audio_data: }
#   anything else => { action: "default", predefined_voices:, custom_voices: }
#
# A missing required parameter yields { error: "..." } instead.
SmartPrompt.define_worker :custom_voice_manager do
  use "tts_service"

  tts = engine.llms["tts_service"]

  case params[:action]
  when "list"
    { action: "list", custom_voices: tts.list_custom_voices }

  when "create"
    reference_audio = params[:reference_audio_file]

    if reference_audio
      created_voice = tts.create_custom_voice(
        params[:name],
        reference_audio,
        description: params[:description]
      )
      { action: "create", voice_data: created_voice }
    else
      { error: "reference_audio_file is required for creating custom voice" }
    end

  when "delete"
    doomed_voice = params[:voice_id]

    if doomed_voice
      { action: "delete", result: tts.delete_custom_voice(doomed_voice) }
    else
      { error: "voice_id is required for deleting custom voice" }
    end

  when "synthesize"
    custom_voice = params[:voice_id]
    spoken_text = params[:text]

    if custom_voice && spoken_text
      audio_format = params[:response_format] || "mp3"
      # The custom voice id is passed to the adapter as the voice: keyword.
      synth_options = {
        voice: custom_voice,
        model: params[:model],
        speed: params[:speed] || 1.0,
        response_format: audio_format
      }
      summary = { action: "synthesize", voice_id: custom_voice }

      if params[:save_to_file]
        target_dir = params[:output_dir] || "./custom_voice_audio"
        prefix = params[:filename_prefix] || "custom_voice"
        # File name: <prefix>_<voice id>_<unix seconds>.<format>
        target_path = File.join(target_dir, "#{prefix}_#{custom_voice}_#{Time.now.to_i}.#{audio_format}")

        summary.merge(audio_file: tts.synthesize_to_file(spoken_text, target_path, **synth_options))
      else
        summary.merge(audio_data: tts.synthesize_speech(spoken_text, **synth_options))
      end
    else
      { error: "voice_id and text are required for synthesis" }
    end

  else
    # Unknown or missing action: report both predefined and custom voices.
    builtin_voices = tts.available_voices
    {
      action: "default",
      predefined_voices: builtin_voices,
      custom_voices: tts.list_custom_voices
    }
  end
end
327
+
328
+ # Batch TTS worker for multiple texts
329
# Worker :batch_tts -- synthesizes several texts, one adapter call per text.
#
# Reads from params:
#   :texts            collection of texts to synthesize
#   :text             single-text fallback, wrapped in a one-element array
#   :voice            "alloy" when absent
#   :speed            1.0 when absent
#   :response_format  "mp3" when absent; also used as the file extension
#   :language, :model forwarded unchanged
#   :save_to_file     truthy => write one file per text
#   :output_dir       "./batch_audio" when absent
#   :filename_prefix  optional custom prefix (see file naming below)
#
# File naming when saving:
#   no custom prefix:  batch_tts_<index>_<unix seconds>.<format>
#   custom prefix:     <prefix>_<index>_<unix seconds>.<format>
# The index is always part of the name so that files written within the same
# second never share a path, even when the caller supplies a prefix.
#
# Evaluates to { batch_results: [ { text:, index:, audio_file: | audio_data: }, ... ] },
# or to { error: "..." } when neither :texts nor :text is supplied.
SmartPrompt.define_worker :batch_tts do
  use "tts_service"
  model "FunAudioLLM/CosyVoice2-0.5B"

  single_text = params[:text]
  # Fall back to the single text only when it is actually present, so a
  # missing input is never sent to the adapter as nil.
  texts = params[:texts] || (single_text ? [single_text] : nil)

  if texts
    # Loop-invariant lookups are done once.
    adapter = engine.llms["tts_service"]
    voice = params[:voice] || "alloy"
    speed = params[:speed] || 1.0
    response_format = params[:response_format] || "mp3"

    results = texts.each_with_index.map do |text, index|
      if params[:save_to_file]
        output_dir = params[:output_dir] || "./batch_audio"
        custom_prefix = params[:filename_prefix]
        filename_prefix = custom_prefix ? "#{custom_prefix}_#{index}" : "batch_tts_#{index}"
        output_path = File.join(output_dir, "#{filename_prefix}_#{Time.now.to_i}.#{response_format}")

        result = adapter.synthesize_to_file(
          text,
          output_path,
          voice: voice,
          model: params[:model],
          speed: speed,
          response_format: response_format,
          language: params[:language]
        )

        { text: text, index: index, audio_file: result }
      else
        audio_data = adapter.synthesize_speech(
          text,
          voice: voice,
          model: params[:model],
          speed: speed,
          response_format: response_format,
          language: params[:language]
        )

        { text: text, index: index, audio_data: audio_data }
      end
    end

    { batch_results: results }
  else
    # Same error-hash convention as the :custom_voice_manager worker.
    { error: "Either texts or text parameter is required" }
  end
end