smart_prompt 0.4.4 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -10
- data/README.cn.md +307 -64
- data/README.md +311 -64
- data/Rakefile +10 -1
- data/config/anthropic_config.yml +151 -0
- data/config/image_generation_config.yml +22 -0
- data/config/multimodal_config.yml +85 -0
- data/config/sensenova_config.yml +63 -0
- data/config/zhipu_config.yml +73 -0
- data/examples/anthropic_basic_chat.rb +143 -0
- data/examples/anthropic_example.rb +232 -0
- data/examples/anthropic_multimodal.rb +212 -0
- data/examples/anthropic_streaming.rb +312 -0
- data/examples/anthropic_tool_calling.rb +393 -0
- data/examples/automatic_cleanup_example.rb +109 -0
- data/examples/history_management_examples.rb +522 -0
- data/examples/image_generation_example.rb +130 -0
- data/examples/monitoring_example.rb +121 -0
- data/examples/multimodal_example.rb +63 -0
- data/examples/relevance_based_strategy_example.rb +87 -0
- data/examples/sensenova_example.rb +129 -0
- data/examples/stt_example.rb +287 -0
- data/examples/tts_example.rb +244 -0
- data/examples/video_generation_example.rb +189 -0
- data/examples/zhipu_example.rb +151 -0
- data/lib/smart_prompt/anthropic_adapter.rb +363 -281
- data/lib/smart_prompt/compression_engine.rb +201 -0
- data/lib/smart_prompt/context_strategy.rb +22 -0
- data/lib/smart_prompt/conversation.rb +81 -191
- data/lib/smart_prompt/engine.rb +36 -19
- data/lib/smart_prompt/history_manager.rb +596 -0
- data/lib/smart_prompt/hybrid_strategy.rb +222 -0
- data/lib/smart_prompt/image_generation_adapter.rb +297 -0
- data/lib/smart_prompt/lru_cache.rb +133 -0
- data/lib/smart_prompt/message.rb +57 -0
- data/lib/smart_prompt/multimodal_adapter.rb +277 -0
- data/lib/smart_prompt/openai_adapter.rb +1 -25
- data/lib/smart_prompt/persistence_layer.rb +197 -0
- data/lib/smart_prompt/relevance_based_strategy.rb +221 -0
- data/lib/smart_prompt/sensenova_adapter.rb +410 -0
- data/lib/smart_prompt/session.rb +140 -0
- data/lib/smart_prompt/sliding_window_strategy.rb +100 -0
- data/lib/smart_prompt/stt_adapter.rb +381 -0
- data/lib/smart_prompt/summary_based_strategy.rb +152 -0
- data/lib/smart_prompt/token_counter.rb +74 -0
- data/lib/smart_prompt/tts_adapter.rb +403 -0
- data/lib/smart_prompt/version.rb +1 -1
- data/lib/smart_prompt/video_generation_adapter.rb +330 -0
- data/lib/smart_prompt/worker.rb +25 -3
- data/lib/smart_prompt/zhipu_adapter.rb +616 -0
- data/lib/smart_prompt.rb +22 -2
- data/workers/history_management_examples.rb +407 -0
- data/workers/image_generation_workers.rb +119 -0
- data/workers/multimodal_workers.rb +110 -0
- data/workers/sensenova_workers.rb +62 -0
- data/workers/stt_workers.rb +195 -0
- data/workers/tts_workers.rb +388 -0
- data/workers/video_generation_workers.rb +264 -0
- data/workers/zhipu_workers.rb +113 -0
- metadata +84 -8
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# Example demonstrating the monitoring and logging features of HistoryManager
|
|
3
|
+
|
|
4
|
+
require_relative '../lib/smart_prompt'
|
|
5
|
+
require 'logger'
|
|
6
|
+
|
|
7
|
+
# Set up logger with INFO level
|
|
8
|
+
SmartPrompt.logger = Logger.new($stdout)
|
|
9
|
+
SmartPrompt.logger.level = Logger::INFO
|
|
10
|
+
SmartPrompt.logger.formatter = proc do |severity, datetime, progname, msg|
|
|
11
|
+
"[#{datetime.strftime('%H:%M:%S')}] #{severity}: #{msg}\n"
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
puts "=" * 80
|
|
15
|
+
puts "History Manager Monitoring Example"
|
|
16
|
+
puts "=" * 80
|
|
17
|
+
puts
|
|
18
|
+
|
|
19
|
+
# Create a HistoryManager with monitoring enabled
|
|
20
|
+
config = {
|
|
21
|
+
cache_size: 5,
|
|
22
|
+
session_defaults: {
|
|
23
|
+
max_messages: 10,
|
|
24
|
+
max_tokens: 1000
|
|
25
|
+
},
|
|
26
|
+
persistence: {
|
|
27
|
+
enabled: true,
|
|
28
|
+
storage_path: "./history_data_example",
|
|
29
|
+
async: false
|
|
30
|
+
},
|
|
31
|
+
monitoring: {
|
|
32
|
+
enabled: true,
|
|
33
|
+
log_level: :info
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
manager = SmartPrompt::HistoryManager.new(config)
|
|
38
|
+
|
|
39
|
+
puts "\n--- Creating Sessions and Adding Messages ---\n"
|
|
40
|
+
|
|
41
|
+
# Create first session
|
|
42
|
+
manager.add_message("user_123", { role: "system", content: "You are a helpful assistant." })
|
|
43
|
+
manager.add_message("user_123", { role: "user", content: "What is machine learning?" })
|
|
44
|
+
manager.add_message("user_123", { role: "assistant", content: "Machine learning is a subset of AI..." })
|
|
45
|
+
|
|
46
|
+
# Create second session
|
|
47
|
+
manager.add_message("user_456", { role: "user", content: "Hello!" })
|
|
48
|
+
manager.add_message("user_456", { role: "assistant", content: "Hi! How can I help you?" })
|
|
49
|
+
|
|
50
|
+
puts "\n--- Getting Session Statistics ---\n"
|
|
51
|
+
|
|
52
|
+
# Get statistics for a specific session
|
|
53
|
+
session_stats = manager.get_stats("user_123")
|
|
54
|
+
puts "Session user_123 statistics:"
|
|
55
|
+
puts " Messages: #{session_stats[:message_count]}"
|
|
56
|
+
puts " Tokens: #{session_stats[:total_tokens]}"
|
|
57
|
+
puts " Created: #{session_stats[:created_at]}"
|
|
58
|
+
puts
|
|
59
|
+
|
|
60
|
+
# Get system-wide statistics
|
|
61
|
+
system_stats = manager.get_stats
|
|
62
|
+
puts "System-wide statistics:"
|
|
63
|
+
puts " Active sessions: #{system_stats[:active_sessions]}"
|
|
64
|
+
puts " Total messages: #{system_stats[:total_messages]}"
|
|
65
|
+
puts " Total tokens: #{system_stats[:total_tokens]}"
|
|
66
|
+
puts " Messages per session (avg): #{system_stats[:messages_per_session_avg].round(2)}"
|
|
67
|
+
puts " Tokens per message (avg): #{system_stats[:tokens_per_message_avg].round(2)}"
|
|
68
|
+
puts " Cache hits: #{system_stats[:cache_hits]}"
|
|
69
|
+
puts " Cache misses: #{system_stats[:cache_misses]}"
|
|
70
|
+
puts " Cache hit rate: #{(system_stats[:cache_hit_rate] * 100).round(2)}%"
|
|
71
|
+
puts
|
|
72
|
+
|
|
73
|
+
puts "\n--- Exporting Metrics ---\n"
|
|
74
|
+
|
|
75
|
+
# Export metrics in different formats
|
|
76
|
+
puts "Prometheus format (first 10 lines):"
|
|
77
|
+
prometheus_metrics = manager.export_metrics(format: :prometheus)
|
|
78
|
+
puts prometheus_metrics.lines.first(10).join
|
|
79
|
+
|
|
80
|
+
puts "\nJSON format:"
|
|
81
|
+
json_metrics = manager.export_metrics(format: :json)
|
|
82
|
+
require 'json'
|
|
83
|
+
metrics_hash = JSON.parse(json_metrics)
|
|
84
|
+
puts JSON.pretty_generate(metrics_hash.slice(
|
|
85
|
+
'active_sessions',
|
|
86
|
+
'total_messages',
|
|
87
|
+
'cache_hit_rate',
|
|
88
|
+
'messages_per_session_avg'
|
|
89
|
+
))
|
|
90
|
+
|
|
91
|
+
puts "\n--- Retrieving Context ---\n"
|
|
92
|
+
|
|
93
|
+
# Retrieve context with token limit
|
|
94
|
+
context = manager.get_context("user_123", 500)
|
|
95
|
+
puts "Retrieved #{context.count} messages from user_123 (within 500 token limit)"
|
|
96
|
+
|
|
97
|
+
puts "\n--- Searching Messages ---\n"
|
|
98
|
+
|
|
99
|
+
# Search for messages
|
|
100
|
+
results = manager.search_messages("user_123", "machine learning")
|
|
101
|
+
puts "Found #{results.count} messages containing 'machine learning'"
|
|
102
|
+
|
|
103
|
+
puts "\n--- Clearing Session ---\n"
|
|
104
|
+
|
|
105
|
+
# Clear a session (keeping system messages)
|
|
106
|
+
manager.clear_session("user_456", keep_system_messages: true)
|
|
107
|
+
|
|
108
|
+
puts "\n--- Final Statistics ---\n"
|
|
109
|
+
|
|
110
|
+
final_stats = manager.get_stats
|
|
111
|
+
puts "Final system statistics:"
|
|
112
|
+
puts " Active sessions: #{final_stats[:active_sessions]}"
|
|
113
|
+
puts " Total messages: #{final_stats[:total_messages]}"
|
|
114
|
+
puts " Sessions created: #{final_stats[:sessions_created]}"
|
|
115
|
+
puts " Sessions deleted: #{final_stats[:sessions_deleted]}"
|
|
116
|
+
puts " Messages added: #{final_stats[:messages_added]}"
|
|
117
|
+
puts " Context retrievals: #{final_stats[:context_retrievals]}"
|
|
118
|
+
|
|
119
|
+
# Cleanup
|
|
120
|
+
manager.shutdown
|
|
121
|
+
puts "\n--- Manager Shutdown Complete ---\n"
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# Multimodal Example for SmartPrompt
|
|
2
|
+
# This example demonstrates how to use the new MultimodalAdapter
|
|
3
|
+
|
|
4
|
+
require_relative '../lib/smart_prompt'
|
|
5
|
+
|
|
6
|
+
# Configuration for multimodal capabilities
|
|
7
|
+
config = {
|
|
8
|
+
"adapters" => {
|
|
9
|
+
"multimodal" => "MultimodalAdapter"
|
|
10
|
+
},
|
|
11
|
+
"llms" => {
|
|
12
|
+
"qwen_vl" => {
|
|
13
|
+
"adapter" => "multimodal",
|
|
14
|
+
"url" => "https://api.siliconflow.cn/v1/",
|
|
15
|
+
"api_key" => ENV["SILICONFLOW_API_KEY"],
|
|
16
|
+
"default_model" => "Qwen/Qwen2.5-VL-7B-Instruct" # NOTE(review): the STT and SenseNova examples configure this value under the "model" key — confirm which key MultimodalAdapter actually reads
|
|
17
|
+
}
|
|
18
|
+
},
|
|
19
|
+
"default_llm" => "qwen_vl",
|
|
20
|
+
"template_path" => "./templates",
|
|
21
|
+
"worker_path" => "./workers",
|
|
22
|
+
"logger_file" => "./logs/smart_prompt.log"
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
# Write config to file
|
|
26
|
+
File.write('multimodal_config.yml', config.to_yaml)
|
|
27
|
+
|
|
28
|
+
# Initialize engine
|
|
29
|
+
engine = SmartPrompt::Engine.new('multimodal_config.yml')
|
|
30
|
+
|
|
31
|
+
# Example 1: Simple image analysis
|
|
32
|
+
puts "=== Example 1: Image Analysis ==="
|
|
33
|
+
result = engine.call_worker(:image_analyzer, {
|
|
34
|
+
image_url: "https://example.com/image.jpg",
|
|
35
|
+
question: "描述这张图片中的内容"
|
|
36
|
+
})
|
|
37
|
+
puts "Image Analysis Result: #{result}"
|
|
38
|
+
|
|
39
|
+
# Example 2: Video analysis
|
|
40
|
+
puts "\n=== Example 2: Video Analysis ==="
|
|
41
|
+
result = engine.call_worker(:video_analyzer, {
|
|
42
|
+
video_url: "https://example.com/video.mp4",
|
|
43
|
+
question: "这个视频的主要内容是什么?",
|
|
44
|
+
max_frames: 15,
|
|
45
|
+
fps: 2
|
|
46
|
+
})
|
|
47
|
+
puts "Video Analysis Result: #{result}"
|
|
48
|
+
|
|
49
|
+
# Example 3: Multiple images comparison
|
|
50
|
+
puts "\n=== Example 3: Multiple Images Comparison ==="
|
|
51
|
+
result = engine.call_worker(:multi_image_analyzer, {
|
|
52
|
+
image_urls: [
|
|
53
|
+
"https://example.com/image1.jpg",
|
|
54
|
+
"https://example.com/image2.jpg"
|
|
55
|
+
],
|
|
56
|
+
question: "比较这两张图片的相似之处和不同之处"
|
|
57
|
+
})
|
|
58
|
+
puts "Multi-Image Analysis Result: #{result}"
|
|
59
|
+
|
|
60
|
+
puts "\n=== All examples completed successfully ==="
|
|
61
|
+
|
|
62
|
+
# Clean up
|
|
63
|
+
File.delete('multimodal_config.yml') if File.exist?('multimodal_config.yml')
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# Example demonstrating the RelevanceBasedStrategy
|
|
3
|
+
|
|
4
|
+
require_relative '../lib/smart_prompt'
|
|
5
|
+
|
|
6
|
+
# Create a session with some conversation history
|
|
7
|
+
session = SmartPrompt::Session.new("demo_session", {})
|
|
8
|
+
|
|
9
|
+
# Add a diverse set of messages
|
|
10
|
+
session.add_message(role: "system", content: "You are a helpful AI assistant")
|
|
11
|
+
session.add_message(role: "user", content: "Tell me about machine learning")
|
|
12
|
+
session.add_message(role: "assistant", content: "Machine learning is a subset of artificial intelligence that enables systems to learn from data")
|
|
13
|
+
session.add_message(role: "user", content: "What are your favorite animals?")
|
|
14
|
+
session.add_message(role: "assistant", content: "I don't have personal preferences, but many people love cats and dogs")
|
|
15
|
+
session.add_message(role: "user", content: "How does deep learning work?")
|
|
16
|
+
session.add_message(role: "assistant", content: "Deep learning uses neural networks with multiple layers to learn hierarchical representations")
|
|
17
|
+
session.add_message(role: "user", content: "Tell me a joke")
|
|
18
|
+
session.add_message(role: "assistant", content: "Why did the programmer quit? Because they didn't get arrays!")
|
|
19
|
+
session.add_message(role: "user", content: "What is supervised learning?")
|
|
20
|
+
session.add_message(role: "assistant", content: "Supervised learning is when you train a model with labeled data")
|
|
21
|
+
|
|
22
|
+
puts "=" * 80
|
|
23
|
+
puts "RelevanceBasedStrategy Example"
|
|
24
|
+
puts "=" * 80
|
|
25
|
+
puts
|
|
26
|
+
|
|
27
|
+
# Create the strategy
|
|
28
|
+
strategy = SmartPrompt::RelevanceBasedStrategy.new(
|
|
29
|
+
top_k: 5,
|
|
30
|
+
recency_weight: 0.3,
|
|
31
|
+
relevance_weight: 0.7
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
# Current message is about neural networks
|
|
35
|
+
current_message = SmartPrompt::Message.new(
|
|
36
|
+
role: "user",
|
|
37
|
+
content: "Can you explain more about neural networks and how they relate to machine learning?"
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
puts "Current message: #{current_message.content}"
|
|
41
|
+
puts
|
|
42
|
+
puts "Total messages in session: #{session.message_count}"
|
|
43
|
+
puts
|
|
44
|
+
|
|
45
|
+
# Select relevant messages
|
|
46
|
+
messages = session.get_messages
|
|
47
|
+
selected = strategy.select_messages(messages, nil, current_message)
|
|
48
|
+
|
|
49
|
+
puts "Selected #{selected.length} most relevant messages:"
|
|
50
|
+
puts "-" * 80
|
|
51
|
+
selected.each_with_index do |msg, idx|
|
|
52
|
+
puts "#{idx + 1}. [#{msg.role}] #{msg.content}"
|
|
53
|
+
end
|
|
54
|
+
puts
|
|
55
|
+
|
|
56
|
+
# Demonstrate with token limit
|
|
57
|
+
puts "=" * 80
|
|
58
|
+
puts "With Token Limit (100 tokens)"
|
|
59
|
+
puts "=" * 80
|
|
60
|
+
selected_limited = strategy.select_messages(messages, 100, current_message)
|
|
61
|
+
total_tokens = selected_limited.sum { |m| m.token_count || 0 }
|
|
62
|
+
|
|
63
|
+
puts "Selected #{selected_limited.length} messages (#{total_tokens} tokens):"
|
|
64
|
+
puts "-" * 80
|
|
65
|
+
selected_limited.each_with_index do |msg, idx|
|
|
66
|
+
tokens = msg.token_count || 0
|
|
67
|
+
# Show at most the first 61 characters; add an ellipsis only when the content was actually cut.
preview = msg.content.length > 61 ? "#{msg.content[0..60]}..." : msg.content
puts "#{idx + 1}. [#{msg.role}] (#{tokens} tokens) #{preview}"
|
|
68
|
+
end
|
|
69
|
+
puts
|
|
70
|
+
|
|
71
|
+
# Show compression recommendation
|
|
72
|
+
puts "=" * 80
|
|
73
|
+
puts "Compression Recommendation"
|
|
74
|
+
puts "=" * 80
|
|
75
|
+
should_compress = strategy.should_compress?(session)
|
|
76
|
+
puts "Should compress? #{should_compress}"
|
|
77
|
+
puts "Reason: Session has #{session.message_count} messages, threshold is #{5 * 3} messages"
|
|
78
|
+
puts
|
|
79
|
+
|
|
80
|
+
puts "=" * 80
|
|
81
|
+
puts "Strategy Configuration"
|
|
82
|
+
puts "=" * 80
|
|
83
|
+
puts "Top-k: 5"
|
|
84
|
+
puts "Recency weight: 0.3"
|
|
85
|
+
puts "Relevance weight: 0.7"
|
|
86
|
+
puts "Embedding service: Not configured (using keyword similarity)"
|
|
87
|
+
puts "=" * 80
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# SenseNova (商汤 日日新) Example for SmartPrompt
|
|
2
|
+
#
|
|
3
|
+
# Demonstrates all four SenseNova model categories through one SenseNovaAdapter:
|
|
4
|
+
# 1. 商量 文本对话 (chat) — sync + streaming
|
|
5
|
+
# 2. 商量 图文多模态 (vision)
|
|
6
|
+
# 3. Cupido 向量模型 (embeddings)
|
|
7
|
+
# 4. 秒画 文生图 (text-to-image)
|
|
8
|
+
#
|
|
9
|
+
# Requires a valid SenseNova API key in the SENSENOVA_API_KEY environment variable
|
|
10
|
+
# (get one at https://platform.sensenova.cn/console) and the relevant models enabled.
|
|
11
|
+
|
|
12
|
+
require_relative "../lib/smart_prompt"
|
|
13
|
+
|
|
14
|
+
api_key = ENV["SENSENOVA_API_KEY"]
|
|
15
|
+
|
|
16
|
+
config = {
|
|
17
|
+
"adapters" => {
|
|
18
|
+
"sensenova" => "SenseNovaAdapter",
|
|
19
|
+
},
|
|
20
|
+
"llms" => {
|
|
21
|
+
"sensechat" => {
|
|
22
|
+
"adapter" => "sensenova",
|
|
23
|
+
"url" => "https://token.sensenova.cn/v1",
|
|
24
|
+
"api_key" => api_key,
|
|
25
|
+
"model" => "sensenova-6.7-flash-lite",
|
|
26
|
+
"temperature" => 0.7,
|
|
27
|
+
},
|
|
28
|
+
"sensevision" => {
|
|
29
|
+
"adapter" => "sensenova",
|
|
30
|
+
"url" => "https://token.sensenova.cn/v1",
|
|
31
|
+
"api_key" => api_key,
|
|
32
|
+
"model" => "sensenova-6.7-flash-lite",
|
|
33
|
+
},
|
|
34
|
+
"senseembedding" => {
|
|
35
|
+
"adapter" => "sensenova",
|
|
36
|
+
"url" => "https://api.sensenova.cn/compatible-mode/v2",
|
|
37
|
+
"embeddings_url" => "https://api.sensenova.cn/v1/llm/embeddings",
|
|
38
|
+
"api_key" => api_key,
|
|
39
|
+
"model" => "Cupido",
|
|
40
|
+
},
|
|
41
|
+
"senseimage" => {
|
|
42
|
+
"adapter" => "sensenova",
|
|
43
|
+
"url" => "https://token.sensenova.cn/v1",
|
|
44
|
+
"image_url" => "https://token.sensenova.cn/v1/images/generations",
|
|
45
|
+
"api_key" => api_key,
|
|
46
|
+
"model" => "sensenova-u1-fast",
|
|
47
|
+
},
|
|
48
|
+
},
|
|
49
|
+
"default_llm" => "sensechat",
|
|
50
|
+
"template_path" => "./templates",
|
|
51
|
+
"worker_path" => "./workers",
|
|
52
|
+
"logger_file" => "./logs/smart_prompt.log",
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
File.write("sensenova_config.yml", config.to_yaml)
|
|
56
|
+
engine = SmartPrompt::Engine.new("sensenova_config.yml")
|
|
57
|
+
|
|
58
|
+
puts "=== SmartPrompt SenseNova Demo ==="
|
|
59
|
+
unless api_key
|
|
60
|
+
puts "Note: SENSENOVA_API_KEY is not set — the API calls below will fail at the network layer."
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
# 1. Chat (sync)
|
|
64
|
+
puts "\n=== Example 1: 商量 文本对话 (sync) ==="
|
|
65
|
+
begin
|
|
66
|
+
result = engine.call_worker(:sensenova_chat, { prompt: "用一句话介绍商汤日日新大模型。" })
|
|
67
|
+
puts "Reply: #{result}"
|
|
68
|
+
rescue => e
|
|
69
|
+
puts "Error: #{e.message}"
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
# 2. Chat (streaming) — tokens are printed as they arrive.
|
|
73
|
+
puts "\n=== Example 2: 商量 文本对话 (streaming) ==="
|
|
74
|
+
begin
|
|
75
|
+
engine.call_worker_by_stream(:sensenova_chat, { prompt: "写两句关于春天的诗。" }) do |chunk, _|
|
|
76
|
+
if (delta = chunk.dig("choices", 0, "delta", "content"))
|
|
77
|
+
print delta
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
puts
|
|
81
|
+
rescue => e
|
|
82
|
+
puts "Error: #{e.message}"
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
# 3. Multimodal vision
|
|
86
|
+
puts "\n=== Example 3: 商量 图文多模态 ==="
|
|
87
|
+
begin
|
|
88
|
+
result = engine.call_worker(:sensenova_vision, {
|
|
89
|
+
image_url: "https://img0.baidu.com/it/u=3775751201,1094020238&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=615",
|
|
90
|
+
question: "图片里有什么?",
|
|
91
|
+
})
|
|
92
|
+
puts "Vision result: #{result}"
|
|
93
|
+
rescue => e
|
|
94
|
+
puts "Error: #{e.message}"
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
# 4. Embeddings (Cupido)
|
|
98
|
+
puts "\n=== Example 4: Cupido 向量模型 ==="
|
|
99
|
+
begin
|
|
100
|
+
vector = engine.call_worker(:sensenova_embed, { text: "商汤日日新大模型", length: 1024 })
|
|
101
|
+
# Slice only when the worker returned something that supports #first; otherwise print the raw value.
# An explicit check replaces the inline `rescue` modifier, which would hide any unrelated error.
puts "Embedding dim: #{vector.is_a?(Array) ? vector.size : vector} (first 5: #{vector.respond_to?(:first) ? vector.first(5) : vector})"
|
|
102
|
+
rescue => e
|
|
103
|
+
puts "Error: #{e.message}"
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
# 5. Text-to-image (秒画)
|
|
107
|
+
puts "\n=== Example 5: 秒画 文生图 ==="
|
|
108
|
+
begin
|
|
109
|
+
result = engine.call_worker(:sensenova_image, {
|
|
110
|
+
prompt: "一只在书房里读书的可爱机器人,温暖的光线,数字插画",
|
|
111
|
+
size: "2048x2048",
|
|
112
|
+
save_to_file: true,
|
|
113
|
+
output_dir: "./generated_images",
|
|
114
|
+
filename_prefix: "sensenova_robot",
|
|
115
|
+
})
|
|
116
|
+
if result.is_a?(Hash) && result[:images]
|
|
117
|
+
puts "Generated #{result[:images].size} image(s)"
|
|
118
|
+
puts "First image URL: #{result[:images].first[:url]}"
|
|
119
|
+
puts "Saved files: #{result[:saved_files]}"
|
|
120
|
+
else
|
|
121
|
+
puts "Result: #{result}"
|
|
122
|
+
end
|
|
123
|
+
rescue => e
|
|
124
|
+
puts "Error (image endpoint may need a live key to confirm): #{e.message}"
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
puts "\n=== All examples completed ==="
|
|
128
|
+
|
|
129
|
+
File.delete("sensenova_config.yml") if File.exist?("sensenova_config.yml")
|
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
# STT Example for SmartPrompt
|
|
2
|
+
# This example demonstrates how to use the new STTAdapter
|
|
3
|
+
|
|
4
|
+
require_relative '../lib/smart_prompt'
|
|
5
|
+
|
|
6
|
+
# Configuration for STT capabilities
|
|
7
|
+
config = {
|
|
8
|
+
"adapters" => {
|
|
9
|
+
"multimodal" => "MultimodalAdapter",
|
|
10
|
+
"image_generation" => "ImageGenerationAdapter",
|
|
11
|
+
"video_generation" => "VideoGenerationAdapter",
|
|
12
|
+
"tts" => "TTSAdapter",
|
|
13
|
+
"stt" => "STTAdapter"
|
|
14
|
+
},
|
|
15
|
+
"llms" => {
|
|
16
|
+
"qwen_vl" => {
|
|
17
|
+
"adapter" => "multimodal",
|
|
18
|
+
"url" => "https://api.siliconflow.cn/v1/",
|
|
19
|
+
"api_key" => ENV["SILICONFLOW_API_KEY"],
|
|
20
|
+
"model" => "Qwen/Qwen2.5-VL-7B-Instruct"
|
|
21
|
+
},
|
|
22
|
+
"image_gen" => {
|
|
23
|
+
"adapter" => "image_generation",
|
|
24
|
+
"url" => "https://api.siliconflow.cn/v1/",
|
|
25
|
+
"api_key" => ENV["SILICONFLOW_API_KEY"],
|
|
26
|
+
"model" => "stabilityai/stable-diffusion-xl-base-1.0"
|
|
27
|
+
},
|
|
28
|
+
"video_gen" => {
|
|
29
|
+
"adapter" => "video_generation",
|
|
30
|
+
"url" => "https://api.siliconflow.cn/v1/",
|
|
31
|
+
"api_key" => ENV["SILICONFLOW_API_KEY"],
|
|
32
|
+
"model" => "Wan-AI/Wan2.2-T2V-A14B"
|
|
33
|
+
},
|
|
34
|
+
"tts_service" => {
|
|
35
|
+
"adapter" => "tts",
|
|
36
|
+
"url" => "https://api.siliconflow.cn/v1/",
|
|
37
|
+
"api_key" => ENV["SILICONFLOW_API_KEY"],
|
|
38
|
+
"model" => "FunAudioLLM/CosyVoice2-0.5B"
|
|
39
|
+
},
|
|
40
|
+
"stt_service" => {
|
|
41
|
+
"adapter" => "stt",
|
|
42
|
+
"url" => "https://api.siliconflow.cn/v1/",
|
|
43
|
+
"api_key" => ENV["SILICONFLOW_API_KEY"],
|
|
44
|
+
"model" => "FunAudioLLM/SenseVoiceSmall" # speech-recognition model; CosyVoice2-0.5B is the speech-synthesis model used by tts_service
|
|
45
|
+
}
|
|
46
|
+
},
|
|
47
|
+
"default_llm" => "qwen_vl",
|
|
48
|
+
"template_path" => "./templates",
|
|
49
|
+
"worker_path" => "./workers",
|
|
50
|
+
"logger_file" => "./logs/smart_prompt.log"
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
# Write config to file
|
|
54
|
+
File.write('stt_config.yml', config.to_yaml)
|
|
55
|
+
|
|
56
|
+
# Initialize engine
|
|
57
|
+
engine = SmartPrompt::Engine.new('stt_config.yml')
|
|
58
|
+
|
|
59
|
+
puts "=== SmartPrompt STT Demo ==="
|
|
60
|
+
|
|
61
|
+
# Example 1: Basic speech-to-text transcription
|
|
62
|
+
puts "\n=== Example 1: Basic STT Transcription ==="
|
|
63
|
+
begin
|
|
64
|
+
# Note: This example requires an actual audio file
|
|
65
|
+
# Replace with a real audio file path for testing
|
|
66
|
+
audio_file_path = "./test_audio.wav"
|
|
67
|
+
|
|
68
|
+
if File.exist?(audio_file_path)
|
|
69
|
+
result = engine.call_worker(:stt_transcriber, {
|
|
70
|
+
audio_file: audio_file_path,
|
|
71
|
+
language: "zh",
|
|
72
|
+
response_format: "json"
|
|
73
|
+
})
|
|
74
|
+
|
|
75
|
+
puts "STT transcription successful!"
|
|
76
|
+
puts "Transcribed text: #{result[:transcription][:text]}"
|
|
77
|
+
puts "Language: #{result[:transcription][:language]}"
|
|
78
|
+
puts "Duration: #{result[:transcription][:duration]} seconds"
|
|
79
|
+
puts "File size: #{result[:transcription][:file_size]} bytes"
|
|
80
|
+
else
|
|
81
|
+
puts "Audio file not found: #{audio_file_path}"
|
|
82
|
+
puts "Please create a test audio file to run this example"
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
rescue => e
|
|
86
|
+
puts "Error in STT transcription: #{e.message}"
|
|
87
|
+
puts "Note: This example requires a valid SILICONFLOW_API_KEY environment variable"
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
# Example 2: URL-based transcription
|
|
91
|
+
puts "\n=== Example 2: URL-based STT Transcription ==="
|
|
92
|
+
begin
|
|
93
|
+
# Note: Replace with a real audio URL for testing
|
|
94
|
+
audio_url = "https://example.com/audio.wav"
|
|
95
|
+
|
|
96
|
+
result = engine.call_worker(:stt_url_transcriber, {
|
|
97
|
+
audio_url: audio_url,
|
|
98
|
+
language: "en",
|
|
99
|
+
response_format: "text"
|
|
100
|
+
})
|
|
101
|
+
|
|
102
|
+
puts "URL-based STT transcription successful!"
|
|
103
|
+
puts "Transcribed text: #{result[:transcription][:text]}"
|
|
104
|
+
puts "Audio URL: #{result[:transcription][:audio_url]}"
|
|
105
|
+
|
|
106
|
+
rescue => e
|
|
107
|
+
puts "Error in URL-based STT: #{e.message}"
|
|
108
|
+
puts "Note: This requires a valid audio URL"
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
# Example 3: Batch transcription
|
|
112
|
+
puts "\n=== Example 3: Batch STT Processing ==="
|
|
113
|
+
begin
|
|
114
|
+
# Note: Replace with real audio files for testing
|
|
115
|
+
audio_files = ["./audio1.wav", "./audio2.wav", "./audio3.wav"]
|
|
116
|
+
existing_files = audio_files.select { |f| File.exist?(f) }
|
|
117
|
+
|
|
118
|
+
if existing_files.any?
|
|
119
|
+
result = engine.call_worker(:batch_stt, {
|
|
120
|
+
audio_files: existing_files,
|
|
121
|
+
language: "zh"
|
|
122
|
+
})
|
|
123
|
+
|
|
124
|
+
puts "Batch STT processing successful!"
|
|
125
|
+
puts "Total files: #{result[:batch_result][:total_files]}"
|
|
126
|
+
puts "Successful: #{result[:batch_result][:successful]}"
|
|
127
|
+
puts "Failed: #{result[:batch_result][:failed]}"
|
|
128
|
+
|
|
129
|
+
result[:batch_result][:results].each do |file_result|
|
|
130
|
+
if file_result[:success]
|
|
131
|
+
puts " - #{File.basename(file_result[:file])}: #{file_result[:transcription][:text].length} characters"
|
|
132
|
+
else
|
|
133
|
+
puts " - #{File.basename(file_result[:file])}: ERROR - #{file_result[:error]}"
|
|
134
|
+
end
|
|
135
|
+
end
|
|
136
|
+
else
|
|
137
|
+
puts "No audio files found for batch processing"
|
|
138
|
+
puts "Please create test audio files to run this example"
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
rescue => e
|
|
142
|
+
puts "Error in batch STT: #{e.message}"
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
# Example 4: Audio file information
|
|
146
|
+
puts "\n=== Example 4: Audio File Information ==="
|
|
147
|
+
begin
|
|
148
|
+
audio_file_path = "./test_audio.wav"
|
|
149
|
+
|
|
150
|
+
if File.exist?(audio_file_path)
|
|
151
|
+
result = engine.call_worker(:audio_info, {
|
|
152
|
+
audio_file: audio_file_path
|
|
153
|
+
})
|
|
154
|
+
|
|
155
|
+
puts "Audio file information retrieved!"
|
|
156
|
+
puts "File name: #{result[:audio_info][:file_name]}"
|
|
157
|
+
puts "File size: #{result[:audio_info][:file_size]} bytes"
|
|
158
|
+
puts "Format: #{result[:audio_info][:format]}"
|
|
159
|
+
puts "Estimated duration: #{result[:audio_info][:estimated_duration]} seconds"
|
|
160
|
+
puts "Supported: #{result[:audio_info][:supported]}"
|
|
161
|
+
else
|
|
162
|
+
puts "Audio file not found: #{audio_file_path}"
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
rescue => e
|
|
166
|
+
puts "Error getting audio info: #{e.message}"
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
# Example 5: Language detection
|
|
170
|
+
puts "\n=== Example 5: Language Detection ==="
|
|
171
|
+
begin
|
|
172
|
+
# Test with Chinese text
|
|
173
|
+
result = engine.call_worker(:language_detector, {
|
|
174
|
+
text: "这是一个中文文本,用于语言检测演示。"
|
|
175
|
+
})
|
|
176
|
+
|
|
177
|
+
puts "Language detection successful!"
|
|
178
|
+
puts "Text: #{result[:text]}"
|
|
179
|
+
puts "Detected language: #{result[:detected_language]}"
|
|
180
|
+
|
|
181
|
+
# Test with English text
|
|
182
|
+
result_en = engine.call_worker(:language_detector, {
|
|
183
|
+
text: "This is an English text for language detection demonstration."
|
|
184
|
+
})
|
|
185
|
+
|
|
186
|
+
puts "English text detected as: #{result_en[:detected_language]}"
|
|
187
|
+
|
|
188
|
+
rescue => e
|
|
189
|
+
puts "Error in language detection: #{e.message}"
|
|
190
|
+
end
|
|
191
|
+
|
|
192
|
+
# Example 6: Multi-language STT
|
|
193
|
+
puts "\n=== Example 6: Multi-language STT ==="
|
|
194
|
+
begin
|
|
195
|
+
audio_file_path = "./test_audio.wav"
|
|
196
|
+
|
|
197
|
+
if File.exist?(audio_file_path)
|
|
198
|
+
result = engine.call_worker(:multilingual_stt, {
|
|
199
|
+
audio_file: audio_file_path
|
|
200
|
+
})
|
|
201
|
+
|
|
202
|
+
puts "Multi-language STT successful!"
|
|
203
|
+
puts "Detected language: #{result[:detected_language]}"
|
|
204
|
+
puts "Initial transcription: #{result[:initial_transcription][:text]}"
|
|
205
|
+
|
|
206
|
+
if result[:improved_transcription]
|
|
207
|
+
puts "Improved transcription: #{result[:improved_transcription][:text]}"
|
|
208
|
+
end
|
|
209
|
+
else
|
|
210
|
+
puts "Audio file not found: #{audio_file_path}"
|
|
211
|
+
end
|
|
212
|
+
|
|
213
|
+
rescue => e
|
|
214
|
+
puts "Error in multi-language STT: #{e.message}"
|
|
215
|
+
end
|
|
216
|
+
|
|
217
|
+
# Example 7: Format conversion
|
|
218
|
+
puts "\n=== Example 7: STT Format Conversion ==="
|
|
219
|
+
begin
|
|
220
|
+
audio_file_path = "./test_audio.wav"
|
|
221
|
+
|
|
222
|
+
if File.exist?(audio_file_path)
|
|
223
|
+
result = engine.call_worker(:stt_format_converter, {
|
|
224
|
+
audio_file: audio_file_path,
|
|
225
|
+
formats: ["json", "text", "srt", "vtt"]
|
|
226
|
+
})
|
|
227
|
+
|
|
228
|
+
puts "Format conversion successful!"
|
|
229
|
+
result[:format_results].each do |format, transcription|
|
|
230
|
+
puts " - #{format.upcase}: #{transcription[:text].length} characters"
|
|
231
|
+
end
|
|
232
|
+
else
|
|
233
|
+
puts "Audio file not found: #{audio_file_path}"
|
|
234
|
+
end
|
|
235
|
+
|
|
236
|
+
rescue => e
|
|
237
|
+
puts "Error in format conversion: #{e.message}"
|
|
238
|
+
end
|
|
239
|
+
|
|
240
|
+
# Example 8: Direct adapter usage
|
|
241
|
+
puts "\n=== Example 8: Direct Adapter Usage ==="
|
|
242
|
+
begin
|
|
243
|
+
# Get the adapter directly
|
|
244
|
+
adapter = engine.llms["stt_service"]
|
|
245
|
+
|
|
246
|
+
audio_file_path = "./test_audio.wav"
|
|
247
|
+
|
|
248
|
+
if File.exist?(audio_file_path)
|
|
249
|
+
# Transcribe audio directly
|
|
250
|
+
transcription_data = adapter.transcribe_audio(
|
|
251
|
+
audio_file_path,
|
|
252
|
+
language: "zh",
|
|
253
|
+
temperature: 0.0,
|
|
254
|
+
response_format: "json"
|
|
255
|
+
)
|
|
256
|
+
|
|
257
|
+
puts "Direct adapter usage successful!"
|
|
258
|
+
puts "Transcribed text: #{transcription_data[:text]}"
|
|
259
|
+
puts "Language: #{transcription_data[:language]}"
|
|
260
|
+
puts "Duration: #{transcription_data[:duration]} seconds"
|
|
261
|
+
|
|
262
|
+
# Get audio information
|
|
263
|
+
audio_info = adapter.get_audio_info(audio_file_path)
|
|
264
|
+
puts "Audio info - Format: #{audio_info[:format]}, Size: #{audio_info[:file_size]} bytes"
|
|
265
|
+
|
|
266
|
+
# Detect language
|
|
267
|
+
detected_language = adapter.detect_language(transcription_data[:text])
|
|
268
|
+
puts "Detected language: #{detected_language}"
|
|
269
|
+
|
|
270
|
+
else
|
|
271
|
+
puts "Audio file not found: #{audio_file_path}"
|
|
272
|
+
end
|
|
273
|
+
|
|
274
|
+
rescue => e
|
|
275
|
+
puts "Error in direct adapter usage: #{e.message}"
|
|
276
|
+
end
|
|
277
|
+
|
|
278
|
+
puts "\n=== All examples completed ==="
|
|
279
|
+
puts "\nImportant Notes:"
|
|
280
|
+
puts "1. STT requires valid SILICONFLOW_API_KEY environment variable"
|
|
281
|
+
puts "2. Audio files must be in supported formats (mp3, wav, webm, etc.)"
|
|
282
|
+
puts "3. Maximum file size: 25MB"
|
|
283
|
+
puts "4. Supported languages: Chinese, English, Japanese, Korean"
|
|
284
|
+
puts "5. Response formats: json, text, srt, vtt"
|
|
285
|
+
|
|
286
|
+
# Clean up
|
|
287
|
+
File.delete('stt_config.yml') if File.exist?('stt_config.yml')
|