smart_prompt 0.4.4 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -10
- data/README.cn.md +307 -64
- data/README.md +311 -64
- data/Rakefile +10 -1
- data/config/anthropic_config.yml +151 -0
- data/config/image_generation_config.yml +22 -0
- data/config/multimodal_config.yml +85 -0
- data/config/sensenova_config.yml +63 -0
- data/config/zhipu_config.yml +73 -0
- data/examples/anthropic_basic_chat.rb +143 -0
- data/examples/anthropic_example.rb +232 -0
- data/examples/anthropic_multimodal.rb +212 -0
- data/examples/anthropic_streaming.rb +312 -0
- data/examples/anthropic_tool_calling.rb +393 -0
- data/examples/automatic_cleanup_example.rb +109 -0
- data/examples/history_management_examples.rb +522 -0
- data/examples/image_generation_example.rb +130 -0
- data/examples/monitoring_example.rb +121 -0
- data/examples/multimodal_example.rb +63 -0
- data/examples/relevance_based_strategy_example.rb +87 -0
- data/examples/sensenova_example.rb +129 -0
- data/examples/stt_example.rb +287 -0
- data/examples/tts_example.rb +244 -0
- data/examples/video_generation_example.rb +189 -0
- data/examples/zhipu_example.rb +151 -0
- data/lib/smart_prompt/anthropic_adapter.rb +363 -281
- data/lib/smart_prompt/compression_engine.rb +201 -0
- data/lib/smart_prompt/context_strategy.rb +22 -0
- data/lib/smart_prompt/conversation.rb +81 -191
- data/lib/smart_prompt/engine.rb +36 -19
- data/lib/smart_prompt/history_manager.rb +596 -0
- data/lib/smart_prompt/hybrid_strategy.rb +222 -0
- data/lib/smart_prompt/image_generation_adapter.rb +297 -0
- data/lib/smart_prompt/lru_cache.rb +133 -0
- data/lib/smart_prompt/message.rb +57 -0
- data/lib/smart_prompt/multimodal_adapter.rb +277 -0
- data/lib/smart_prompt/openai_adapter.rb +1 -25
- data/lib/smart_prompt/persistence_layer.rb +197 -0
- data/lib/smart_prompt/relevance_based_strategy.rb +221 -0
- data/lib/smart_prompt/sensenova_adapter.rb +410 -0
- data/lib/smart_prompt/session.rb +140 -0
- data/lib/smart_prompt/sliding_window_strategy.rb +100 -0
- data/lib/smart_prompt/stt_adapter.rb +381 -0
- data/lib/smart_prompt/summary_based_strategy.rb +152 -0
- data/lib/smart_prompt/token_counter.rb +74 -0
- data/lib/smart_prompt/tts_adapter.rb +403 -0
- data/lib/smart_prompt/version.rb +1 -1
- data/lib/smart_prompt/video_generation_adapter.rb +330 -0
- data/lib/smart_prompt/worker.rb +25 -3
- data/lib/smart_prompt/zhipu_adapter.rb +616 -0
- data/lib/smart_prompt.rb +22 -2
- data/workers/history_management_examples.rb +407 -0
- data/workers/image_generation_workers.rb +119 -0
- data/workers/multimodal_workers.rb +110 -0
- data/workers/sensenova_workers.rb +62 -0
- data/workers/stt_workers.rb +195 -0
- data/workers/tts_workers.rb +388 -0
- data/workers/video_generation_workers.rb +264 -0
- data/workers/zhipu_workers.rb +113 -0
- metadata +84 -8
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
require_relative 'context_strategy'
|
|
2
|
+
|
|
3
|
+
module SmartPrompt
|
|
4
|
+
# SummaryBasedStrategy implements a context selection strategy that
|
|
5
|
+
# automatically compresses older messages through summarization
|
|
6
|
+
#
|
|
7
|
+
# This strategy:
|
|
8
|
+
# - Monitors message count and triggers summarization at threshold
|
|
9
|
+
# - Keeps recent messages uncompressed for context continuity
|
|
10
|
+
# - Generates summaries of older messages to reduce token usage
|
|
11
|
+
# - Falls back to truncation when summarization fails
|
|
12
|
+
# - Maintains conversation coherence while reducing token costs
|
|
13
|
+
class SummaryBasedStrategy
|
|
14
|
+
include ContextStrategy
|
|
15
|
+
|
|
16
|
+
# Build a summary-based strategy from an options hash.
# @param config [Hash] Configuration options
# @option config [Integer] :summary_threshold (20) Message count above which summarization kicks in
# @option config [Integer] :keep_recent (5) How many of the newest regular messages stay verbatim
# @option config [CompressionEngine] :compression_engine Engine used to produce summaries
# @option config [Hash] :compression Options for the default engine when none is supplied
# @option config [Boolean] :preserve_system (true) Keep system messages unless explicitly set to false
def initialize(config = {})
  @summary_threshold = config[:summary_threshold] || 20
  @keep_recent = config[:keep_recent] || 5

  # Only an explicit `false` turns system-message preservation off
  @preserve_system = !(config[:preserve_system] == false)

  # Use the caller's engine when given, otherwise build a default one
  @compression_engine = config[:compression_engine] || CompressionEngine.new(config[:compression] || {})
end
|
|
31
|
+
|
|
32
|
+
# Select messages using summary-based approach.
#
# At or below the summary threshold the messages are returned as-is (minus
# system messages when they are not preserved). Above it, the result is
# system messages + summary + the newest `keep_recent` regular messages.
# Everything older than the recent window is handed to the compression
# engine, unless the session already carries a summary.
#
# @param messages [Array<Message>] All messages in the session
# @param max_tokens [Integer, nil] Maximum token limit for selected messages
# @param current_message [Message, nil] Not used in this strategy
# @return [Array<Message>] Selected messages with summaries
def select_messages(messages, max_tokens, current_message = nil)
  return [] if messages.nil? || messages.empty?

  # If below threshold, return messages (filtering system messages if needed)
  if messages.count <= @summary_threshold
    filtered = @preserve_system ? messages : messages.reject(&:system_message?)
    return max_tokens ? trim_to_token_limit(filtered, max_tokens) : filtered
  end

  # Separate system messages, summaries, and regular messages
  system_messages = @preserve_system ? messages.select(&:system_message?) : []
  existing_summaries = messages.select { |msg| msg.is_summary }
  regular_messages = messages.reject { |msg| msg.system_message? || msg.is_summary }

  # A summary that is itself a system message is already part of
  # system_messages; adding it again would duplicate it in the output.
  standalone_summaries =
    @preserve_system ? existing_summaries.reject(&:system_message?) : existing_summaries

  # Keep the most recent messages; everything before them is "old".
  # Computed from the size of the recent window rather than a negative range
  # end, because `[0...-0]` is empty and would discard all history when
  # keep_recent is 0.
  recent_messages = regular_messages.last(@keep_recent)
  old_messages = regular_messages.first(regular_messages.length - recent_messages.length)

  # Generate summary if we have old messages and no existing summary
  if !old_messages.empty? && existing_summaries.empty?
    begin
      summary = @compression_engine.summarize(old_messages)
      if summary
        selected = system_messages + [summary] + recent_messages
      else
        # Summarization returned nothing: keep a larger slice of recent history
        SmartPrompt.logger.warn "Summarization failed, falling back to recent messages only"
        fallback_count = [@summary_threshold / 2, regular_messages.count].min
        selected = system_messages + regular_messages.last(fallback_count)
      end
    rescue => e
      SmartPrompt.logger.error "Error during summarization: #{e.message}, using fallback"
      selected = system_messages + recent_messages
    end
  else
    # Use existing summaries if available
    selected = system_messages + standalone_summaries + recent_messages
  end

  # Trim to token limit if specified
  max_tokens ? trim_to_token_limit(selected, max_tokens) : selected
end
|
|
84
|
+
|
|
85
|
+
# Tell the caller whether the session has grown past the summary threshold.
# @param session [Session] The session to evaluate (nil is treated as "no")
# @return [Boolean] true if message count > summary_threshold
def should_compress?(session)
  !session.nil? && session.message_count > @summary_threshold
end
|
|
93
|
+
|
|
94
|
+
private
|
|
95
|
+
|
|
96
|
+
# Trim messages to fit within token limit.
#
# Messages are admitted greedily in priority order: system messages, then
# non-system summaries, then regular messages from newest to oldest. Each
# group stops at the first message that would exceed the budget. The result
# is ordered by timestamp; messages with equal timestamps keep the relative
# order they had in the input.
#
# @param messages [Array<Message>] Messages to trim
# @param max_tokens [Integer] Maximum token limit
# @return [Array<Message>] Trimmed messages
def trim_to_token_limit(messages, max_tokens)
  return messages unless max_tokens
  return [] if messages.empty?

  # Separate into priority groups
  system_messages = messages.select(&:system_message?)
  summaries = messages.select { |msg| msg.is_summary && !msg.system_message? }
  regular_messages = messages.reject { |msg| msg.system_message? || msg.is_summary }

  selected = []
  total = 0

  # Admit candidates in order until one does not fit; returns false when the
  # group had to be cut short.
  admit = lambda do |candidates|
    candidates.each do |msg|
      msg_tokens = msg.token_count || 0
      return false if total + msg_tokens > max_tokens

      selected << msg
      total += msg_tokens
    end
    true
  end

  unless admit.call(system_messages)
    SmartPrompt.logger.warn "Token limit too small to fit all system messages"
  end
  admit.call(summaries)
  admit.call(regular_messages.reverse_each)

  # sort_by is not stable and regular messages were appended newest-first,
  # so break timestamp ties with the message's position in the input.
  position = {}.compare_by_identity
  messages.each_with_index { |msg, index| position[msg] ||= index }
  selected.sort_by { |msg| [msg.timestamp, position[msg]] }
end
|
|
151
|
+
end
|
|
152
|
+
end
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
module SmartPrompt
|
|
2
|
+
# TokenCounter provides token counting functionality with caching
|
|
3
|
+
# Uses tiktoken for accurate token counting compatible with OpenAI models
|
|
4
|
+
class TokenCounter
|
|
5
|
+
# Set up an empty count cache and try to load a tiktoken encoding for +model+.
# @param model [String] Model name passed to the tiktoken encoding lookup
def initialize(model: "gpt-3.5-turbo")
  @model = model
  @cache = {}

  # load_tiktoken fills @encoding on success; start from a known nil state
  @encoding = nil
  @use_tiktoken = load_tiktoken
end
|
|
11
|
+
|
|
12
|
+
# Count the tokens in +text+, memoizing the result per distinct text.
# nil and empty input count as 0 and are never cached.
def count(text)
  return 0 if text.nil? || text.empty?

  # On a cache miss, compute with tiktoken when available (fallback estimate
  # otherwise) and remember the answer.
  @cache.fetch(text) do
    @cache[text] = @use_tiktoken ? count_with_tiktoken(text) : count_with_fallback(text)
  end
end
|
|
28
|
+
|
|
29
|
+
# Total token count over the +content+ of every message.
# Returns 0 for nil or an empty list.
def count_messages(messages)
  return 0 if messages.nil? || messages.empty?

  messages.map { |message| count(message.content) }.sum
end
|
|
35
|
+
|
|
36
|
+
# Drop every memoized token count; returns the (now empty) cache hash.
def clear_cache
  @cache.tap(&:clear)
end
|
|
40
|
+
|
|
41
|
+
# Number of distinct texts whose token count is currently memoized.
def cache_size
  @cache.length
end
|
|
45
|
+
|
|
46
|
+
private
|
|
47
|
+
|
|
48
|
+
# Try to load tiktoken_ruby and resolve the encoding for @model.
# Returns true when @encoding is ready, false (after logging a warning)
# when the gem is missing or the encoding lookup fails.
def load_tiktoken
  require 'tiktoken_ruby'
  @encoding = Tiktoken.encoding_for_model(@model)
  true
rescue LoadError
  # Gem not installed: callers fall back to the word-based estimate
  SmartPrompt.logger.warn "tiktoken_ruby not available, using fallback token counting"
  false
rescue => e
  SmartPrompt.logger.warn "Failed to initialize tiktoken: #{e.message}, using fallback"
  false
end
|
|
61
|
+
|
|
62
|
+
# Token count according to the loaded tiktoken encoding.
def count_with_tiktoken(text)
  @encoding.encode(text).size
end
|
|
65
|
+
|
|
66
|
+
# Fallback token counting using a simple estimate (used when tiktoken is
# unavailable).
#
# ASCII words are approximated at ~1.3 tokens each. `\w` only matches ASCII
# word characters in Ruby regexps, so Chinese/Japanese/Korean text would
# otherwise be estimated at zero tokens; every Han, Hiragana, Katakana and
# Hangul character is therefore counted as one additional token.
#
# @param text [String] Text to estimate
# @return [Integer] Estimated token count
def count_with_fallback(text)
  word_estimate = (text.scan(/\w+/).length * 1.3).ceil

  # Unicode property classes require a UTF-8 subject string; for any other
  # encoding keep the plain word-based estimate.
  return word_estimate unless text.encoding == Encoding::UTF_8

  word_estimate + text.scan(/[\p{Han}\p{Hiragana}\p{Katakana}\p{Hangul}]/).length
end
|
|
73
|
+
end
|
|
74
|
+
end
|
|
@@ -0,0 +1,403 @@
|
|
|
1
|
+
require "openai"
|
|
2
|
+
require "base64"
|
|
3
|
+
require "net/http"
|
|
4
|
+
require "uri"
|
|
5
|
+
|
|
6
|
+
module SmartPrompt
|
|
7
|
+
class TTSAdapter < LLMAdapter
|
|
8
|
+
# Predefined voice options: maps the voice name a caller passes in to the
# voice identifier that is sent in the request. Frozen so the shared
# constant cannot be mutated at runtime.
PREDEFINED_VOICES = {
  "alloy" => "沉稳男声alex",
  "echo" => "温柔女声claire",
  "fable" => "活泼女声fable",
  "onyx" => "磁性男声onyx",
  "nova" => "甜美女声nova",
  "shimmer" => "优雅女声shimmer"
}.freeze

# Language codes accepted by parameter validation
SUPPORTED_LANGUAGES = %w[zh en ja ko].freeze

# Output formats accepted by parameter validation
SUPPORTED_FORMATS = %w[mp3 opus wav pcm].freeze
|
|
23
|
+
|
|
24
|
+
# Create the TTS adapter and its OpenAI-compatible client.
#
# An api_key written as ENV["NAME"] / ENV['NAME'] in the configuration is
# resolved from the environment. The resolved key is also stored in this
# adapter's own copy of the configuration, because the raw Net::HTTP helpers
# in this class read @config['api_key'] for the Authorization header and
# would otherwise send the literal ENV[...] text.
#
# @param config [Hash] Adapter configuration ("api_key", "url", "model")
# @raise [LLMAPIError] when the client configuration, authentication or network setup fails
# @raise [Error] on any other failure while creating the client
def initialize(config)
  super
  api_key = @config["api_key"]
  if api_key.is_a?(String) && api_key.start_with?("ENV[") && api_key.end_with?("]")
    env_name = api_key[/\AENV\[\s*(["'])([^"'\[\]]+)\1\s*\]\z/, 2]
    # NOTE(review): eval executes arbitrary Ruby taken from configuration text.
    # It is kept only for expressions the plain lookup above cannot handle
    # (e.g. `ENV["A"] || ENV["B"]`); consider removing it entirely.
    api_key = env_name ? ENV[env_name] : eval(api_key)

    # Copy instead of mutating: the hash passed in may be shared by the caller.
    @config = @config.merge("api_key" => api_key) if @config.respond_to?(:merge)
  end

  begin
    @client = OpenAI::Client.new(
      access_token: api_key,
      uri_base: @config["url"],
      request_timeout: 120,
    )
    # Logged only after the client was actually created (an `ensure` here
    # would also report success on the failure paths below).
    SmartPrompt.logger.info "Successfully created a TTS client."
  rescue OpenAI::ConfigurationError => e
    SmartPrompt.logger.error "Failed to initialize TTS client: #{e.message}"
    raise LLMAPIError, "Invalid TTS configuration: #{e.message}"
  rescue OpenAI::Error => e
    SmartPrompt.logger.error "Failed to initialize TTS client: #{e.message}"
    raise LLMAPIError, "TTS authentication failed: #{e.message}"
  rescue SocketError => e
    SmartPrompt.logger.error "Failed to initialize TTS client: #{e.message}"
    raise LLMAPIError, "Network error: Unable to connect to TTS API"
  rescue => e
    SmartPrompt.logger.error "Failed to initialize TTS client: #{e.message}"
    raise Error, "Unexpected error initializing TTS client: #{e.message}"
  end
end
|
|
52
|
+
|
|
53
|
+
# Convert +text+ to speech and return the audio as a base64 data URL.
#
# @param text [String] Text to synthesize
# @param voice [String] Predefined voice key, or a custom voice name passed through unchanged
# @param model [String, nil] Model name; defaults to the configured "model"
# @param speed [Numeric] Playback speed
# @param response_format [String] Audio format
# @param language [String, nil] Optional language code, sent only when given
# @return [Hash] :audio_data, :format, :text_length and :voice
# @raise [Error] when validation fails or an unexpected error occurs
# @raise [LLMAPIError] when an OpenAI::Error is raised during the request
def synthesize_speech(text, voice: "alloy", model: nil, speed: 1.0, response_format: "mp3", language: nil)
  SmartPrompt.logger.info "TTSAdapter: Synthesizing speech from text"

  model_name = model || @config["model"]

  # Validation errors are raised before the rescue-wrapped section below
  validate_tts_parameters(text, voice, speed, response_format, language)

  begin
    # Predefined keys are translated; anything else is used verbatim
    voice_name = PREDEFINED_VOICES[voice] || voice

    parameters = {
      model: model_name,
      input: text,
      voice: voice_name,
      speed: speed,
      response_format: response_format
    }
    parameters[:language] = language if language

    # The input text itself is left out of the log line
    SmartPrompt.logger.info "TTS parameters: #{parameters.except(:input)}"

    # Custom implementation for TTS since OpenAI gem doesn't support audio endpoints
    @last_response = response = submit_tts_request(parameters)

    unless response.is_a?(String) && response.start_with?("data:audio/")
      SmartPrompt.logger.error "Invalid TTS response format"
      raise LLMAPIError, "Invalid TTS response format"
    end

    SmartPrompt.logger.info "TTS synthesis successful, generated #{text.length} characters"
    {
      audio_data: response,
      format: response_format,
      text_length: text.length,
      voice: voice_name
    }
  rescue OpenAI::Error => e
    SmartPrompt.logger.error "TTS API error: #{e.message}"
    raise LLMAPIError, "TTS API error: #{e.message}"
  rescue => e
    SmartPrompt.logger.error "Unexpected error during TTS synthesis: #{e.message}"
    raise Error, "Unexpected error during TTS synthesis: #{e.message}"
  end
end
|
|
109
|
+
|
|
110
|
+
# Synthesize +text+ and write the resulting audio to +output_path+.
# Accepts the same keyword options as synthesize_speech.
#
# @return [Hash] :file_path, :text_length, :voice and :format
# @raise [Error] wrapping any failure during synthesis or while writing the file
def synthesize_to_file(text, output_path, voice: "alloy", model: nil, speed: 1.0, response_format: "mp3", language: nil)
  SmartPrompt.logger.info "TTSAdapter: Synthesizing speech to file"

  begin
    synthesis = synthesize_speech(
      text,
      voice: voice,
      model: model,
      speed: speed,
      response_format: response_format,
      language: language
    )

    # Decode the data URL and persist it
    save_audio_to_file(synthesis[:audio_data], output_path, response_format)
    SmartPrompt.logger.info "TTS audio saved to: #{output_path}"

    {
      file_path: output_path,
      text_length: synthesis[:text_length],
      voice: synthesis[:voice],
      format: response_format
    }
  rescue => e
    SmartPrompt.logger.error "Error synthesizing to file: #{e.message}"
    raise Error, "Error synthesizing to file: #{e.message}"
  end
end
|
|
141
|
+
|
|
142
|
+
# Return a fresh copy of the predefined voice table, so callers can modify
# the result without touching the constant.
def available_voices
  {}.merge(PREDEFINED_VOICES)
end
|
|
146
|
+
|
|
147
|
+
# Register a custom voice from a reference audio file.
#
# @param name [String] Name for the new voice
# @param reference_audio_file [String] Path to the reference audio (at most 5MB)
# @param description [String, nil] Optional description, sent only when given
# @return [Hash] :voice_id, :name, :status and :created_at taken from the API response
# @raise [Error] wrapping any failure (missing file, oversize file, API error)
def create_custom_voice(name, reference_audio_file, description: nil)
  SmartPrompt.logger.info "TTSAdapter: Creating custom voice"

  begin
    raise Error, "Reference audio file not found: #{reference_audio_file}" unless File.exist?(reference_audio_file)

    # The size cap is enforced in bytes (5MB)
    if File.size(reference_audio_file) > 5 * 1024 * 1024
      raise Error, "Reference audio file too large (max 5MB)"
    end

    # The audio is sent inline as base64
    encoded_audio = Base64.strict_encode64(File.binread(reference_audio_file))
    parameters = { name: name, audio: encoded_audio }
    parameters[:description] = description if description

    SmartPrompt.logger.info "Creating custom voice: #{name}"

    @last_response = response = create_custom_voice_request(parameters)

    unless response["voice_id"]
      SmartPrompt.logger.error "Failed to create custom voice"
      raise LLMAPIError, "Failed to create custom voice"
    end

    voice_data = {
      voice_id: response["voice_id"],
      name: response["name"],
      status: response["status"],
      created_at: response["created_at"]
    }
    SmartPrompt.logger.info "Custom voice created successfully: #{voice_data[:voice_id]}"
    voice_data
  rescue => e
    SmartPrompt.logger.error "Error creating custom voice: #{e.message}"
    raise Error, "Error creating custom voice: #{e.message}"
  end
end
|
|
200
|
+
|
|
201
|
+
# Fetch the custom voices known to the API.
#
# @return [Array<Hash>] One hash per voice (:voice_id, :name, :description,
#   :status, :created_at); empty when the response has no "voices" entry
# @raise [Error] wrapping any failure during the request
def list_custom_voices
  SmartPrompt.logger.info "TTSAdapter: Listing custom voices"

  begin
    @last_response = response = list_custom_voices_request

    unless response["voices"]
      SmartPrompt.logger.error "No custom voices found"
      return []
    end

    voices = response["voices"].map do |entry|
      {
        voice_id: entry["id"],
        name: entry["name"],
        description: entry["description"],
        status: entry["status"],
        created_at: entry["created_at"]
      }
    end

    SmartPrompt.logger.info "Found #{voices.size} custom voices"
    voices
  rescue => e
    SmartPrompt.logger.error "Error listing custom voices: #{e.message}"
    raise Error, "Error listing custom voices: #{e.message}"
  end
end
|
|
233
|
+
|
|
234
|
+
# Delete the custom voice identified by +voice_id+.
#
# @return [Hash] { deleted: true, voice_id: ... } when the API confirms deletion
# @raise [Error] wrapping any failure, including an unconfirmed deletion
def delete_custom_voice(voice_id)
  SmartPrompt.logger.info "TTSAdapter: Deleting custom voice"

  begin
    @last_response = response = delete_custom_voice_request(voice_id)

    # The API must confirm the deletion explicitly
    unless response["deleted"]
      SmartPrompt.logger.error "Failed to delete custom voice"
      raise LLMAPIError, "Failed to delete custom voice"
    end

    SmartPrompt.logger.info "Custom voice deleted successfully: #{voice_id}"
    { deleted: true, voice_id: voice_id }
  rescue => e
    SmartPrompt.logger.error "Error deleting custom voice: #{e.message}"
    raise Error, "Error deleting custom voice: #{e.message}"
  end
end
|
|
256
|
+
|
|
257
|
+
private
|
|
258
|
+
|
|
259
|
+
# Check synthesis arguments before any request is made.
# An unknown voice is only logged (it is treated as a custom voice name);
# every other violation raises Error. Returns nil when all checks pass.
def validate_tts_parameters(text, voice, speed, response_format, language)
  # Text must be present, non-blank and at most 4096 characters
  raise Error, "Text cannot be empty" if text.nil? || text.strip.empty?
  raise Error, "Text too long (max 4096 characters)" if text.length > 4096

  unless PREDEFINED_VOICES.key?(voice)
    SmartPrompt.logger.warn "Voice '#{voice}' is not a predefined voice, using as custom voice name"
  end

  raise Error, "Speed must be between 0.25 and 4.0" unless (0.25..4.0).include?(speed)
  raise Error, "Unsupported response format: #{response_format}" unless SUPPORTED_FORMATS.include?(response_format)

  # Language is optional; only a supplied value is checked
  raise Error, "Unsupported language: #{language}" if language && !SUPPORTED_LANGUAGES.include?(language)
end
|
|
289
|
+
|
|
290
|
+
# Decode base64 audio and write it to +output_path+, creating the parent
# directory when needed.
#
# @param audio_data [String] Either a "data:audio/<type>;base64,..." URL or bare base64
# @param output_path [String] Destination file path
# @param format [String] Audio format (not used when writing; the bytes are stored as decoded)
# @return [Integer] Number of bytes written
def save_audio_to_file(audio_data, output_path, format)
  # FileUtils is not among this file's top-level requires, so load it here
  # rather than relying on another file having required it first.
  require "fileutils"

  # Create directory if it doesn't exist
  FileUtils.mkdir_p(File.dirname(output_path))

  # Strip the data URL prefix when present; bare base64 passes through unchanged
  base64_data = audio_data.sub(%r{\Adata:audio/\w+;base64,}, "")

  File.binwrite(output_path, Base64.decode64(base64_data))
end
|
|
307
|
+
|
|
308
|
+
# POST the synthesis parameters as JSON to "<url>/audio/speech".
# On success the response body is returned as a base64 data URL whose media
# type is taken from parameters[:response_format]; any non-success status
# raises LLMAPIError with the status code and body.
def submit_tts_request(parameters)
  endpoint = URI.parse("#{@config['url']}/audio/speech")

  connection = Net::HTTP.new(endpoint.host, endpoint.port)
  connection.use_ssl = (endpoint.scheme == 'https')

  post = Net::HTTP::Post.new(endpoint.request_uri)
  post['Content-Type'] = 'application/json'
  post['Authorization'] = "Bearer #{@config['api_key']}"
  post.body = parameters.to_json

  reply = connection.request(post)
  unless reply.is_a?(Net::HTTPSuccess)
    raise LLMAPIError, "TTS API error: #{reply.code} - #{reply.body}"
  end

  "data:audio/#{parameters[:response_format]};base64,#{Base64.strict_encode64(reply.body)}"
end
|
|
330
|
+
|
|
331
|
+
# POST the voice definition as JSON to "<url>/voices" and return the parsed
# JSON response. A non-success status raises LLMAPIError with the status
# code and body.
def create_custom_voice_request(parameters)
  endpoint = URI.parse("#{@config['url']}/voices")

  connection = Net::HTTP.new(endpoint.host, endpoint.port)
  connection.use_ssl = (endpoint.scheme == 'https')

  post = Net::HTTP::Post.new(endpoint.request_uri)
  post['Content-Type'] = 'application/json'
  post['Authorization'] = "Bearer #{@config['api_key']}"
  post.body = parameters.to_json

  reply = connection.request(post)
  unless reply.is_a?(Net::HTTPSuccess)
    raise LLMAPIError, "Custom voice creation API error: #{reply.code} - #{reply.body}"
  end

  JSON.parse(reply.body)
end
|
|
352
|
+
|
|
353
|
+
# GET "<url>/voices" and return the parsed JSON response.
# A non-success status raises LLMAPIError with the status code and body.
def list_custom_voices_request
  endpoint = URI.parse("#{@config['url']}/voices")

  connection = Net::HTTP.new(endpoint.host, endpoint.port)
  connection.use_ssl = (endpoint.scheme == 'https')

  get = Net::HTTP::Get.new(endpoint.request_uri)
  get['Authorization'] = "Bearer #{@config['api_key']}"

  reply = connection.request(get)
  unless reply.is_a?(Net::HTTPSuccess)
    raise LLMAPIError, "List voices API error: #{reply.code} - #{reply.body}"
  end

  JSON.parse(reply.body)
end
|
|
371
|
+
|
|
372
|
+
# Send DELETE to "<url>/voices/<voice_id>" and return the parsed JSON response.
# A non-success status raises LLMAPIError with the status code and body.
# NOTE(review): voice_id is interpolated into the path unescaped — confirm
# callers only pass identifiers returned by the API.
def delete_custom_voice_request(voice_id)
  endpoint = URI.parse("#{@config['url']}/voices/#{voice_id}")

  connection = Net::HTTP.new(endpoint.host, endpoint.port)
  connection.use_ssl = (endpoint.scheme == 'https')

  delete = Net::HTTP::Delete.new(endpoint.request_uri)
  delete['Authorization'] = "Bearer #{@config['api_key']}"

  reply = connection.request(delete)
  unless reply.is_a?(Net::HTTPSuccess)
    raise LLMAPIError, "Delete voice API error: #{reply.code} - #{reply.body}"
  end

  JSON.parse(reply.body)
end
|
|
390
|
+
|
|
391
|
+
# Override send_request to provide a meaningful error for chat operations.
#
# Declared public explicitly: this definition sits below the class's
# `private` marker, and as a private method an external call would fail
# with NoMethodError before ever reaching the explanatory error below.
#
# @raise [NotImplementedError] always
public def send_request(messages, model = nil, temperature = 0.7, tools = nil, proc = nil)
  SmartPrompt.logger.error "TTSAdapter does not support chat operations. Use synthesize_speech or synthesize_to_file methods instead."
  raise NotImplementedError, "TTSAdapter does not support chat operations"
end
|
|
396
|
+
|
|
397
|
+
# Override embeddings to report that this adapter cannot produce them.
#
# Declared public explicitly: this definition sits below the class's
# `private` marker, and as a private method an external call would fail
# with NoMethodError before ever reaching the explanatory error below.
#
# @raise [NotImplementedError] always
public def embeddings(text, model)
  SmartPrompt.logger.error "TTSAdapter does not support embeddings operations."
  raise NotImplementedError, "TTSAdapter does not support embeddings operations"
end
|
|
402
|
+
end
|
|
403
|
+
end
|
data/lib/smart_prompt/version.rb
CHANGED