smart_prompt 0.4.4 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -10
- data/README.cn.md +307 -64
- data/README.md +311 -64
- data/Rakefile +10 -1
- data/config/anthropic_config.yml +151 -0
- data/config/image_generation_config.yml +22 -0
- data/config/multimodal_config.yml +85 -0
- data/config/sensenova_config.yml +63 -0
- data/config/zhipu_config.yml +73 -0
- data/examples/anthropic_basic_chat.rb +143 -0
- data/examples/anthropic_example.rb +232 -0
- data/examples/anthropic_multimodal.rb +212 -0
- data/examples/anthropic_streaming.rb +312 -0
- data/examples/anthropic_tool_calling.rb +393 -0
- data/examples/automatic_cleanup_example.rb +109 -0
- data/examples/history_management_examples.rb +522 -0
- data/examples/image_generation_example.rb +130 -0
- data/examples/monitoring_example.rb +121 -0
- data/examples/multimodal_example.rb +63 -0
- data/examples/relevance_based_strategy_example.rb +87 -0
- data/examples/sensenova_example.rb +129 -0
- data/examples/stt_example.rb +287 -0
- data/examples/tts_example.rb +244 -0
- data/examples/video_generation_example.rb +189 -0
- data/examples/zhipu_example.rb +151 -0
- data/lib/smart_prompt/anthropic_adapter.rb +363 -281
- data/lib/smart_prompt/compression_engine.rb +201 -0
- data/lib/smart_prompt/context_strategy.rb +22 -0
- data/lib/smart_prompt/conversation.rb +81 -191
- data/lib/smart_prompt/engine.rb +36 -19
- data/lib/smart_prompt/history_manager.rb +596 -0
- data/lib/smart_prompt/hybrid_strategy.rb +222 -0
- data/lib/smart_prompt/image_generation_adapter.rb +297 -0
- data/lib/smart_prompt/lru_cache.rb +133 -0
- data/lib/smart_prompt/message.rb +57 -0
- data/lib/smart_prompt/multimodal_adapter.rb +277 -0
- data/lib/smart_prompt/openai_adapter.rb +1 -25
- data/lib/smart_prompt/persistence_layer.rb +197 -0
- data/lib/smart_prompt/relevance_based_strategy.rb +221 -0
- data/lib/smart_prompt/sensenova_adapter.rb +410 -0
- data/lib/smart_prompt/session.rb +140 -0
- data/lib/smart_prompt/sliding_window_strategy.rb +100 -0
- data/lib/smart_prompt/stt_adapter.rb +381 -0
- data/lib/smart_prompt/summary_based_strategy.rb +152 -0
- data/lib/smart_prompt/token_counter.rb +74 -0
- data/lib/smart_prompt/tts_adapter.rb +403 -0
- data/lib/smart_prompt/version.rb +1 -1
- data/lib/smart_prompt/video_generation_adapter.rb +330 -0
- data/lib/smart_prompt/worker.rb +25 -3
- data/lib/smart_prompt/zhipu_adapter.rb +616 -0
- data/lib/smart_prompt.rb +22 -2
- data/workers/history_management_examples.rb +407 -0
- data/workers/image_generation_workers.rb +119 -0
- data/workers/multimodal_workers.rb +110 -0
- data/workers/sensenova_workers.rb +62 -0
- data/workers/stt_workers.rb +195 -0
- data/workers/tts_workers.rb +388 -0
- data/workers/video_generation_workers.rb +264 -0
- data/workers/zhipu_workers.rb +113 -0
- metadata +84 -8
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
require_relative 'context_strategy'
|
|
2
|
+
require_relative 'sliding_window_strategy'
|
|
3
|
+
require_relative 'relevance_based_strategy'
|
|
4
|
+
require_relative 'summary_based_strategy'
|
|
5
|
+
|
|
6
|
+
module SmartPrompt
  # HybridStrategy implements a flexible context selection strategy that
  # combines multiple strategies for optimal results.
  #
  # This strategy supports two modes:
  # - Adaptive mode: Automatically selects one strategy based on message count
  # - Combined mode: Merges results from multiple strategies
  #
  # Adaptive mode selection logic (with the default thresholds 20 and 50):
  # - fewer than 20 messages: SlidingWindowStrategy (simple and efficient)
  # - 20 to 49 messages: RelevanceBasedStrategy (balance recency and relevance)
  # - 50 or more messages: SummaryBasedStrategy (compress older messages)
  #
  # Combined mode:
  # - Runs the sliding-window and relevance strategies (and the summary
  #   strategy once the message count exceeds the high threshold)
  # - Removes duplicates and restores chronological order
  # - Trims the merged list to the token limit, keeping the highest ranked
  #   messages
  class HybridStrategy
    include ContextStrategy

    # Modes accepted by #initialize.
    VALID_MODES = %i[adaptive combined].freeze

    # Initialize the hybrid strategy
    # @param config [Hash] Configuration options
    # @option config [Symbol] :mode (:adaptive) Strategy mode - :adaptive or :combined
    # @option config [Hash] :sliding_window ({}) Configuration for SlidingWindowStrategy
    # @option config [Hash] :relevance_based ({}) Configuration for RelevanceBasedStrategy
    # @option config [Hash] :summary_based ({}) Configuration for SummaryBasedStrategy
    # @option config [Integer] :adaptive_threshold_low (20) Message count threshold for adaptive mode (low)
    # @option config [Integer] :adaptive_threshold_high (50) Message count threshold for adaptive mode (high)
    # @raise [ArgumentError] if :mode is neither :adaptive nor :combined
    def initialize(config = {})
      @mode = config[:mode] || :adaptive
      # Fail fast on a bad mode before building any sub-strategy.
      unless VALID_MODES.include?(@mode)
        raise ArgumentError, "Invalid mode: #{@mode}. Must be :adaptive or :combined"
      end

      @adaptive_threshold_low = config[:adaptive_threshold_low] || 20
      @adaptive_threshold_high = config[:adaptive_threshold_high] || 50

      # Initialize sub-strategies with their configurations
      @sliding_window = SlidingWindowStrategy.new(config[:sliding_window] || {})
      @relevance_based = RelevanceBasedStrategy.new(config[:relevance_based] || {})
      @summary_based = SummaryBasedStrategy.new(config[:summary_based] || {})
    end

    # Select messages using hybrid approach
    # @param messages [Array<Message>] All messages in the session
    # @param max_tokens [Integer, nil] Maximum token limit for selected messages
    # @param current_message [Message, nil] The current message for relevance calculation
    # @return [Array<Message>] Selected messages ([] when +messages+ is nil or empty)
    def select_messages(messages, max_tokens, current_message = nil)
      return [] if messages.nil? || messages.empty?

      case @mode
      when :adaptive
        select_adaptive(messages, max_tokens, current_message)
      when :combined
        select_combined(messages, max_tokens, current_message)
      else
        # Defensive fallback; #initialize already rejects any other mode.
        @sliding_window.select_messages(messages, max_tokens, current_message)
      end
    end

    # Determine if compression should be triggered.
    #
    # In adaptive mode the decision is delegated to the strategy that the
    # session's message count selects. In combined mode compression is
    # recommended as soon as any one of the three strategies recommends it.
    # @param session [Session] The session to evaluate
    # @return [Boolean] false when +session+ is nil, otherwise the delegated answer
    def should_compress?(session)
      return false if session.nil?

      if @mode == :adaptive
        strategy_for(session.message_count).should_compress?(session)
      else
        @sliding_window.should_compress?(session) ||
          @relevance_based.should_compress?(session) ||
          @summary_based.should_compress?(session)
      end
    end

    private

    # Pick the sub-strategy for a conversation of the given size.
    # Shared by #select_adaptive and #should_compress? so both always agree.
    # @param message_count [Integer] Number of messages in the conversation
    # @return [Object] One of the three configured sub-strategies
    def strategy_for(message_count)
      if message_count < @adaptive_threshold_low
        # For small conversations, use simple sliding window
        @sliding_window
      elsif message_count < @adaptive_threshold_high
        # For medium conversations, use relevance-based selection
        @relevance_based
      else
        # For large conversations, use summarization
        @summary_based
      end
    end

    # Select messages using adaptive strategy selection
    # Chooses a single strategy based on message count and delegates to it
    # @param messages [Array<Message>] All messages
    # @param max_tokens [Integer, nil] Maximum token limit
    # @param current_message [Message, nil] Current message for relevance
    # @return [Array<Message>] Selected messages
    def select_adaptive(messages, max_tokens, current_message)
      message_count = messages.count
      strategy = strategy_for(message_count)

      # Log the selected strategy for debugging
      log_debug "Adaptive mode: selected #{strategy.class.name} for #{message_count} messages (thresholds: <#{@adaptive_threshold_low}, <#{@adaptive_threshold_high})"

      strategy.select_messages(messages, max_tokens, current_message)
    end

    # Select messages by combining results from multiple strategies
    # Merges results, removes duplicates and trims to the token limit
    # @param messages [Array<Message>] All messages
    # @param max_tokens [Integer, nil] Maximum token limit
    # @param current_message [Message, nil] Current message for relevance
    # @return [Array<Message>] Selected messages in timestamp order
    def select_combined(messages, max_tokens, current_message)
      # Get results from each strategy (without token limit initially)
      sliding_result = @sliding_window.select_messages(messages, nil, current_message)
      relevance_result = @relevance_based.select_messages(messages, nil, current_message)

      # For summary-based, only include if we have many messages
      # to avoid premature summarization
      summary_result = if messages.count > @adaptive_threshold_high
                         @summary_based.select_messages(messages, nil, current_message)
                       else
                         []
                       end

      # Combine all results and remove duplicates. Array#uniq compares
      # elements with #hash/#eql?, i.e. by object identity unless Message
      # overrides those methods.
      combined = (sliding_result + relevance_result + summary_result).uniq

      log_debug "Combined mode: merged #{sliding_result.count} + #{relevance_result.count} + #{summary_result.count} = #{combined.count} unique messages"

      # Sort by timestamp to maintain conversation order
      combined = combined.sort_by(&:timestamp)

      # Trim to token limit if specified
      result = max_tokens ? trim_to_token_limit(combined, max_tokens) : combined

      if max_tokens && result.count < combined.count
        tokens_before = combined.sum { |m| m.token_count || 0 }
        tokens_after = result.sum { |m| m.token_count || 0 }
        log_debug "Combined mode: trimmed to token limit #{max_tokens}: #{combined.count} -> #{result.count} messages, #{tokens_before} -> #{tokens_after} tokens"
      end

      result
    end

    # Trim messages to fit within token limit.
    #
    # System messages are considered first. The remaining messages are ranked
    # by importance score (highest first) with recency as the tie-breaker and
    # added until the first one that no longer fits.
    # @param messages [Array<Message>] Messages to trim
    # @param max_tokens [Integer] Maximum token limit
    # @return [Array<Message>] Trimmed messages in timestamp order
    def trim_to_token_limit(messages, max_tokens)
      return messages unless max_tokens
      return [] if messages.empty?

      # Separate system messages (considered first) from others
      system_messages, other_messages = messages.partition(&:system_message?)

      selected = []
      total = 0

      system_messages.each do |msg|
        msg_tokens = msg.token_count || 0
        if total + msg_tokens <= max_tokens
          selected << msg
          total += msg_tokens
        else
          # Guarded like #log_debug so a missing logger cannot abort trimming.
          SmartPrompt.logger&.warn "Token limit too small to fit all system messages"
          break
        end
      end

      # Rank by importance first and recency second. The two values are kept
      # in separate sort keys: an importance score and an epoch timestamp are
      # on unrelated scales, so folding them into one number would make every
      # unscored message outrank every scored one.
      # NOTE(review): a missing importance_score is ranked as 0.0 here —
      # confirm that matches Message's intended default.
      ranked_others = other_messages.sort_by do |msg|
        [-(msg.importance_score || 0.0), -msg.timestamp.to_f]
      end

      # Add messages until we hit the token limit
      ranked_others.each do |msg|
        msg_tokens = msg.token_count || 0
        # Stop at the first message that does not fit
        break if total + msg_tokens > max_tokens

        selected << msg
        total += msg_tokens
      end

      # Re-sort by timestamp to maintain conversation order
      selected.sort_by(&:timestamp)
    end

    # Logging helper: emits a debug line prefixed with the class name, and is
    # a no-op when no logger is configured.
    def log_debug(message)
      return unless SmartPrompt.logger

      SmartPrompt.logger.debug "[HybridStrategy] #{message}"
    end
  end
end
|
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
require "openai"
|
|
2
|
+
require "base64"
|
|
3
|
+
require "json"
|
|
4
|
+
require "net/http"
|
|
5
|
+
require "uri"
|
|
6
|
+
require "fileutils"
|
|
7
|
+
|
|
8
|
+
module SmartPrompt
  # Adapter for SiliconFlow's image generation API.
  #
  # SiliconFlow exposes image generation through a single endpoint:
  #
  #   POST {url}/images/generations
  #
  # Unlike OpenAI's image API, SiliconFlow uses its own parameter names
  # (`image_size`, `batch_size`, `negative_prompt`, `num_inference_steps`,
  # `guidance_scale`, `cfg`, ...) and returns an `images` array instead of a
  # `data` array. The OpenAI gem's `images.generate` helper therefore does not
  # fit, so — like the TTS/Video adapters — we talk to the endpoint directly
  # with Net::HTTP.
  class ImageGenerationAdapter < LLMAdapter
    SUPPORTED_IMAGE_FORMATS = %w[jpg jpeg png gif bmp webp].freeze

    # Default resolution for text-to-image generation ("widthxheight").
    # Edit models (Qwen/Qwen-Image-Edit*) ignore this field, so it is only sent
    # for text-to-image calls.
    DEFAULT_IMAGE_SIZE = "1024x1024"

    # File extension chosen for a downloaded image, keyed by its media type.
    # Anything not listed here is saved as ".png".
    CONTENT_TYPE_EXTENSIONS = {
      "image/jpeg" => "jpg",
      "image/jpg" => "jpg",
      "image/png" => "png",
      "image/gif" => "gif",
      "image/webp" => "webp",
    }.freeze

    # Request keys that may carry image payloads (see #edit_image).
    IMAGE_PARAMETER_KEYS = %i[image image2 image3].freeze

    # Build the adapter from an llm configuration hash.
    #
    # Reads "api_key", "url" and "model" from the configuration. An api_key of
    # the form `ENV[...]` is resolved at construction time.
    # @raise [LLMAPIError] when the OpenAI client rejects the configuration or
    #   a SocketError occurs
    # @raise [Error] on any other failure while creating the client
    def initialize(config)
      super
      api_key = @config["api_key"]
      if api_key.is_a?(String) && api_key.start_with?("ENV[") && api_key.end_with?("]")
        # NOTE(security): this evaluates the configured string as Ruby code.
        # It is only safe while configuration files are fully trusted; a plain
        # ENV lookup would avoid executing arbitrary expressions.
        api_key = eval(api_key)
      end
      @api_key = api_key
      @base_url = @config["url"].to_s.chomp("/")
      @model = @config["model"]

      begin
        # Created for parity with the other non-chat adapters; the actual image
        # requests are issued directly below via Net::HTTP.
        @client = OpenAI::Client.new(
          access_token: @api_key,
          uri_base: @config["url"],
          request_timeout: 240,
        )
      rescue OpenAI::ConfigurationError => e
        SmartPrompt.logger.error "Failed to initialize ImageGeneration client: #{e.message}"
        raise LLMAPIError, "Invalid ImageGeneration configuration: #{e.message}"
      rescue OpenAI::Error => e
        SmartPrompt.logger.error "Failed to initialize ImageGeneration client: #{e.message}"
        raise LLMAPIError, "ImageGeneration authentication failed: #{e.message}"
      rescue SocketError => e
        SmartPrompt.logger.error "Failed to initialize ImageGeneration client: #{e.message}"
        raise LLMAPIError, "Network error: Unable to connect to ImageGeneration API"
      rescue => e
        SmartPrompt.logger.error "Failed to initialize ImageGeneration client: #{e.message}"
        raise Error, "Unexpected error initializing ImageGeneration client: #{e.message}"
      else
        # `else` (not `ensure`) so success is only reported when no exception
        # was raised while creating the client.
        SmartPrompt.logger.info "Successfully created an ImageGeneration client (model=#{@model})."
      end
    end

    # Text-to-image generation.
    #
    # +params+ accepts SiliconFlow-native keys plus a couple of friendly aliases:
    #
    #   model:, negative_prompt:,
    #   image_size:   (alias: size:),
    #   batch_size:   (alias: n:),
    #   seed:, num_inference_steps:, guidance_scale:, cfg:
    #
    # Returns an Array of hashes, e.g. [{ url: "...", b64_json: nil, seed: 123 }].
    # Raises Error for an empty prompt or missing model, LLMAPIError for API or
    # response-parsing failures.
    def generate_image(prompt, params = {})
      SmartPrompt.logger.info "ImageGenerationAdapter: Generating image from text"

      raise Error, "Prompt cannot be empty" if prompt.nil? || prompt.to_s.strip.empty?

      parameters = build_parameters(prompt, params)
      parameters[:image_size] = resolve_image_size(params)
      # batch_size only applies to a subset of models (e.g. Kolors); send it
      # only when the caller explicitly asks for it.
      batch = params[:batch_size] || params[:n]
      parameters[:batch_size] = batch if batch

      SmartPrompt.logger.info "Image generation parameters: #{parameters.except(:prompt).inspect}"

      begin
        response = submit_image_request("/images/generations", parameters)
        @last_response = response
        images = parse_images(response)
        SmartPrompt.logger.info "Successfully generated #{images.size} image(s)"
        images
      rescue LLMAPIError, Error
        # Already one of our own error types: pass through unchanged.
        raise
      rescue JSON::ParserError => e
        SmartPrompt.logger.error "Failed to parse image generation response: #{e.message}"
        raise LLMAPIError, "Failed to parse image generation response"
      rescue => e
        SmartPrompt.logger.error "Unexpected error during image generation: #{e.message}"
        raise Error, "Unexpected error during image generation: #{e.message}"
      end
    end

    # Image editing / image-to-image generation (for Qwen/Qwen-Image-Edit-* and
    # Kolors composable models). +image+ (and optionally +image2+/+image3+) may
    # be a local file path, a base64 data URL, or a public http(s) URL.
    # +image_file+ is accepted as a fallback when +image+ is not given.
    #
    # Returns the same Array of hashes as #generate_image and raises the same
    # error types.
    def edit_image(prompt, params = {})
      SmartPrompt.logger.info "ImageGenerationAdapter: Editing image"

      raise Error, "Prompt cannot be empty" if prompt.nil? || prompt.to_s.strip.empty?
      raise Error, "An input image is required for image editing" if params[:image].nil? && params[:image_file].nil?

      # Work on a copy so the caller's hash is not modified.
      normalized = params.dup
      normalized[:image] = normalize_input_image(normalized[:image] || normalized[:image_file])
      normalized[:image2] = normalize_input_image(normalized[:image2]) if normalized[:image2]
      normalized[:image3] = normalize_input_image(normalized[:image3]) if normalized[:image3]

      # Edit models reject image_size, so we deliberately omit it here.
      parameters = build_parameters(prompt, normalized)
      parameters[:image] = normalized[:image]
      parameters[:image2] = normalized[:image2] if normalized[:image2]
      parameters[:image3] = normalized[:image3] if normalized[:image3]

      SmartPrompt.logger.info "Image edit parameters: #{parameters.except(:prompt, :image, :image2, :image3).inspect}"

      begin
        response = submit_image_request("/images/generations", parameters)
        @last_response = response
        images = parse_images(response)
        SmartPrompt.logger.info "Successfully edited image, generated #{images.size} result(s)"
        images
      rescue LLMAPIError, Error
        # Already one of our own error types: pass through unchanged.
        raise
      rescue JSON::ParserError => e
        SmartPrompt.logger.error "Failed to parse image edit response: #{e.message}"
        raise LLMAPIError, "Failed to parse image edit response"
      rescue => e
        SmartPrompt.logger.error "Unexpected error during image editing: #{e.message}"
        raise Error, "Unexpected error during image editing: #{e.message}"
      end
    end

    # Save one or many generated images to disk. Accepts the Array returned by
    # #generate_image/#edit_image or a single image hash. Returns the list of
    # written file paths. Files are named "<filename_prefix>_<n>.<ext>" with n
    # starting at 1; +output_dir+ is created if missing. Any failure is
    # re-raised as Error.
    def save_image(image_data, output_dir = "./output", filename_prefix = "generated_image")
      SmartPrompt.logger.info "ImageGenerationAdapter: Saving image to file"

      begin
        FileUtils.mkdir_p(output_dir)
        images = image_data.is_a?(Array) ? image_data : [image_data]

        saved_files = images.each_with_index.map do |img, index|
          save_single_image(img, output_dir, "#{filename_prefix}_#{index + 1}")
        end

        SmartPrompt.logger.info "Successfully saved #{saved_files.size} image(s) to #{output_dir}"
        saved_files
      rescue => e
        SmartPrompt.logger.error "Error saving image: #{e.message}"
        raise Error, "Error saving image: #{e.message}"
      end
    end

    # Override send_request to provide a meaningful error for chat operations.
    # Kept in the public section: a private override would surface to external
    # callers as a NoMethodError instead of this message.
    def send_request(messages, model = nil, temperature = 0.7, tools = nil, proc = nil)
      SmartPrompt.logger.error "ImageGenerationAdapter does not support chat operations. Use generate_image or edit_image instead."
      raise NotImplementedError, "ImageGenerationAdapter does not support chat operations"
    end

    # Override embeddings method; public for the same reason as #send_request.
    def embeddings(text, model)
      SmartPrompt.logger.error "ImageGenerationAdapter does not support embeddings operations."
      raise NotImplementedError, "ImageGenerationAdapter does not support embeddings operations"
    end

    private

    # Assemble the common SiliconFlow request parameters (everything except the
    # text-vs-image specific fields handled by the callers). The model comes
    # from params[:model] or, failing that, the configured model; optional keys
    # are copied only when the caller supplied them.
    def build_parameters(prompt, params)
      model_name = params[:model] || @model
      if model_name.nil? || model_name.to_s.strip.empty?
        raise Error, "No model configured for image generation (set llm 'model' or pass model:)"
      end

      parameters = { model: model_name, prompt: prompt.to_s }
      %i[negative_prompt seed num_inference_steps guidance_scale cfg].each do |key|
        parameters[key] = params[key] if params[key]
      end
      parameters
    end

    # Return the requested size (image_size:, or its alias size:) as a String,
    # falling back to DEFAULT_IMAGE_SIZE when it is missing or blank.
    def resolve_image_size(params)
      size = params[:image_size] || params[:size]
      size.nil? || size.to_s.strip.empty? ? DEFAULT_IMAGE_SIZE : size.to_s
    end

    # POST a JSON body to the given SiliconFlow path and return the parsed
    # response hash, raising LLMAPIError on non-2xx responses.
    def submit_image_request(path, parameters)
      uri = URI.parse("#{@base_url}#{path}")

      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = (uri.scheme == "https")
      http.open_timeout = 30
      http.read_timeout = 240

      request = Net::HTTP::Post.new(uri.request_uri)
      request["Content-Type"] = "application/json"
      request["Authorization"] = "Bearer #{@api_key}"
      request.body = parameters.to_json

      # Inline image payloads are summarised rather than dumped into the log.
      SmartPrompt.logger.debug "Image request POST #{uri} body=#{loggable_parameters(parameters).to_json}"

      response = http.request(request)

      if response.is_a?(Net::HTTPSuccess)
        JSON.parse(response.body)
      else
        SmartPrompt.logger.error "Image API error: #{response.code} - #{response.body}"
        raise LLMAPIError, "Image generation API error: #{response.code} - #{response.body}"
      end
    end

    # Copy of +parameters+ for logging: image values given as `data:` URLs are
    # replaced by a short placeholder stating their size. Other values
    # (including http(s) image URLs) are kept as they are.
    def loggable_parameters(parameters)
      parameters.each_with_object({}) do |(key, value), loggable|
        inline_image = IMAGE_PARAMETER_KEYS.include?(key) && value.is_a?(String) && value.start_with?("data:")
        loggable[key] = inline_image ? "[inline image data omitted, #{value.bytesize} bytes]" : value
      end
    end

    # Normalize the SiliconFlow `images` response into a uniform Array of
    # symbol-keyed hashes. Falls back to OpenAI's `data` key for compatibility.
    # Raises LLMAPIError when neither key holds a non-empty Array.
    def parse_images(response)
      items = response["images"] || response["data"]
      items = [] unless items.is_a?(Array)

      if items.empty?
        SmartPrompt.logger.error "No image data in response: #{response.inspect}"
        raise LLMAPIError, "No image data in response"
      end

      items.map do |image_data|
        {
          url: image_data["url"],
          b64_json: image_data["b64_json"],
          seed: image_data["seed"],
        }
      end
    end

    # Accept a local file path, a base64 data URL, or an http(s) URL and return
    # the value SiliconFlow expects in the `image` field. Data URLs and http(s)
    # URLs are returned unchanged; a local file is read and encoded as a
    # base64 data URL. Raises Error for a missing file or an extension outside
    # SUPPORTED_IMAGE_FORMATS.
    def normalize_input_image(image)
      return image if image.nil?

      if image.is_a?(String)
        return image if image.start_with?("data:")
        return image if image.start_with?("http://", "https://")
      end

      raise Error, "Image file not found: #{image}" unless File.exist?(image)

      ext = File.extname(image).downcase.delete(".")
      unless SUPPORTED_IMAGE_FORMATS.include?(ext)
        raise Error, "Unsupported image format: #{ext}"
      end

      # "jpg" is only a file extension; the media subtype is "jpeg".
      mime = ext == "jpg" ? "jpeg" : ext
      "data:image/#{mime};base64,#{Base64.strict_encode64(File.binread(image))}"
    end

    # Write one image hash to +output_dir+ and return the file path.
    # Base64 payloads (:b64_json) are decoded and saved as ".png"; otherwise the
    # :url is downloaded and the extension is derived from the response's
    # media type. Raises Error when the download fails or the hash has neither
    # key.
    def save_single_image(image_data, output_dir, filename)
      if image_data[:b64_json]
        file_path = File.join(output_dir, "#{filename}.png")
        File.binwrite(file_path, Base64.decode64(image_data[:b64_json]))
      elsif image_data[:url]
        uri = URI.parse(image_data[:url])
        response = Net::HTTP.get_response(uri)

        raise Error, "Failed to download image from URL: #{response.code}" unless response.is_a?(Net::HTTPSuccess)

        # #content_type drops header parameters such as "; charset=binary";
        # downcase because media types are case-insensitive.
        media_type = response.content_type.to_s.downcase
        ext = CONTENT_TYPE_EXTENSIONS.fetch(media_type, "png")

        file_path = File.join(output_dir, "#{filename}.#{ext}")
        File.binwrite(file_path, response.body)
      else
        raise Error, "No image data available to save"
      end

      file_path
    end
  end
end
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
require 'thread'
|
|
2
|
+
|
|
3
|
+
module SmartPrompt
  # LRUCache implements a Least Recently Used cache with size limit enforcement.
  # Every public method takes an internal Mutex, so individual operations are
  # safe to call from multiple threads.
  #
  # Recency is tracked by position in an insertion-ordered Hash rather than by
  # wall-clock timestamps, so the eviction order cannot be disturbed by equal
  # timestamps or by the system clock being adjusted.
  class LRUCache
    attr_reader :max_size

    # @param max_size [Integer, nil] maximum number of entries; nil disables
    #   the limit
    def initialize(max_size = nil)
      @max_size = max_size
      @cache = {}
      # Keys ordered from least to most recently used (values are unused).
      @recency = {}
      @mutex = Mutex.new
    end

    # Get a value from the cache and mark the key as most recently used.
    # @return [Object, nil] the stored value, or nil when the key is absent
    def get(key)
      @mutex.synchronize do
        next nil unless @cache.key?(key)

        touch(key)
        @cache[key]
      end
    end

    # Put a value into the cache and mark the key as most recently used.
    # When a new key would exceed max_size, the least recently used entry is
    # evicted first. Updating an existing key never evicts.
    # @return [Object] the value that was stored
    def put(key, value)
      @mutex.synchronize do
        is_new = !@cache.key?(key)
        evict_lru if is_new && @max_size && @cache.size >= @max_size

        @cache[key] = value
        touch(key)
        value
      end
    end

    # Check if a key exists in the cache (does not affect recency)
    def key?(key)
      @mutex.synchronize { @cache.key?(key) }
    end

    # Delete a key from the cache
    # @return [Object, nil] the removed value, or nil when the key was absent
    def delete(key)
      @mutex.synchronize do
        @recency.delete(key)
        @cache.delete(key)
      end
    end

    # Get all keys in the cache
    def keys
      @mutex.synchronize { @cache.keys }
    end

    # Get the current size of the cache
    def size
      @mutex.synchronize { @cache.size }
    end

    # Get the least recently used key
    # @return [Object, nil] nil when the cache is empty
    def lru_key
      @mutex.synchronize { @recency.each_key.first }
    end

    # Clear all entries from the cache
    def clear
      @mutex.synchronize do
        @cache.clear
        @recency.clear
      end
    end

    # Get all values in the cache
    def values
      @mutex.synchronize { @cache.values }
    end

    # Check if cache is empty
    def empty?
      @mutex.synchronize { @cache.empty? }
    end

    # Iterate over cache entries (key, value).
    #
    # The block runs on a snapshot taken under the lock and is called after
    # the lock has been released: Mutex is not reentrant, so yielding while
    # holding it would raise ThreadError as soon as the block called #get or
    # #put on this cache. Iteration does not affect recency.
    # @return [Hash, Enumerator] the snapshot, or an Enumerator over it when no
    #   block is given
    def each(&block)
      snapshot = @mutex.synchronize { @cache.dup }
      snapshot.each(&block)
    end

    private

    # Move +key+ to the most-recently-used end of the recency order.
    # Must be called with the mutex held.
    def touch(key)
      @recency.delete(key)
      @recency[key] = true
    end

    # Evict the least recently used entry from the cache
    # This method is called within a mutex, so no need to synchronize again
    def evict_lru
      return if @recency.empty?

      # The first key in @recency is the least recently used one.
      evicted_key, = @recency.shift
      @cache.delete(evicted_key)
      log_eviction(evicted_key)
    end

    # Report an eviction when a logger is available. Skipped silently if
    # SmartPrompt does not define .logger or it returns nil.
    def log_eviction(key)
      logger = SmartPrompt.logger if SmartPrompt.respond_to?(:logger)
      logger&.info "LRU cache evicted key: #{key}"
    end
  end
end
|