smart_prompt 0.4.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -10
- data/README.cn.md +307 -64
- data/README.md +311 -64
- data/Rakefile +10 -1
- data/config/anthropic_config.yml +151 -0
- data/config/image_generation_config.yml +22 -0
- data/config/multimodal_config.yml +85 -0
- data/config/sensenova_config.yml +63 -0
- data/config/zhipu_config.yml +73 -0
- data/examples/anthropic_basic_chat.rb +143 -0
- data/examples/anthropic_example.rb +232 -0
- data/examples/anthropic_multimodal.rb +212 -0
- data/examples/anthropic_streaming.rb +312 -0
- data/examples/anthropic_tool_calling.rb +393 -0
- data/examples/automatic_cleanup_example.rb +109 -0
- data/examples/history_management_examples.rb +522 -0
- data/examples/image_generation_example.rb +130 -0
- data/examples/monitoring_example.rb +121 -0
- data/examples/multimodal_example.rb +63 -0
- data/examples/relevance_based_strategy_example.rb +87 -0
- data/examples/sensenova_example.rb +129 -0
- data/examples/stt_example.rb +287 -0
- data/examples/tts_example.rb +244 -0
- data/examples/video_generation_example.rb +189 -0
- data/examples/zhipu_example.rb +151 -0
- data/lib/smart_prompt/anthropic_adapter.rb +363 -281
- data/lib/smart_prompt/compression_engine.rb +201 -0
- data/lib/smart_prompt/context_strategy.rb +22 -0
- data/lib/smart_prompt/conversation.rb +81 -149
- data/lib/smart_prompt/engine.rb +36 -19
- data/lib/smart_prompt/history_manager.rb +596 -0
- data/lib/smart_prompt/hybrid_strategy.rb +222 -0
- data/lib/smart_prompt/image_generation_adapter.rb +297 -0
- data/lib/smart_prompt/lru_cache.rb +133 -0
- data/lib/smart_prompt/message.rb +57 -0
- data/lib/smart_prompt/multimodal_adapter.rb +277 -0
- data/lib/smart_prompt/openai_adapter.rb +1 -25
- data/lib/smart_prompt/persistence_layer.rb +197 -0
- data/lib/smart_prompt/relevance_based_strategy.rb +221 -0
- data/lib/smart_prompt/sensenova_adapter.rb +410 -0
- data/lib/smart_prompt/session.rb +140 -0
- data/lib/smart_prompt/sliding_window_strategy.rb +100 -0
- data/lib/smart_prompt/stt_adapter.rb +381 -0
- data/lib/smart_prompt/summary_based_strategy.rb +152 -0
- data/lib/smart_prompt/token_counter.rb +74 -0
- data/lib/smart_prompt/tts_adapter.rb +403 -0
- data/lib/smart_prompt/version.rb +1 -1
- data/lib/smart_prompt/video_generation_adapter.rb +330 -0
- data/lib/smart_prompt/worker.rb +25 -3
- data/lib/smart_prompt/zhipu_adapter.rb +616 -0
- data/lib/smart_prompt.rb +22 -2
- data/workers/history_management_examples.rb +407 -0
- data/workers/image_generation_workers.rb +119 -0
- data/workers/multimodal_workers.rb +110 -0
- data/workers/sensenova_workers.rb +62 -0
- data/workers/stt_workers.rb +195 -0
- data/workers/tts_workers.rb +388 -0
- data/workers/video_generation_workers.rb +264 -0
- data/workers/zhipu_workers.rb +113 -0
- metadata +95 -5
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
require_relative 'context_strategy'
|
|
2
|
+
|
|
3
|
+
module SmartPrompt
|
|
4
|
+
# RelevanceBasedStrategy implements a context selection strategy based on
|
|
5
|
+
# semantic relevance and importance scoring
|
|
6
|
+
#
|
|
7
|
+
# This strategy:
|
|
8
|
+
# - Calculates importance scores combining recency and relevance
|
|
9
|
+
# - Selects top-k most important messages
|
|
10
|
+
# - Supports both keyword-based and embedding-based similarity
|
|
11
|
+
# - Maintains temporal ordering of selected messages
|
|
12
|
+
# - Trims to token limits while preserving important context
|
|
13
|
+
class RelevanceBasedStrategy
|
|
14
|
+
include ContextStrategy
|
|
15
|
+
|
|
16
|
+
# Build a relevance-based context strategy.
#
# Every tunable falls back to its default when the option is absent
# (or set to nil/false) in +config+.
#
# @param config [Hash] Configuration options (Symbol keys)
# @option config [Integer] :top_k (10) Number of top messages to select
# @option config [Float] :recency_weight (0.3) Weight for recency in scoring
# @option config [Float] :relevance_weight (0.7) Weight for relevance in scoring
# @option config [Object] :embedding_service (nil) Optional embedding service;
#   when present, scoring uses embedding similarity instead of keywords
def initialize(config = {})
  defaults = { top_k: 10, recency_weight: 0.3, relevance_weight: 0.7 }
  @top_k, @recency_weight, @relevance_weight =
    defaults.map { |option, fallback| config[option] || fallback }
  @embedding_service = config[:embedding_service]
end
|
|
28
|
+
|
|
29
|
+
# Select the messages most important to the current turn.
#
# Each message is scored once via #calculate_score, the +@top_k+ best are kept,
# the survivors are put back in timestamp order, and the result is optionally
# trimmed to +max_tokens+.
#
# @param messages [Array<Message>] All messages in the session
# @param max_tokens [Integer, nil] Maximum token limit for selected messages
# @param current_message [Message, nil] The current message for relevance calculation
# @return [Array<Message>] Selected messages ordered by timestamp
def select_messages(messages, max_tokens, current_message = nil)
  return [] if messages.nil? || messages.empty?

  # If no current message, fall back to recency-only selection
  unless current_message
    log_debug "No current message provided, falling back to recency-based selection"
    return select_by_recency(messages, max_tokens)
  end

  total = messages.length

  # Rank once and reuse the ranking for both logging and selection.
  # sort_by is not stable, so equal scores are broken explicitly in favour of
  # the later (newer) message instead of being left to the sort implementation.
  ranked = messages.each_with_index
                   .map { |msg, idx| [msg, calculate_score(msg, idx, total, current_message), idx] }
                   .sort_by { |_msg, score, idx| [-score, -idx] }

  top_scores = ranked.first(5).map { |_msg, score, _idx| score.round(3) }
  log_debug "RelevanceBasedStrategy: calculated scores for #{messages.count} messages, top 5 scores: #{top_scores.inspect}"

  chosen = ranked.first(@top_k)
  log_debug "RelevanceBasedStrategy: selected top #{chosen.count}/#{messages.count} messages by importance (recency_weight=#{@recency_weight}, relevance_weight=#{@relevance_weight})"

  # Re-order by timestamp to maintain conversation flow; the original position
  # breaks ties so messages sharing a timestamp keep their relative order.
  selected = chosen.sort_by { |msg, _score, idx| [msg.timestamp, idx] }.map(&:first)

  # Trim to token limit if specified
  result = max_tokens ? trim_to_token_limit(selected, max_tokens) : selected

  if max_tokens && result.count < selected.count
    tokens_before = selected.sum { |m| m.token_count || 0 }
    tokens_after = result.sum { |m| m.token_count || 0 }
    log_debug "RelevanceBasedStrategy: trimmed to token limit #{max_tokens}: #{selected.count} -> #{result.count} messages, #{tokens_before} -> #{tokens_after} tokens"
  end

  result
end
|
|
75
|
+
|
|
76
|
+
# Decide whether the session has grown enough to warrant compression.
#
# Compression is recommended once the session holds more than three times
# as many messages as this strategy selects (+@top_k+).
#
# @param session [Session] The session to evaluate (must respond to #message_count)
# @return [Boolean] true if message count > 3 * top_k
def should_compress?(session)
  message_total = session.message_count
  compression_threshold = @top_k * 3
  message_total > compression_threshold
end
|
|
83
|
+
|
|
84
|
+
private
|
|
85
|
+
|
|
86
|
+
# Combine recency and relevance into a single importance score.
#
# Recency grows linearly with the message's position (the last message scores
# 1.0; a lone message also scores 1.0). Relevance comes from the embedding
# service when one is configured, otherwise from keyword overlap.
#
# @param message [Message] The message to score
# @param index [Integer] Position of message in the session
# @param total [Integer] Total number of messages
# @param current_message [Message] Current message for relevance comparison
# @return [Float] Weighted sum of the recency and relevance scores
def calculate_score(message, index, total, current_message)
  recency_score = total <= 1 ? 1.0 : index.to_f / (total - 1)

  relevance_score =
    @embedding_service ? calculate_semantic_similarity(message, current_message) : calculate_keyword_similarity(message, current_message)

  @recency_weight * recency_score + @relevance_weight * relevance_score
end
|
|
106
|
+
|
|
107
|
+
# Calculate semantic similarity using embeddings.
#
# Both message contents are embedded through +@embedding_service+ and compared
# with cosine similarity. Any StandardError raised along the way (service
# failure, malformed vectors) degrades to keyword similarity.
#
# @param msg1 [Message] First message
# @param msg2 [Message] Second message
# @return [Float] Cosine similarity (0.0-1.0), or the keyword similarity on failure
def calculate_semantic_similarity(msg1, msg2)
  emb1 = @embedding_service.get_embedding(msg1.content)
  emb2 = @embedding_service.get_embedding(msg2.content)
  cosine_similarity(emb1, emb2)
rescue => e
  # The logger is optional elsewhere in this class (see log_debug); guard it
  # here too so a missing logger cannot turn the fallback into a crash.
  SmartPrompt.logger&.warn("Embedding similarity failed: #{e.message}, falling back to keyword similarity")
  calculate_keyword_similarity(msg1, msg2)
end
|
|
121
|
+
|
|
122
|
+
# Keyword overlap between two messages, measured with the Jaccard index.
#
# @param msg1 [Message] First message
# @param msg2 [Message] Second message
# @return [Float] |shared words| / |all words| (0.0-1.0); 0.0 when either
#   message yields no words
def calculate_keyword_similarity(msg1, msg2)
  left, right = [msg1, msg2].map { |message| extract_words(message.content) }
  return 0.0 if left.empty? || right.empty?

  # Array#& and Array#| both de-duplicate, so these are set sizes.
  shared = (left & right).length
  combined = (left | right).length

  combined > 0 ? shared.to_f / combined : 0.0
end
|
|
140
|
+
|
|
141
|
+
# Extract and normalize words from text.
#
# Tokenisation is Unicode-aware: Ruby's +\w+ only matches ASCII, which made
# every Chinese/Japanese or accented message produce no words at all (and so
# a keyword similarity of 0.0). Alphabetic/numeric runs become one lower-cased
# token each; Han, Hiragana and Katakana characters, which are written without
# spaces, become one token per character.
#
# No stop-word list is applied; the only filter is dropping alphabetic tokens
# of one or two characters.
#
# @param text [String, nil] Text to process; anything that is not a String
#   (e.g. multimodal content arrays) yields no words
# @return [Array<String>] Normalized words, in order of appearance
def extract_words(text)
  return [] unless text.is_a?(String)
  return [] if text.empty?

  # The pattern below is UTF-8; transcode/scrub first so strings in another
  # encoding or with invalid bytes cannot raise during matching.
  normalized = text.encode(Encoding::UTF_8, invalid: :replace, undef: :replace).scrub.downcase

  tokens = normalized.scan(
    /[\p{Han}\p{Hiragana}\p{Katakana}]|[\p{L}\p{N}_&&[^\p{Han}\p{Hiragana}\p{Katakana}]]+/
  )

  # Remove very short words (likely not meaningful); single CJK characters
  # carry meaning on their own and are always kept.
  tokens.select { |token| token.length > 2 || token.match?(/\A[\p{Han}\p{Hiragana}\p{Katakana}]\z/) }
end
|
|
153
|
+
|
|
154
|
+
# Cosine similarity of two equally sized numeric vectors, clamped to [0, 1].
#
# @param vec1 [Array<Float>] First vector
# @param vec2 [Array<Float>] Second vector
# @return [Float] Cosine similarity (0.0-1.0); 0.0 for nil, empty, mismatched
#   or zero-magnitude vectors
def cosine_similarity(vec1, vec2)
  return 0.0 if vec1.nil? || vec2.nil?
  return 0.0 if vec1.empty? || vec2.empty? || vec1.length != vec2.length

  dot = vec1.zip(vec2).sum { |a, b| a * b }
  norm1 = Math.sqrt(vec1.sum { |x| x * x })
  norm2 = Math.sqrt(vec2.sum { |x| x * x })

  # A zero vector has no direction; avoid dividing by zero.
  return 0.0 if norm1.zero? || norm2.zero?

  raw = dot / (norm1 * norm2)

  # Opposite-pointing vectors give a negative cosine; report those as 0.
  [[raw, 0.0].max, 1.0].min
end
|
|
178
|
+
|
|
179
|
+
# Recency-only selection, used when there is no current message to compare with.
#
# @param messages [Array<Message>] All messages
# @param max_tokens [Integer, nil] Maximum token limit
# @return [Array<Message>] The last +@top_k+ messages, trimmed to the token
#   limit when one is given
def select_by_recency(messages, max_tokens)
  newest = messages.last(@top_k)
  return newest unless max_tokens

  trim_to_token_limit(newest, max_tokens)
end
|
|
187
|
+
|
|
188
|
+
# Trim messages to fit within a token budget.
#
# Walks from the newest message backwards, keeping messages while the running
# total stays within +max_tokens+, and stops at the first message that would
# overflow. A message without a token count is treated as costing 0.
#
# @param messages [Array<Message>] Messages to trim (oldest first)
# @param max_tokens [Integer] Maximum token limit
# @return [Array<Message>] The kept messages, still oldest first
def trim_to_token_limit(messages, max_tokens)
  return messages unless max_tokens
  return [] if messages.empty?

  spent = 0
  newest_first = messages.reverse_each.take_while do |msg|
    cost = msg.token_count || 0
    fits = spent + cost <= max_tokens
    spent += cost if fits
    fits
  end

  newest_first.reverse
end
|
|
214
|
+
|
|
215
|
+
# Logging helper: emit a debug line tagged with this strategy's name.
# Does nothing when SmartPrompt has no logger configured.
#
# @param message [String] Text to log
def log_debug(message)
  logger = SmartPrompt.logger
  logger.debug "[RelevanceBasedStrategy] #{message}" if logger
end
|
|
220
|
+
end
|
|
221
|
+
end
|
|
@@ -0,0 +1,410 @@
|
|
|
1
|
+
require "base64"
|
|
2
|
+
require "json"
|
|
3
|
+
require "net/http"
|
|
4
|
+
require "uri"
|
|
5
|
+
require "fileutils"
|
|
6
|
+
|
|
7
|
+
module SmartPrompt
|
|
8
|
+
# Adapter for SenseNova (商汤 日日新) — the SenseCore large-model platform.
|
|
9
|
+
#
|
|
10
|
+
# One adapter owns the whole provider: all four documented model categories share the
|
|
11
|
+
# same `api.sensenova.cn` domain and Bearer-token auth, so a single config block serves
|
|
12
|
+
# them just by changing `model`.
|
|
13
|
+
#
|
|
14
|
+
# 1. 商量 文本对话 / 多模态 (chat + vision) — OpenAI-compatible
|
|
15
|
+
# POST {url}/chat/completions (url defaults to .../compatible-mode/v2)
|
|
16
|
+
# Streaming is SSE; the model may emit a `reasoning`/`delta.reasoning` field on
|
|
17
|
+
# reasoning models, which we remap to OpenAI's `reasoning_content` so the engine's
|
|
18
|
+
# stream aggregator (Engine#@stream_proc) keeps working unchanged.
|
|
19
|
+
# 2. Cupido 向量模型 (embeddings) — native, non-OpenAI response shape
|
|
20
|
+
# POST {embeddings_url} (defaults to .../v1/llm/embeddings)
|
|
21
|
+
# Body {model, input:[...]}; response {embeddings:[{index, embedding, ...}]}.
|
|
22
|
+
# 3. 秒画 文生图 (text-to-image) — OpenAI-compatible /images/generations
|
|
23
|
+
# POST {image_url} (native /v1 base, e.g. .../v1/images/generations;
|
|
24
|
+
# NOT under compatible-mode/v2, which 404s)
|
|
25
|
+
#
|
|
26
|
+
# We talk to the endpoints directly with Net::HTTP (like the image/tts/stt adapters)
|
|
27
|
+
# rather than the `openai` gem, because we must surface SenseNova's `reasoning` field,
|
|
28
|
+
# remap streaming deltas, and handle the native embeddings shape. No new gem deps.
|
|
29
|
+
class SenseNovaAdapter < LLMAdapter
|
|
30
|
+
DEFAULT_BASE_URL = "https://api.sensenova.cn/compatible-mode/v2".freeze
|
|
31
|
+
DEFAULT_EMBEDDINGS_URL = "https://api.sensenova.cn/v1/llm/embeddings".freeze
|
|
32
|
+
# 秒画 text-to-image (sensenova-u1-fast) lives on the token.sensenova.cn /v1 base
|
|
33
|
+
# (confirmed working 2026-06-19). NOT under compatible-mode/v2, which 404s.
|
|
34
|
+
DEFAULT_IMAGE_URL = "https://token.sensenova.cn/v1/images/generations".freeze
|
|
35
|
+
# Sizes accepted by sensenova-u1-fast (the API 400s on anything else, e.g. 1024x1024).
|
|
36
|
+
VALID_IMAGE_SIZES = %w[
|
|
37
|
+
1664x2496 2496x1664 1760x2368 2368x1760 1824x2272 2272x1824
|
|
38
|
+
2048x2048 2752x1536 1536x2752 3072x1376 1344x3136 2560x720 3072x864
|
|
39
|
+
].freeze
|
|
40
|
+
DEFAULT_IMAGE_SIZE = "2048x2048".freeze
|
|
41
|
+
SUPPORTED_IMAGE_FORMATS = %w[jpg jpeg png gif bmp webp].freeze
|
|
42
|
+
|
|
43
|
+
# SenseNova sampling parameters forwarded from config to the chat request when present.
|
|
44
|
+
CHAT_OPTIONAL_KEYS = %w[
|
|
45
|
+
top_p top_k min_p presence_penalty frequency_penalty repetition_penalty
|
|
46
|
+
reasoning_effort max_completion_tokens max_tokens
|
|
47
|
+
].freeze
|
|
48
|
+
|
|
49
|
+
# Configure the adapter from +config+ (read back through @config after +super+).
#
# Reads "api_key", "url", "embeddings_url" and "image_url"; each URL falls back
# to the matching DEFAULT_* constant. An "api_key" written as ENV["NAME"] is
# resolved from the environment.
#
# Any StandardError raised during setup is logged and re-raised as LLMAPIError
# (or re-raised as-is when it already is a SmartPrompt::Error).
def initialize(config)
  super
  SmartPrompt.logger.info "Start create the SmartPrompt SenseNovaAdapter."

  api_key = resolve_api_key(@config["api_key"])
  # Match the other adapters: tolerate a missing key at construction (e.g. when the
  # ENV var isn't set yet) and let the first request fail with a clear auth error.
  SmartPrompt.logger.warn "SenseNova api_key is empty — API calls will fail until it is set." if api_key.nil? || api_key.to_s.strip.empty?

  @api_key = api_key
  @base_url = (@config["url"] || DEFAULT_BASE_URL).to_s.chomp("/")
  @embeddings_url = (@config["embeddings_url"] || DEFAULT_EMBEDDINGS_URL).to_s
  # Image generation does NOT live under the chat base URL, so it has its own
  # setting; it defaults to DEFAULT_IMAGE_URL. Override per-llm with "image_url".
  @image_url = (@config["image_url"] || DEFAULT_IMAGE_URL).to_s
  SmartPrompt.logger.info "SenseNova base_url=#{@base_url}"
rescue => e
  SmartPrompt.logger.error "Failed to initialize SenseNova client: #{e.message}"
  raise e.is_a?(SmartPrompt::Error) ? e : LLMAPIError, "Invalid SenseNova configuration: #{e.message}"
end

# Turn a configured api_key into the real key.
#
# A value written as ENV["NAME"] / ENV['NAME'] is looked up in the environment.
# The name is extracted with a strict pattern instead of eval, so a config
# value can never execute Ruby code. Anything else is returned unchanged.
#
# Raises ArgumentError when the value is wrapped in ENV[...] but is not a
# plain quoted variable name.
private def resolve_api_key(raw)
  return raw unless raw.is_a?(String) && raw.start_with?("ENV[") && raw.end_with?("]")

  match = raw.match(/\AENV\[\s*(["'])([^"'\]]+)\1\s*\]\z/)
  raise ArgumentError, "api_key must be written as ENV[\"NAME\"], got #{raw.inspect}" unless match

  ENV[match[2]]
end
|
|
72
|
+
|
|
73
|
+
public
|
|
74
|
+
|
|
75
|
+
# Chat / multimodal request against {base_url}/chat/completions.
#
# Without +proc+ the call is blocking: the reshaped OpenAI-format hash is stored
# in @last_response and returned. With +proc+ the request is streamed: +proc+ is
# called as proc.call(chunk, 0) for every reshaped chunk and the method returns nil.
#
# LLMAPIError / Error pass through untouched; any other StandardError is logged
# and wrapped in LLMAPIError.
def send_request(messages, model = nil, temperature = nil, tools = nil, proc = nil)
  model_name = model || @config["model"]
  body = build_chat_body(messages, model_name, temperature, tools)
  SmartPrompt.logger.info "SenseNovaAdapter: chat request model=#{model_name} stream=#{!proc.nil?}"

  endpoint = "#{@base_url}/chat/completions"

  unless proc
    response = build_completion_response(http_post_json(endpoint, body))
    @last_response = response
    SmartPrompt.logger.info "SenseNovaAdapter: received chat response"
    return response
  end

  body["stream"] = true
  stream_chat(endpoint, body) do |event|
    proc.call(build_stream_chunk(event), 0)
  end
  SmartPrompt.logger.info "SenseNovaAdapter: streaming request finished"
  nil
rescue LLMAPIError, Error
  raise
rescue => e
  SmartPrompt.logger.error "SenseNova chat error: #{e.message}"
  raise LLMAPIError, "Failed to call SenseNova chat: #{e.message}"
end
|
|
102
|
+
|
|
103
|
+
# Embed +text+ through the embeddings endpoint (@embeddings_url).
#
# The request body is {model, input: [text]}. The vector list is read from the
# response's "embeddings" key, falling back to "data", and only the first
# entry's "embedding" is returned.
#
# The model is +model+, else config "embedding_model", else config "model".
# Raises LLMAPIError when the call fails or the response carries no vector.
def embeddings(text, model)
  model_name = model || @config["embedding_model"] || @config["model"]
  SmartPrompt.logger.info "SenseNovaAdapter: embeddings model=#{model_name}"

  payload = { "model" => model_name, "input" => [text.to_s] }
  response =
    begin
      http_post_json(@embeddings_url, payload)
    rescue LLMAPIError, Error
      raise
    rescue => e
      raise LLMAPIError, "Failed to call SenseNova embeddings: #{e.message}"
    end

  entries = response["embeddings"] || response["data"]
  vector = entries.is_a?(Array) && entries.any? ? entries[0]["embedding"] : nil
  raise LLMAPIError, "No embedding vector in SenseNova response: #{response.inspect}" unless vector

  vector
end
|
|
125
|
+
|
|
126
|
+
# Text-to-image request against @image_url.
#
# Recognised +params+ (Symbol keys): :model, :n, :size (or :image_size),
# :response_format, :negative_prompt, :seed, :num_inference_steps,
# :guidance_scale. Optional ones are only sent when given; "size" is always
# sent (see #resolve_image_size).
#
# The image list is read from the response's "data" key, falling back to "images".
# Returns an Array of {url:, b64_json:, seed:}.
# Raises Error for an empty prompt, a missing model or an unexpected call
# failure, and LLMAPIError when the response contains no images.
def generate_image(prompt, params = {})
  SmartPrompt.logger.info "SenseNovaAdapter: generating image"
  raise Error, "Prompt cannot be empty" if prompt.nil? || prompt.to_s.strip.empty?

  model_name = params[:model] || @config["image_model"] || @config["model"]
  raise Error, "No model configured for image generation" if model_name.nil? || model_name.to_s.strip.empty?

  body = { "model" => model_name, "prompt" => prompt.to_s }
  body["n"] = params[:n] if params[:n]
  body["size"] = resolve_image_size(params[:size] || params[:image_size])
  %i[response_format negative_prompt seed num_inference_steps guidance_scale].each do |option|
    body[option.to_s] = params[option] if params[option]
  end

  # The prompt itself is kept out of the log line.
  loggable = body.reject { |key, _value| key == "prompt" }
  SmartPrompt.logger.info "SenseNova image params: #{loggable.inspect}"

  response =
    begin
      http_post_json(@image_url, body)
    rescue LLMAPIError, Error
      raise
    rescue => e
      raise Error, "Failed to call SenseNova image generation: #{e.message}"
    end

  entries = response["data"] || response["images"]
  unless entries.is_a?(Array) && entries.any?
    SmartPrompt.logger.error "SenseNova image response had no data: #{response.inspect}"
    raise LLMAPIError, "No image data in SenseNova response"
  end

  images = entries.map do |entry|
    { url: entry["url"], b64_json: entry["b64_json"], seed: entry["seed"] }
  end
  SmartPrompt.logger.info "SenseNovaAdapter: generated #{images.size} image(s)"
  images
end
|
|
168
|
+
|
|
169
|
+
# Write generated images to +output_dir+ (created if missing).
#
# +image_data+ is either the Array returned by #generate_image or a single
# image hash. Files are named "<filename_prefix>_<n>" with n starting at 1;
# the extension is chosen by #save_single_image.
# Returns the Array of written file paths.
def save_image(image_data, output_dir = "./output", filename_prefix = "sensenova_image")
  FileUtils.mkdir_p(output_dir)

  batch = image_data.is_a?(Array) ? image_data : [image_data]
  written = batch.map.with_index(1) do |image, number|
    save_single_image(image, output_dir, "#{filename_prefix}_#{number}")
  end

  SmartPrompt.logger.info "Saved #{written.size} SenseNova image(s) to #{output_dir}"
  written
end
|
|
179
|
+
|
|
180
|
+
private
|
|
181
|
+
|
|
182
|
+
# ---- chat request building ------------------------------------------------
|
|
183
|
+
|
|
184
|
+
# Assemble the JSON body for a chat request.
#
# Temperature precedence is: config "temperature", then the +temperature+
# argument, then 0.7. Every key listed in CHAT_OPTIONAL_KEYS is copied from the
# config when the config defines it. "tools" is only included when non-empty.
def build_chat_body(messages, model_name, temperature, tools)
  payload = {
    "model" => model_name,
    "messages" => process_multimodal_messages(messages),
    "temperature" => @config["temperature"] || temperature || 0.7,
  }

  CHAT_OPTIONAL_KEYS.each do |option|
    next unless @config.key?(option)

    payload[option] = @config[option]
  end

  payload["tools"] = tools unless !tools || tools.empty?
  payload
end
|
|
194
|
+
|
|
195
|
+
# Convert conversation messages into the string-keyed hashes sent on the wire.
#
# "role" and "content" are always emitted (read from Symbol or String keys).
# Array content is normalized item by item via #normalize_content_item; any
# other content passes through unchanged.
#
# The OpenAI tool-calling fields "name", "tool_calls" and "tool_call_id" are
# carried over when present. Without them an assistant turn loses the calls it
# made and a "tool" turn loses the id linking it to its call, so a tool-calling
# conversation could not be sent back. No other keys are forwarded.
def process_multimodal_messages(messages)
  messages.map do |msg|
    content = msg[:content] || msg["content"]
    content = content.map { |item| normalize_content_item(item) } if content.is_a?(Array)

    wire = { "role" => msg[:role] || msg["role"], "content" => content }
    %w[name tool_calls tool_call_id].each do |field|
      value = msg[field.to_sym] || msg[field]
      wire[field] = value unless value.nil?
    end
    wire
  end
end
|
|
205
|
+
|
|
206
|
+
# Normalize one element of a multimodal content array.
#
# - non-Hash values become a text part built from their string form
# - "image_url" parts are rebuilt with the URL run through #normalize_image_url
#   (the URL may be given directly or nested under :url / "url")
# - any other Hash is returned with its keys stringified
def normalize_content_item(item)
  return { "type" => "text", "text" => item.to_s } unless item.is_a?(Hash)

  part_type = item[:type] || item["type"]
  return stringify_hash(item) unless part_type == "image_url"

  source = item[:image_url] || item["image_url"]
  location = source.is_a?(Hash) ? (source[:url] || source["url"]) : source
  { "type" => "image_url", "image_url" => { "url" => normalize_image_url(location) } }
end
|
|
218
|
+
|
|
219
|
+
# Make an image reference sendable.
#
# nil and http://, https:// or data: URLs are returned untouched. Anything else
# is treated as a local file path and inlined as a base64 data: URL.
# Raises Error when the file does not exist or its extension is not listed in
# SUPPORTED_IMAGE_FORMATS.
def normalize_image_url(url)
  return url if url.nil? || url.start_with?("http://", "https://", "data:")

  raise Error, "Image file not found: #{url}" unless File.exist?(url)

  extension = File.extname(url).downcase.delete(".")
  raise Error, "Unsupported image format: #{extension}" unless SUPPORTED_IMAGE_FORMATS.include?(extension)

  # "jpg" is only a file extension; the media subtype is "jpeg".
  subtype = { "jpg" => "jpeg" }.fetch(extension, extension)
  encoded = Base64.strict_encode64(File.binread(url))
  "data:image/#{subtype};base64,#{encoded}"
end
|
|
229
|
+
|
|
230
|
+
# ---- response shaping -----------------------------------------------------
|
|
231
|
+
|
|
232
|
+
# Reshape a non-streaming chat response into an OpenAI-style completion hash.
#
# Only the first choice is kept. The message's "reasoning" field, when
# present, is exposed as "reasoning_content". "usage" and
# "system_fingerprint" are copied only when the raw response has them.
def build_completion_response(raw)
  source = raw.dig("choices", 0, "message") || {}

  message = { "role" => source["role"] || "assistant", "content" => source["content"] }
  message["reasoning_content"] = source["reasoning"] if source["reasoning"]
  message["tool_calls"] = source["tool_calls"] if source["tool_calls"]

  only_choice = {
    "index" => 0,
    "message" => message,
    "finish_reason" => raw.dig("choices", 0, "finish_reason"),
  }

  shaped = {
    "id" => raw["id"],
    "object" => raw["object"] || "chat.completion",
    "created" => raw["created"],
    "model" => raw["model"],
    "choices" => [only_choice],
  }
  %w[usage system_fingerprint].each { |key| shaped[key] = raw[key] if raw[key] }
  shaped
end
|
|
256
|
+
|
|
257
|
+
# Reshape one parsed streaming event into an OpenAI-style chunk.
#
# Only the first choice is kept, and its delta is rebuilt from the fields that
# are present; "reasoning" is renamed to "reasoning_content". An event without
# choices (e.g. a usage-only event) produces an empty "choices" array.
def build_stream_chunk(data)
  chunk = %w[id object created model].each_with_object({}) { |key, acc| acc[key] = data[key] }
  %w[usage system_fingerprint].each { |key| chunk[key] = data[key] if data[key] }

  choices = data["choices"] || []
  unless choices.any?
    chunk["choices"] = []
    return chunk
  end

  head = choices[0]
  delta = head["delta"] || {}
  # output key => key read from the incoming delta
  renames = {
    "role" => "role",
    "content" => "content",
    "reasoning_content" => "reasoning",
    "tool_calls" => "tool_calls",
  }
  reshaped = renames.each_with_object({}) do |(target, origin), acc|
    acc[target] = delta[origin] if delta[origin]
  end

  chunk["choices"] = [{
    "index" => head["index"] || 0,
    "delta" => reshaped,
    "finish_reason" => head["finish_reason"],
  }]
  chunk
end
|
|
289
|
+
|
|
290
|
+
# ---- HTTP -----------------------------------------------------------------
|
|
291
|
+
|
|
292
|
+
# POST +body+ as JSON to +url+ with Bearer authentication and return the
# parsed JSON response.
#
# TLS is used for https URLs. Timeouts: 30s to connect, 240s to read.
# Raises LLMAPIError (after logging) for any non-2xx response.
def http_post_json(url, body)
  target = URI.parse(url)
  payload = body.to_json

  client = Net::HTTP.new(target.host, target.port)
  client.use_ssl = (target.scheme == "https")
  client.open_timeout = 30
  client.read_timeout = 240

  post = Net::HTTP::Post.new(target.request_uri)
  post["Content-Type"] = "application/json"
  post["Authorization"] = "Bearer #{@api_key}"
  post.body = payload

  SmartPrompt.logger.debug "SenseNova POST #{target} body=#{payload}"
  response = client.request(post)

  unless response.is_a?(Net::HTTPSuccess)
    SmartPrompt.logger.error "SenseNova API error: #{response.code} - #{response.body}"
    raise LLMAPIError, "SenseNova API error: #{response.code} - #{response.body}"
  end

  JSON.parse(response.body)
end
|
|
314
|
+
|
|
315
|
+
# POST +body+ to +url+ and yield every parsed SSE `data:` payload to the block.
#
# The response body is consumed incrementally: incoming segments are appended
# to a buffer and complete lines are cut off it one at a time, so an event
# split across two segments is still parsed whole. Lines that are not `data:`
# lines, and payloads that are not valid JSON, are skipped. A `[DONE]` payload
# ends processing.
#
# Timeouts: 30s to connect, 300s to read.
# Raises LLMAPIError for a non-2xx response.
def stream_chat(url, body)
  target = URI.parse(url)
  client = Net::HTTP.new(target.host, target.port)
  client.use_ssl = (target.scheme == "https")
  client.open_timeout = 30
  client.read_timeout = 300

  post = Net::HTTP::Post.new(target.request_uri)
  post["Content-Type"] = "application/json"
  post["Authorization"] = "Bearer #{@api_key}"
  post["Accept"] = "text/event-stream"
  post.body = body.to_json

  pending = ""
  finished = false

  client.request(post) do |response|
    raise LLMAPIError, "SenseNova stream error: #{response.code} - #{response.body}" unless response.is_a?(Net::HTTPSuccess)

    response.read_body do |segment|
      break if finished

      pending << segment
      while (newline_at = pending.index("\n"))
        line = pending.slice!(0, newline_at + 1).strip
        # Blank separators, comments and other SSE fields are not payloads.
        next unless line.start_with?("data:")

        payload = line.sub(/\Adata:\s*/, "")
        if payload == "[DONE]"
          finished = true
          break
        end

        begin
          event = JSON.parse(payload)
        rescue JSON::ParserError
          next
        end
        yield event
      end
    end
  end
end
|
|
360
|
+
|
|
361
|
+
# Pick the "size" value to send for image generation.
#
# A missing or blank size becomes DEFAULT_IMAGE_SIZE. Any other value is
# returned as a String; when it is not listed in VALID_IMAGE_SIZES a warning is
# logged but the value is still returned, leaving the final verdict to the API.
def resolve_image_size(size)
  requested = size.to_s
  return DEFAULT_IMAGE_SIZE if requested.strip.empty?
  return requested if VALID_IMAGE_SIZES.include?(requested)

  SmartPrompt.logger.warn "SenseNova image size '#{requested}' is not in the known-valid list " \
                          "(#{VALID_IMAGE_SIZES.join(', ')}); the API may reject it."
  requested
end
|
|
372
|
+
|
|
373
|
+
# Write one generated image into +output_dir+ and return the file path.
#
# +image_data+ is a hash with Symbol keys:
# - :b64_json — decoded and written as "<filename>.png"
# - :url      — downloaded with a plain GET; the extension follows the
#               response's Content-Type and defaults to "png"
#
# The Content-Type is compared by media type only: parameters such as
# "; charset=binary" are dropped and case is ignored, so a JPEG served as
# "image/jpeg; charset=binary" is not mislabeled as .png.
#
# Raises Error when the download fails or the hash holds neither key.
def save_single_image(image_data, output_dir, filename)
  if image_data[:b64_json]
    file_path = File.join(output_dir, "#{filename}.png")
    File.binwrite(file_path, Base64.decode64(image_data[:b64_json]))
  elsif image_data[:url]
    response = Net::HTTP.get_response(URI.parse(image_data[:url]))
    raise Error, "Failed to download image from URL: #{response.code}" unless response.is_a?(Net::HTTPSuccess)

    media_type = response["content-type"].to_s.split(";").first.to_s.strip.downcase
    ext = case media_type
          when "image/jpeg", "image/jpg" then "jpg"
          when "image/png" then "png"
          when "image/gif" then "gif"
          when "image/webp" then "webp"
          else "png"
          end
    file_path = File.join(output_dir, "#{filename}.#{ext}")
    File.binwrite(file_path, response.body)
  else
    raise Error, "No image data available to save"
  end
  file_path
end
|
|
396
|
+
|
|
397
|
+
# Recursively convert every Hash key to a String.
#
# Hashes are rebuilt with stringified keys, Arrays are walked element by
# element, and any other value is returned as-is.
def stringify_hash(hash)
  if hash.is_a?(Hash)
    hash.map { |key, value| [key.to_s, stringify_hash(value)] }.to_h
  elsif hash.is_a?(Array)
    hash.map { |element| stringify_hash(element) }
  else
    hash
  end
end
|
|
409
|
+
end
|
|
410
|
+
end
|