smart_prompt 0.4.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -10
  3. data/README.cn.md +307 -64
  4. data/README.md +311 -64
  5. data/Rakefile +10 -1
  6. data/config/anthropic_config.yml +151 -0
  7. data/config/image_generation_config.yml +22 -0
  8. data/config/multimodal_config.yml +85 -0
  9. data/config/sensenova_config.yml +63 -0
  10. data/config/zhipu_config.yml +73 -0
  11. data/examples/anthropic_basic_chat.rb +143 -0
  12. data/examples/anthropic_example.rb +232 -0
  13. data/examples/anthropic_multimodal.rb +212 -0
  14. data/examples/anthropic_streaming.rb +312 -0
  15. data/examples/anthropic_tool_calling.rb +393 -0
  16. data/examples/automatic_cleanup_example.rb +109 -0
  17. data/examples/history_management_examples.rb +522 -0
  18. data/examples/image_generation_example.rb +130 -0
  19. data/examples/monitoring_example.rb +121 -0
  20. data/examples/multimodal_example.rb +63 -0
  21. data/examples/relevance_based_strategy_example.rb +87 -0
  22. data/examples/sensenova_example.rb +129 -0
  23. data/examples/stt_example.rb +287 -0
  24. data/examples/tts_example.rb +244 -0
  25. data/examples/video_generation_example.rb +189 -0
  26. data/examples/zhipu_example.rb +151 -0
  27. data/lib/smart_prompt/anthropic_adapter.rb +363 -281
  28. data/lib/smart_prompt/compression_engine.rb +201 -0
  29. data/lib/smart_prompt/context_strategy.rb +22 -0
  30. data/lib/smart_prompt/conversation.rb +81 -149
  31. data/lib/smart_prompt/engine.rb +36 -19
  32. data/lib/smart_prompt/history_manager.rb +596 -0
  33. data/lib/smart_prompt/hybrid_strategy.rb +222 -0
  34. data/lib/smart_prompt/image_generation_adapter.rb +297 -0
  35. data/lib/smart_prompt/lru_cache.rb +133 -0
  36. data/lib/smart_prompt/message.rb +57 -0
  37. data/lib/smart_prompt/multimodal_adapter.rb +277 -0
  38. data/lib/smart_prompt/openai_adapter.rb +1 -25
  39. data/lib/smart_prompt/persistence_layer.rb +197 -0
  40. data/lib/smart_prompt/relevance_based_strategy.rb +221 -0
  41. data/lib/smart_prompt/sensenova_adapter.rb +410 -0
  42. data/lib/smart_prompt/session.rb +140 -0
  43. data/lib/smart_prompt/sliding_window_strategy.rb +100 -0
  44. data/lib/smart_prompt/stt_adapter.rb +381 -0
  45. data/lib/smart_prompt/summary_based_strategy.rb +152 -0
  46. data/lib/smart_prompt/token_counter.rb +74 -0
  47. data/lib/smart_prompt/tts_adapter.rb +403 -0
  48. data/lib/smart_prompt/version.rb +1 -1
  49. data/lib/smart_prompt/video_generation_adapter.rb +330 -0
  50. data/lib/smart_prompt/worker.rb +25 -3
  51. data/lib/smart_prompt/zhipu_adapter.rb +616 -0
  52. data/lib/smart_prompt.rb +22 -2
  53. data/workers/history_management_examples.rb +407 -0
  54. data/workers/image_generation_workers.rb +119 -0
  55. data/workers/multimodal_workers.rb +110 -0
  56. data/workers/sensenova_workers.rb +62 -0
  57. data/workers/stt_workers.rb +195 -0
  58. data/workers/tts_workers.rb +388 -0
  59. data/workers/video_generation_workers.rb +264 -0
  60. data/workers/zhipu_workers.rb +113 -0
  61. metadata +95 -5
@@ -0,0 +1,221 @@
1
+ require_relative 'context_strategy'
2
+
3
+ module SmartPrompt
4
+ # RelevanceBasedStrategy implements a context selection strategy based on
5
+ # semantic relevance and importance scoring
6
+ #
7
+ # This strategy:
8
+ # - Calculates importance scores combining recency and relevance
9
+ # - Selects top-k most important messages
10
+ # - Supports both keyword-based and embedding-based similarity
11
+ # - Maintains temporal ordering of selected messages
12
+ # - Trims to token limits while preserving important context
13
+ class RelevanceBasedStrategy
14
+ include ContextStrategy
15
+
16
# Build a relevance-based strategy from an options hash.
# Any option that is missing (or nil/false) falls back to its default.
# @param config [Hash] Configuration options
# @option config [Integer] :top_k (10) Number of top messages to select
# @option config [Float] :recency_weight (0.3) Weight for recency in scoring
# @option config [Float] :relevance_weight (0.7) Weight for relevance in scoring
# @option config [Object] :embedding_service (nil) Optional embedding service;
#   when present it is used for semantic similarity instead of keyword overlap
def initialize(config = {})
  @embedding_service = config[:embedding_service]
  @relevance_weight = config[:relevance_weight] || 0.7
  @recency_weight = config[:recency_weight] || 0.3
  @top_k = config[:top_k] || 10
end
28
+
29
# Select the most important messages for the current turn.
#
# Each message is scored via #calculate_score, the list is ranked once
# (highest score first; equal scores prefer the later message so the outcome
# is deterministic), the top-k are kept, re-ordered by timestamp and finally
# trimmed to the token budget.
#
# @param messages [Array<Message>] All messages in the session
# @param max_tokens [Integer, nil] Maximum token limit for selected messages
# @param current_message [Message, nil] The current message for relevance calculation;
#   when nil the method falls back to recency-only selection
# @return [Array<Message>] Selected messages ordered by timestamp
def select_messages(messages, max_tokens, current_message = nil)
  return [] if messages.nil? || messages.empty?

  # If no current message, fall back to recency-only selection
  unless current_message
    log_debug "No current message provided, falling back to recency-based selection"
    return select_by_recency(messages, max_tokens)
  end

  # Score every message, remembering its original position for tie-breaking.
  total = messages.length
  scored = messages.each_with_index.map do |msg, idx|
    [msg, calculate_score(msg, idx, total, current_message), idx]
  end

  # Rank once and reuse the ranking for both the debug log and the selection.
  # sort_by is not stable, so the position is part of the key.
  ranked = scored.sort_by { |_, score, idx| [-score, -idx] }

  top_scores = ranked.take(5).map { |_, score, _| score.round(3) }
  log_debug "RelevanceBasedStrategy: calculated scores for #{messages.count} messages, top 5 scores: #{top_scores.inspect}"

  top_ranked = ranked.take(@top_k)
  log_debug "RelevanceBasedStrategy: selected top #{top_ranked.count}/#{messages.count} messages by importance (recency_weight=#{@recency_weight}, relevance_weight=#{@relevance_weight})"

  # Re-order by timestamp to maintain conversation flow; messages sharing a
  # timestamp keep their original relative order.
  selected = top_ranked.sort_by { |msg, _, idx| [msg.timestamp, idx] }.map(&:first)

  # Trim to token limit if specified
  result = max_tokens ? trim_to_token_limit(selected, max_tokens) : selected

  if max_tokens && result.count < selected.count
    tokens_before = selected.sum { |m| m.token_count || 0 }
    tokens_after = result.sum { |m| m.token_count || 0 }
    log_debug "RelevanceBasedStrategy: trimmed to token limit #{max_tokens}: #{selected.count} -> #{result.count} messages, #{tokens_before} -> #{tokens_after} tokens"
  end

  result
end
75
+
76
# Tell the caller whether the session has grown enough to warrant compression.
# The threshold is three times the configured top_k.
# @param session [Session] The session to evaluate (must respond to #message_count)
# @return [Boolean] true if message count > 3 * top_k
def should_compress?(session)
  threshold = @top_k * 3
  session.message_count > threshold
end
83
+
84
+ private
85
+
86
# Combine recency and relevance into a single importance score.
# @param message [Message] The message to score
# @param index [Integer] Position of message in the session (0 = oldest)
# @param total [Integer] Total number of messages
# @param current_message [Message] Current message for relevance comparison
# @return [Float] Weighted sum of both scores (stays within 0.0-1.0 as long as
#   the two configured weights sum to at most 1.0)
def calculate_score(message, index, total, current_message)
  # Recency grows linearly from 0.0 (oldest) to 1.0 (newest); a lone message gets 1.0.
  recency_score =
    if total > 1
      index.to_f / (total - 1)
    else
      1.0
    end

  # Relevance uses embeddings when a service is configured, keywords otherwise.
  relevance_score =
    if @embedding_service
      calculate_semantic_similarity(message, current_message)
    else
      calculate_keyword_similarity(message, current_message)
    end

  @recency_weight * recency_score + @relevance_weight * relevance_score
end
106
+
107
# Calculate semantic similarity using embeddings.
# Both message contents are embedded through @embedding_service and compared
# with cosine similarity. Any StandardError raised along the way is logged
# (when a logger is configured) and keyword similarity is used instead.
# @param msg1 [Message] First message
# @param msg2 [Message] Second message
# @return [Float] Cosine similarity (0.0-1.0), or the keyword similarity on failure
def calculate_semantic_similarity(msg1, msg2)
  emb1 = @embedding_service.get_embedding(msg1.content)
  emb2 = @embedding_service.get_embedding(msg2.content)
  cosine_similarity(emb1, emb2)
rescue => e
  # The logger may be nil (log_debug guards for that too); a missing logger
  # must not turn the fallback itself into a NoMethodError.
  SmartPrompt.logger&.warn "Embedding similarity failed: #{e.message}, falling back to keyword similarity"
  calculate_keyword_similarity(msg1, msg2)
end
121
+
122
# Keyword overlap between two messages, measured with the Jaccard index
# (size of the shared word set divided by the size of the combined word set).
# @param msg1 [Message] First message
# @param msg2 [Message] Second message
# @return [Float] Jaccard similarity (0.0-1.0); 0.0 when either side has no words
def calculate_keyword_similarity(msg1, msg2)
  left = extract_words(msg1.content)
  right = extract_words(msg2.content)
  return 0.0 if left.empty? || right.empty?

  # Array#& and Array#| already de-duplicate, so these behave like sets.
  shared = left & right
  combined = left | right
  return 0.0 unless combined.length > 0

  shared.length.to_f / combined.length
end
140
+
141
# Extract and normalize words from text.
#
# The text is lower-cased and tokenized Unicode-aware:
# - every Han (CJK ideograph) character is its own token, because such text
#   is written without spaces and would otherwise never produce any overlap;
# - any other run of letters, digits or underscores is one token.
# Non-Han tokens of one or two characters are dropped as not meaningful.
# No stop-word list is applied.
#
# @param text [String, nil] Text to process
# @return [Array<String>] Normalized tokens in order of appearance (may repeat)
def extract_words(text)
  return [] if text.nil? || text.empty?

  # \w is ASCII-only in Ruby, so Unicode properties are used instead.
  tokens = text.downcase.scan(/\p{Han}|[\p{L}\p{N}_&&[^\p{Han}]]+/)

  # Keep single Han characters; filter very short tokens of other scripts.
  tokens.select { |token| token.length > 2 || token.match?(/\A\p{Han}\z/) }
end
153
+
154
# Cosine similarity of two numeric vectors, clamped to the 0.0-1.0 range.
# @param vec1 [Array<Float>] First vector
# @param vec2 [Array<Float>] Second vector
# @return [Float] Cosine similarity (0.0-1.0); 0.0 for nil, empty,
#   mismatched-length or zero-magnitude vectors
def cosine_similarity(vec1, vec2)
  return 0.0 if [vec1, vec2].any? { |vec| vec.nil? || vec.empty? }
  return 0.0 unless vec1.length == vec2.length

  dot_product = vec1.zip(vec2).sum { |a, b| a * b }
  magnitude1 = Math.sqrt(vec1.sum { |x| x * x })
  magnitude2 = Math.sqrt(vec2.sum { |x| x * x })

  # Avoid division by zero
  return 0.0 if magnitude1 == 0.0 || magnitude2 == 0.0

  similarity = dot_product / (magnitude1 * magnitude2)

  # Cosine is negative for opposing vectors; those count as "not similar".
  floored = [similarity, 0.0].max
  [floored, 1.0].min
end
178
+
179
# Recency-only fallback used when there is no current message to compare against.
# Takes the last top_k messages and, if a budget is given, trims them to it.
# @param messages [Array<Message>] All messages
# @param max_tokens [Integer, nil] Maximum token limit
# @return [Array<Message>] Most recent messages
def select_by_recency(messages, max_tokens)
  recent = messages.last(@top_k)
  return recent unless max_tokens

  trim_to_token_limit(recent, max_tokens)
end
187
+
188
# Trim messages to fit within a token budget.
# Walks from the newest message backwards and keeps messages while the running
# total still fits; it stops at the first message that would exceed the budget,
# so the result is always a contiguous tail of the input.
# @param messages [Array<Message>] Messages to trim (oldest first)
# @param max_tokens [Integer] Maximum token limit
# @return [Array<Message>] Trimmed messages, still oldest first
def trim_to_token_limit(messages, max_tokens)
  return messages unless max_tokens
  return [] if messages.empty?

  running_total = 0
  newest_first = messages.reverse_each.take_while do |msg|
    # A message without a token count is treated as costing nothing.
    running_total += msg.token_count || 0
    running_total <= max_tokens
  end

  newest_first.reverse
end
214
+
215
# Logging helper: writes a debug line tagged with the strategy name.
# Does nothing when SmartPrompt has no logger configured.
# @param message [String] Text to log
def log_debug(message)
  logger = SmartPrompt.logger
  logger.debug "[RelevanceBasedStrategy] #{message}" if logger
end
220
+ end
221
+ end
@@ -0,0 +1,410 @@
1
+ require "base64"
2
+ require "json"
3
+ require "net/http"
4
+ require "uri"
5
+ require "fileutils"
6
+
7
+ module SmartPrompt
8
+ # Adapter for SenseNova (商汤 日日新) — the SenseCore large-model platform.
9
+ #
10
+ # One adapter owns the whole provider: all four documented model categories share the
11
+ # same `api.sensenova.cn` domain and Bearer-token auth, so a single config block serves
12
+ # them just by changing `model`.
13
+ #
14
+ # 1. 商量 文本对话 / 多模态 (chat + vision) — OpenAI-compatible
15
+ # POST {url}/chat/completions (url defaults to .../compatible-mode/v2)
16
+ # Streaming is SSE; the model may emit a `reasoning`/`delta.reasoning` field on
17
+ # reasoning models, which we remap to OpenAI's `reasoning_content` so the engine's
18
+ # stream aggregator (Engine#@stream_proc) keeps working unchanged.
19
+ # 2. Cupido 向量模型 (embeddings) — native, non-OpenAI response shape
20
+ # POST {embeddings_url} (defaults to .../v1/llm/embeddings)
21
+ # Body {model, input:[...]}; response {embeddings:[{index, embedding, ...}]}.
22
+ # 3. 秒画 文生图 (text-to-image) — OpenAI-compatible /images/generations
23
+ # POST {image_url} (native /v1 base, e.g. .../v1/images/generations;
24
+ # NOT under compatible-mode/v2, which 404s)
25
+ #
26
+ # We talk to the endpoints directly with Net::HTTP (like the image/tts/stt adapters)
27
+ # rather than the `openai` gem, because we must surface SenseNova's `reasoning` field,
28
+ # remap streaming deltas, and handle the native embeddings shape. No new gem deps.
29
+ class SenseNovaAdapter < LLMAdapter
30
+ DEFAULT_BASE_URL = "https://api.sensenova.cn/compatible-mode/v2".freeze
31
+ DEFAULT_EMBEDDINGS_URL = "https://api.sensenova.cn/v1/llm/embeddings".freeze
32
+ # 秒画 text-to-image (sensenova-u1-fast) lives on the token.sensenova.cn /v1 base
33
+ # (confirmed working 2026-06-19). NOT under compatible-mode/v2, which 404s.
34
+ DEFAULT_IMAGE_URL = "https://token.sensenova.cn/v1/images/generations".freeze
35
+ # Sizes accepted by sensenova-u1-fast (the API 400s on anything else, e.g. 1024x1024).
36
+ VALID_IMAGE_SIZES = %w[
37
+ 1664x2496 2496x1664 1760x2368 2368x1760 1824x2272 2272x1824
38
+ 2048x2048 2752x1536 1536x2752 3072x1376 1344x3136 2560x720 3072x864
39
+ ].freeze
40
+ DEFAULT_IMAGE_SIZE = "2048x2048".freeze
41
+ SUPPORTED_IMAGE_FORMATS = %w[jpg jpeg png gif bmp webp].freeze
42
+
43
+ # SenseNova sampling parameters forwarded from config to the chat request when present.
44
+ CHAT_OPTIONAL_KEYS = %w[
45
+ top_p top_k min_p presence_penalty frequency_penalty repetition_penalty
46
+ reasoning_effort max_completion_tokens max_tokens
47
+ ].freeze
48
+
49
# Set up the adapter from its config block.
#
# Reads "api_key", "url", "embeddings_url" and "image_url" from @config,
# falling back to the DEFAULT_* constants for the three URLs. An api_key
# written as ENV["NAME"] / ENV['NAME'] is resolved from the environment.
#
# @param config [Hash] adapter configuration, forwarded unchanged to the superclass
# @raise [LLMAPIError, SmartPrompt::Error] when initialization fails; the
#   message is prefixed with "Invalid SenseNova configuration"
def initialize(config)
  super
  SmartPrompt.logger.info "Start create the SmartPrompt SenseNovaAdapter."

  api_key = @config["api_key"]
  if api_key.is_a?(String) && api_key.start_with?("ENV[") && api_key.end_with?("]")
    # The plain ENV["NAME"] form needs no code execution: look the name up directly.
    env_ref = api_key.match(/\AENV\[\s*(["'])([^"'#\\]+)\1\s*\]\z/)
    api_key =
      if env_ref
        ENV[env_ref[2]]
      else
        # NOTE(security): anything more complex than a single ENV lookup is still
        # evaluated as Ruby for backward compatibility. This executes arbitrary
        # code from the config file, so the config must come from a trusted source.
        eval(api_key)
      end
  end
  # Match the other adapters: tolerate a missing key at construction (e.g. when the
  # ENV var isn't set yet) and let the first request fail with a clear auth error.
  SmartPrompt.logger.warn "SenseNova api_key is empty — API calls will fail until it is set." if api_key.nil? || api_key.to_s.strip.empty?

  @api_key = api_key
  @base_url = (@config["url"] || DEFAULT_BASE_URL).to_s.chomp("/")
  @embeddings_url = (@config["embeddings_url"] || DEFAULT_EMBEDDINGS_URL).to_s
  # Image generation has its own endpoint, separate from the chat base URL.
  # Override per-llm with "image_url" if needed.
  @image_url = (@config["image_url"] || DEFAULT_IMAGE_URL).to_s
  SmartPrompt.logger.info "SenseNova base_url=#{@base_url}"
rescue => e
  SmartPrompt.logger.error "Failed to initialize SenseNova client: #{e.message}"
  # Library errors keep their class; everything else is reported as LLMAPIError.
  error = e.is_a?(SmartPrompt::Error) ? e : LLMAPIError
  raise error, "Invalid SenseNova configuration: #{e.message}"
end
72
+
73
+ public
74
+
75
# Chat / multimodal request against "{base_url}/chat/completions".
#
# Without +proc+ the call is blocking: the reply is reshaped by
# #build_completion_response, stored in @last_response and returned.
# With +proc+ the request is streamed: +proc+ is called with each chunk built
# by #build_stream_chunk plus a constant 0 as second argument, and nil is returned.
#
# @param messages [Array<Hash>] chat messages
# @param model [String, nil] model name; defaults to @config["model"]
# @param temperature [Numeric, nil] passed on to #build_chat_body
# @param tools [Array, nil] tool definitions, passed on to #build_chat_body
# @param proc [#call, nil] streaming callback
# @return [Hash, nil] completion hash, or nil when streaming
# @raise [LLMAPIError] for any failure that is not already an LLMAPIError/Error
def send_request(messages, model = nil, temperature = nil, tools = nil, proc = nil)
  model_name = model || @config["model"]
  body = build_chat_body(messages, model_name, temperature, tools)
  endpoint = "#{@base_url}/chat/completions"
  SmartPrompt.logger.info "SenseNovaAdapter: chat request model=#{model_name} stream=#{!proc.nil?}"

  unless proc
    raw = http_post_json(endpoint, body)
    response = build_completion_response(raw)
    @last_response = response
    SmartPrompt.logger.info "SenseNovaAdapter: received chat response"
    return response
  end

  body["stream"] = true
  stream_chat(endpoint, body) do |data|
    proc.call(build_stream_chunk(data), 0)
  end
  SmartPrompt.logger.info "SenseNovaAdapter: streaming request finished"
  nil
rescue LLMAPIError, Error
  # Already one of ours: let it through untouched.
  raise
rescue => e
  SmartPrompt.logger.error "SenseNova chat error: #{e.message}"
  raise LLMAPIError, "Failed to call SenseNova chat: #{e.message}"
end
102
+
103
# Embed a single text through the embeddings endpoint (@embeddings_url).
# The request body is {model, input: [text]}; the reply is expected to carry a
# list under "embeddings" (or "data"), and the first item's "embedding" is returned.
#
# @param text [#to_s] text to embed
# @param model [String, nil] model name; falls back to @config["embedding_model"],
#   then @config["model"]
# @return [Object] the "embedding" value of the first item (presumably an Array of floats)
# @raise [LLMAPIError] when the call fails or the reply has no embedding
def embeddings(text, model)
  model_name = model || @config["embedding_model"] || @config["model"]
  SmartPrompt.logger.info "SenseNovaAdapter: embeddings model=#{model_name}"

  payload = { "model" => model_name, "input" => [text.to_s] }
  response =
    begin
      http_post_json(@embeddings_url, payload)
    rescue LLMAPIError, Error
      raise
    rescue => e
      raise LLMAPIError, "Failed to call SenseNova embeddings: #{e.message}"
    end

  items = response["embeddings"] || response["data"]
  vector = items[0]["embedding"] if items.is_a?(Array) && items.any?
  raise LLMAPIError, "No embedding vector in SenseNova response: #{response.inspect}" unless vector

  vector
end
125
+
126
# Text-to-image request against @image_url.
# The reply is read defensively: the image list may be under "data" (OpenAI
# style) or under "images".
#
# @param prompt [String] image description; must not be blank
# @param params [Hash] optional settings, read by symbol key: :model, :n,
#   :size (or :image_size), :response_format, :negative_prompt, :seed,
#   :num_inference_steps, :guidance_scale
# @return [Array<Hash>] one {url:, b64_json:, seed:} hash per generated image
# @raise [Error] on a blank prompt, a missing model, or an unexpected transport failure
# @raise [LLMAPIError] when the reply contains no image list
def generate_image(prompt, params = {})
  SmartPrompt.logger.info "SenseNovaAdapter: generating image"
  raise Error, "Prompt cannot be empty" if prompt.nil? || prompt.to_s.strip.empty?

  model_name = params[:model] || @config["image_model"] || @config["model"]
  raise Error, "No model configured for image generation" if model_name.nil? || model_name.to_s.strip.empty?

  body = { "model" => model_name, "prompt" => prompt.to_s }
  body["n"] = params[:n] if params[:n]
  # Size is always sent; resolve_image_size supplies the default.
  body["size"] = resolve_image_size(params[:size] || params[:image_size])
  %i[response_format negative_prompt seed num_inference_steps guidance_scale].each do |key|
    body[key.to_s] = params[key] if params[key]
  end

  # The prompt is left out of the log line.
  loggable = body.reject { |key, _| key == "prompt" }
  SmartPrompt.logger.info "SenseNova image params: #{loggable.inspect}"

  response =
    begin
      http_post_json(@image_url, body)
    rescue LLMAPIError, Error
      raise
    rescue => e
      raise Error, "Failed to call SenseNova image generation: #{e.message}"
    end

  items = response["data"] || response["images"]
  unless items.is_a?(Array) && items.any?
    SmartPrompt.logger.error "SenseNova image response had no data: #{response.inspect}"
    raise LLMAPIError, "No image data in SenseNova response"
  end

  images = items.map do |entry|
    { url: entry["url"], b64_json: entry["b64_json"], seed: entry["seed"] }
  end
  SmartPrompt.logger.info "SenseNovaAdapter: generated #{images.size} image(s)"
  images
end
168
+
169
# Write generated images to disk.
# Accepts the Array returned by #generate_image or a single image hash; the
# output directory is created if needed and files are numbered from 1.
# @param image_data [Array<Hash>, Hash] image(s) to save
# @param output_dir [String] target directory
# @param filename_prefix [String] file name stem; "_<n>" is appended
# @return [Array<String>] paths returned by #save_single_image
def save_image(image_data, output_dir = "./output", filename_prefix = "sensenova_image")
  FileUtils.mkdir_p(output_dir)
  batch = image_data.is_a?(Array) ? image_data : [image_data]

  saved = batch.map.with_index(1) do |img, number|
    save_single_image(img, output_dir, "#{filename_prefix}_#{number}")
  end

  SmartPrompt.logger.info "Saved #{saved.size} SenseNova image(s) to #{output_dir}"
  saved
end
179
+
180
+ private
181
+
182
+ # ---- chat request building ------------------------------------------------
183
+
184
# Assemble the JSON body for a chat completion request.
# @param messages [Array<Hash>] chat messages (normalized via #process_multimodal_messages)
# @param model_name [String] model to request
# @param temperature [Numeric, nil] per-call temperature
# @param tools [Array, nil] tool definitions; only sent when non-empty
# @return [Hash] request body with string keys
def build_chat_body(messages, model_name, temperature, tools)
  # Note the precedence: a temperature in the config wins over the per-call
  # argument, and 0.7 is used when neither is given.
  effective_temperature = @config["temperature"] || temperature || 0.7

  body = {
    "model" => model_name,
    "messages" => process_multimodal_messages(messages),
    "temperature" => effective_temperature,
  }

  # Forward only those optional sampling keys the config actually defines.
  CHAT_OPTIONAL_KEYS.each do |key|
    next unless @config.key?(key)

    body[key] = @config[key]
  end

  has_tools = tools && !tools.empty?
  body["tools"] = tools if has_tools
  body
end
194
+
195
# Reduce every message to a {"role", "content"} hash with string keys.
# Role and content are read by symbol key first, then by string key. Array
# content (multimodal parts) is normalized item by item through
# #normalize_content_item; any other content is passed through unchanged.
# @param messages [Array<Hash>] incoming messages
# @return [Array<Hash>] messages ready for the request body
def process_multimodal_messages(messages)
  messages.map do |msg|
    content = msg[:content] || msg["content"]
    if content.is_a?(Array)
      content = content.map { |part| normalize_content_item(part) }
    end

    { "role" => msg[:role] || msg["role"], "content" => content }
  end
end
205
+
206
# Normalize one element of a multimodal content array.
# - anything that is not a Hash becomes a text part;
# - an "image_url" part is rebuilt with its URL run through #normalize_image_url
#   (the URL may be given directly or nested under :url / "url");
# - every other Hash is returned with its keys stringified.
# @param item [Object] content part
# @return [Hash] content part with string keys
def normalize_content_item(item)
  return { "type" => "text", "text" => item.to_s } unless item.is_a?(Hash)

  kind = item[:type] || item["type"]
  return stringify_hash(item) unless kind == "image_url"

  image_ref = item[:image_url] || item["image_url"]
  url =
    if image_ref.is_a?(Hash)
      image_ref[:url] || image_ref["url"]
    else
      image_ref
    end

  { "type" => "image_url", "image_url" => { "url" => normalize_image_url(url) } }
end
218
+
219
# Turn an image reference into something the API can consume.
# nil and http://, https:// or data: URLs are returned untouched; anything
# else is treated as a local file path and inlined as a base64 data: URL.
# @param url [String, nil] remote URL, data URL or local path
# @return [String, nil] URL to send
# @raise [Error] when the local file is missing or its extension is not in
#   SUPPORTED_IMAGE_FORMATS
def normalize_image_url(url)
  return url if url.nil?

  already_url = url.start_with?("http://", "https://", "data:")
  return url if already_url

  raise Error, "Image file not found: #{url}" unless File.exist?(url)

  ext = File.extname(url).downcase.delete(".")
  raise Error, "Unsupported image format: #{ext}" unless SUPPORTED_IMAGE_FORMATS.include?(ext)

  # "jpg" is the only extension whose MIME subtype differs from it.
  mime = ext == "jpg" ? "jpeg" : ext
  encoded = Base64.strict_encode64(File.binread(url))
  "data:image/#{mime};base64,#{encoded}"
end
229
+
230
+ # ---- response shaping -----------------------------------------------------
231
+
232
# Reshape a non-streaming reply into an OpenAI-style completion hash.
# Only the first choice is kept. The message's "reasoning" field, when present,
# is exposed as "reasoning_content"; "usage" and "system_fingerprint" are copied
# only when the raw reply has them.
# @param raw [Hash] parsed JSON reply
# @return [Hash] completion hash with a single entry under "choices"
def build_completion_response(raw)
  source = raw.dig("choices", 0, "message") || {}

  message = {
    "role" => source["role"] || "assistant",
    "content" => source["content"],
  }
  message["reasoning_content"] = source["reasoning"] if source["reasoning"]
  message["tool_calls"] = source["tool_calls"] if source["tool_calls"]

  first_choice = {
    "index" => 0,
    "message" => message,
    "finish_reason" => raw.dig("choices", 0, "finish_reason"),
  }

  response = {
    "id" => raw["id"],
    "object" => raw["object"] || "chat.completion",
    "created" => raw["created"],
    "model" => raw["model"],
    "choices" => [first_choice],
  }
  %w[usage system_fingerprint].each do |key|
    response[key] = raw[key] if raw[key]
  end
  response
end
256
+
257
# Reshape one parsed streaming event into an OpenAI-style chunk.
# Only the first choice is forwarded, and its delta.reasoning is renamed to
# delta.reasoning_content. Delta fields that are absent (nil/false) are left out.
# An event without choices produces a chunk with an empty "choices" array.
# @param data [Hash] parsed SSE payload
# @return [Hash] streaming chunk
def build_stream_chunk(data)
  chunk = {}
  %w[id object created model].each { |key| chunk[key] = data[key] }
  %w[usage system_fingerprint].each { |key| chunk[key] = data[key] if data[key] }

  choices = data["choices"] || []
  unless choices.any?
    # Usage-only final event (choices is an empty array).
    chunk["choices"] = []
    return chunk
  end

  first = choices[0]
  delta = first["delta"] || {}

  # Source key => target key, in the order the fields are emitted.
  field_map = {
    "role" => "role",
    "content" => "content",
    "reasoning" => "reasoning_content",
    "tool_calls" => "tool_calls",
  }
  new_delta = {}
  field_map.each do |from, to|
    new_delta[to] = delta[from] if delta[from]
  end

  chunk["choices"] = [{
    "index" => first["index"] || 0,
    "delta" => new_delta,
    "finish_reason" => first["finish_reason"],
  }]
  chunk
end
289
+
290
+ # ---- HTTP -----------------------------------------------------------------
291
+
292
# POST a JSON body with Bearer authentication and return the parsed reply.
# Uses a 30 s connect timeout and a 240 s read timeout; TLS is enabled for
# https URLs.
# @param url [String] absolute endpoint URL
# @param body [Hash] request payload, serialized with #to_json
# @return [Object] parsed JSON of a 2xx reply
# @raise [LLMAPIError] on any non-2xx status (status code and body are included)
def http_post_json(url, body)
  uri = URI.parse(url)
  payload = body.to_json

  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = (uri.scheme == "https")
  http.open_timeout = 30
  http.read_timeout = 240

  request = Net::HTTP::Post.new(uri.request_uri)
  request["Content-Type"] = "application/json"
  request["Authorization"] = "Bearer #{@api_key}"
  request.body = payload

  SmartPrompt.logger.debug "SenseNova POST #{uri} body=#{payload}"
  response = http.request(request)
  return JSON.parse(response.body) if response.is_a?(Net::HTTPSuccess)

  failure = "SenseNova API error: #{response.code} - #{response.body}"
  SmartPrompt.logger.error failure
  raise LLMAPIError, failure
end
314
+
315
# POST a streaming chat request and yield each parsed SSE `data:` payload.
#
# The response body is consumed incrementally and split on newlines. Lines that
# are blank or do not start with "data:" are ignored, payloads that are not
# valid JSON are skipped, and the literal "[DONE]" payload ends processing.
# A final line that arrives without a trailing newline is processed too, once
# the body has been read completely, so the last event is not lost.
#
# Uses a 30 s connect timeout and a 300 s read timeout.
#
# @param url [String] absolute endpoint URL
# @param body [Hash] request payload (the caller sets "stream" => true)
# @yieldparam data [Object] parsed JSON of one event
# @raise [LLMAPIError] when the server answers with a non-2xx status
def stream_chat(url, body)
  uri = URI.parse(url)
  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = (uri.scheme == "https")
  http.open_timeout = 30
  http.read_timeout = 300

  request = Net::HTTP::Post.new(uri.request_uri)
  request["Content-Type"] = "application/json"
  request["Authorization"] = "Bearer #{@api_key}"
  request["Accept"] = "text/event-stream"
  request.body = body.to_json

  buffer = +""
  done = false

  # Handle one raw line; the result is true once the [DONE] sentinel was seen.
  handle_line = lambda do |raw_line|
    line = raw_line.strip
    if line.empty? || !line.start_with?("data:")
      false
    else
      payload = line.sub(/\Adata:\s*/, "")
      if payload == "[DONE]"
        true
      else
        data =
          begin
            JSON.parse(payload)
          rescue JSON::ParserError
            nil
          end
        # `yield` here hands the event to the block given to stream_chat.
        yield data unless data.nil?
        false
      end
    end
  end

  http.request(request) do |response|
    unless response.is_a?(Net::HTTPSuccess)
      raise LLMAPIError, "SenseNova stream error: #{response.code} - #{response.body}"
    end

    response.read_body do |segment|
      break if done
      buffer << segment
      # A segment can hold several lines or only part of one, so consume
      # complete lines and leave the remainder in the buffer.
      while !done && (idx = buffer.index("\n"))
        done = handle_line.call(buffer.slice!(0, idx + 1))
      end
    end

    # Whatever is left is a last line the server did not terminate with "\n".
    handle_line.call(buffer) unless done || buffer.empty?
  end
end
360
+
361
# Decide which image size to send.
# A nil or blank size yields DEFAULT_IMAGE_SIZE. Any other value is returned as
# a String; if it is not listed in VALID_IMAGE_SIZES a warning is logged, but
# the value is still used.
# @param size [#to_s, nil] requested size, e.g. "2048x2048"
# @return [String] size to put into the request body
def resolve_image_size(size)
  requested = size.to_s
  return DEFAULT_IMAGE_SIZE if requested.strip.empty?

  size = requested
  return size if VALID_IMAGE_SIZES.include?(size)

  SmartPrompt.logger.warn "SenseNova image size '#{size}' is not in the known-valid list " \
                          "(#{VALID_IMAGE_SIZES.join(', ')}); the API may reject it."
  size
end
372
+
373
# Write one generated image to disk and return its path.
#
# Base64 data (:b64_json) is preferred and always written as "<filename>.png".
# Otherwise the image is downloaded from :url and the extension is derived from
# the reply's Content-Type, ignoring letter case and any parameters such as
# "; charset=..."; unknown types fall back to "png".
#
# @param image_data [Hash] image hash with :b64_json and/or :url
# @param output_dir [String] existing target directory
# @param filename [String] file name without extension
# @return [String] path of the written file
# @raise [Error] when the download fails or the hash carries no image data
def save_single_image(image_data, output_dir, filename)
  if image_data[:b64_json]
    file_path = File.join(output_dir, "#{filename}.png")
    File.binwrite(file_path, Base64.decode64(image_data[:b64_json]))
  elsif image_data[:url]
    uri = URI.parse(image_data[:url])
    response = Net::HTTP.get_response(uri)
    raise Error, "Failed to download image from URL: #{response.code}" unless response.is_a?(Net::HTTPSuccess)

    # Reduce e.g. "Image/JPEG; charset=binary" to "image/jpeg" before matching.
    media_type = response["content-type"].to_s.split(";").first.to_s.strip.downcase
    ext = case media_type
          when "image/jpeg", "image/jpg" then "jpg"
          when "image/png" then "png"
          when "image/gif" then "gif"
          when "image/webp" then "webp"
          else "png"
          end
    file_path = File.join(output_dir, "#{filename}.#{ext}")
    File.binwrite(file_path, response.body)
  else
    raise Error, "No image data available to save"
  end
  file_path
end
396
+
397
# Recursively convert all Hash keys to Strings.
# Hashes are rebuilt with stringified keys, Arrays are mapped element by
# element, and every other value is returned as is.
# @param hash [Object] value to convert (despite the name, any object is accepted)
# @return [Object] converted copy, or the value itself for non-collections
def stringify_hash(hash)
  if hash.is_a?(Hash)
    hash.to_h { |key, value| [key.to_s, stringify_hash(value)] }
  elsif hash.is_a?(Array)
    hash.map { |element| stringify_hash(element) }
  else
    hash
  end
end
409
+ end
410
+ end