RubyGems - smart_prompt - Versions diffs - 0.5.2 → 0.5.3 - Mend

smart_prompt 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

checksums.yaml +4 -4
data/config/siliconflow_config.yml +95 -0
data/examples/siliconflow_example.rb +175 -0
data/lib/smart_prompt/adapters/siliconflow/embed.rb +33 -0
data/lib/smart_prompt/adapters/siliconflow/image.rb +103 -0
data/lib/smart_prompt/adapters/siliconflow/rerank.rb +41 -0
data/lib/smart_prompt/adapters/siliconflow/text.rb +54 -0
data/lib/smart_prompt/adapters/siliconflow/video.rb +111 -0
data/lib/smart_prompt/adapters/siliconflow/voice.rb +102 -0
data/lib/smart_prompt/adapters/zhipu/embed.rb +32 -0
data/lib/smart_prompt/adapters/zhipu/image.rb +59 -0
data/lib/smart_prompt/adapters/zhipu/rerank.rb +17 -0
data/lib/smart_prompt/adapters/zhipu/text.rb +57 -0
data/lib/smart_prompt/adapters/zhipu/video.rb +101 -0
data/lib/smart_prompt/adapters/zhipu/voice.rb +55 -0
data/lib/smart_prompt/concerns/http_client.rb +147 -0
data/lib/smart_prompt/concerns/image_persistence.rb +62 -0
data/lib/smart_prompt/concerns/multimodal_messages.rb +108 -0
data/lib/smart_prompt/concerns/openai_chat_shaping.rb +87 -0
data/lib/smart_prompt/siliconflow_adapter.rb +91 -0
data/lib/smart_prompt/version.rb +1 -1
data/workers/siliconflow_workers.rb +167 -0
metadata +21 -1

data/lib/smart_prompt/adapters/siliconflow/voice.rb ADDED Viewed

@@ -0,0 +1,102 @@
+module SmartPrompt
+  module SiliconFlow
+    # Speech synthesis (CosyVoice2 / MOSS-TTSD), speech recognition (SenseVoiceSmall),
+    # and custom-voice cloning management.
+    module Voice
+      # Returns a base64 data URL for the synthesized audio. SiliconFlow's
+      # /audio/speech returns the raw binary audio stream (NOT base64 / NOT JSON),
+      # so we base64-encode it ourselves.
+      def synthesize_speech(text, voice: nil, model: nil, response_format: "mp3", **opts)
+        SmartPrompt.logger.info "SiliconFlowAdapter: TTS"
+        raise Error, "Text cannot be empty" if text.nil? || text.to_s.strip.empty?
+        model_name = model || @config["tts_model"] || "FunAudioLLM/CosyVoice2-0.5B"
+        body = { "model" => model_name, "input" => text.to_s }
+        body["voice"]           = voice            if voice
+        body["response_format"] = response_format
+        body["speed"]           = opts[:speed]           if opts[:speed]
+        body["sample_rate"]     = opts[:sample_rate]     if opts[:sample_rate]
+        body["gain"]            = opts[:gain]            if opts[:gain]
+        body["language"]        = opts[:language]        if opts[:language]
+        audio = http_post_binary(@speech_url, body)
+        "data:audio/#{response_format};base64,#{Base64.strict_encode64(audio)}"
+      rescue LLMAPIError, Error
+        raise
+      rescue => e
+        raise Error, "Failed to call SiliconFlow TTS: #{e.message}"
+      end
+      def synthesize_to_file(text, output_path, voice: nil, model: nil, response_format: "mp3", **opts)
+        data_url = synthesize_speech(text, voice: voice, model: model, response_format: response_format, **opts)
+        FileUtils.mkdir_p(File.dirname(output_path))
+        audio_bytes = Base64.decode64(data_url.sub(/\Adata:audio\/\w+;base64,/, ""))
+        File.binwrite(output_path, audio_bytes)
+        SmartPrompt.logger.info "SiliconFlow audio saved to #{output_path}"
+        { file_path: output_path, format: response_format }
+      end
+      # Transcribe an audio file (local path). Returns {text:}. The transcription
+      # endpoint takes multipart/form-data with a `file` field.
+      def transcribe_audio(audio_file, model: nil, language: nil, **opts)
+        SmartPrompt.logger.info "SiliconFlowAdapter: ASR #{File.basename(audio_file)}"
+        raise Error, "Audio file not found: #{audio_file}" unless File.exist?(audio_file)
+        model_name = model || @config["asr_model"] || "FunAudioLLM/SenseVoiceSmall"
+        form = { "model" => model_name }
+        form["language"]         = language if language
+        form["prompt"]           = opts[:prompt]           if opts[:prompt]
+        form["response_format"]  = opts[:response_format]  if opts[:response_format]
+        mime = "audio/#{File.extname(audio_file).downcase.delete(".") || "wav"}"
+        response = http_post_multipart(@transcription_url, form, "file", audio_file, mime)
+        { text: response["text"] }
+      rescue LLMAPIError, Error
+        raise
+      rescue => e
+        raise e.is_a?(SmartPrompt::Error) ? e : Error, "Failed to call SiliconFlow ASR: #{e.message}"
+      end
+      # Upload a reference audio to clone a custom voice. SiliconFlow returns
+      # {"uri": "speech:..."}. `customName` (camelCase) is the display name.
+      def upload_voice(name, audio_file, text: nil, model: nil)
+        SmartPrompt.logger.info "SiliconFlowAdapter: upload voice #{name}"
+        raise Error, "Audio file not found: #{audio_file}" unless File.exist?(audio_file)
+        model_name = model || @config["tts_model"] || "FunAudioLLM/CosyVoice2-0.5B"
+        form = { "model" => model_name, "customName" => name.to_s }
+        form["text"] = text.to_s if text
+        mime = "audio/#{File.extname(audio_file).downcase.delete(".") || "wav"}"
+        response = http_post_multipart(@voice_upload_url, form, "file", audio_file, mime)
+        raise LLMAPIError, "No uri in SiliconFlow voice upload response: #{response.inspect}" unless response["uri"]
+        { uri: response["uri"], name: name.to_s, raw: response }
+      rescue LLMAPIError, Error
+        raise
+      rescue => e
+        raise e.is_a?(SmartPrompt::Error) ? e : Error, "Failed to upload SiliconFlow voice: #{e.message}"
+      end
+      def list_voices
+        SmartPrompt.logger.info "SiliconFlowAdapter: list voices"
+        response = http_get_json(@voice_list_url)
+        (response["result"] || response["voices"] || response).yield_self do |items|
+          items.is_a?(Array) ? items.map { |v| { uri: v["uri"], name: v["customName"] || v["name"] } } : response
+        end
+      rescue LLMAPIError, Error
+        raise
+      rescue => e
+        raise LLMAPIError, "Failed to list SiliconFlow voices: #{e.message}"
+      end
+      def delete_voice(uri)
+        SmartPrompt.logger.info "SiliconFlowAdapter: delete voice #{uri}"
+        response = http_post_json(@voice_delete_url, { "uri" => uri })
+        { deleted: response["deleted"].nil? ? true : response["deleted"], uri: uri, raw: response }
+      rescue LLMAPIError, Error
+        raise
+      rescue => e
+        raise LLMAPIError, "Failed to delete SiliconFlow voice: #{e.message}"
+      end
+    end
+  end
+end

data/lib/smart_prompt/adapters/zhipu/embed.rb ADDED Viewed

@@ -0,0 +1,32 @@
+module SmartPrompt
+  module ZhipuAI
+    # Embeddings (embedding-3, custom dimensions).
+    module Embed
+      # embedding-3 (default 2048 dims); supports a custom `dimensions` (256/512/1024/2048)
+      # via config. Returns the first embedding vector.
+      def embeddings(text, model)
+        model_name = model || @config["embedding_model"] || @config["model"]
+        SmartPrompt.logger.info "ZhipuAIAdapter: embeddings model=#{model_name}"
+        body = { "model" => model_name, "input" => text.is_a?(Array) ? text : [text.to_s] }
+        body["dimensions"] = @config["dimensions"] if @config["dimensions"]
+        body["encoding_format"] = @config["encoding_format"] if @config["encoding_format"]
+        response =
+          begin
+            http_post_json("#{@base_url}/embeddings", body)
+          rescue LLMAPIError, Error
+            raise
+          rescue => e
+            raise LLMAPIError, "Failed to call Zhipu embeddings: #{e.message}"
+          end
+        items = response["data"]
+        unless items.is_a?(Array) && items.any? && items[0]["embedding"]
+          raise LLMAPIError, "No embedding vector in Zhipu response: #{response.inspect}"
+        end
+        items[0]["embedding"]
+      end
+    end
+  end
+end

data/lib/smart_prompt/adapters/zhipu/image.rb ADDED Viewed

@@ -0,0 +1,59 @@
+module SmartPrompt
+  module ZhipuAI
+    # Text-to-image (CogView / GLM-Image). save_image comes from the ImagePersistence concern.
+    module Image
+      # Text-to-image generation.
+      #
+      # prompt - text prompt; nil/blank raises Error.
+      # params - optional Hash: :model, :size, :user, :response_format.
+      #          The model falls back to @config["image_model"], then @config["model"].
+      #
+      # Returns an Array of {url:, b64_json:} hashes. The response shape varies
+      # between flat and nested layouts (see parse_image_response), so it is
+      # parsed defensively.
+      #
+      # Raises Error for a blank prompt, a missing model, or an unexpected failure
+      # while calling the API; LLMAPIError / Error from the HTTP helper are
+      # re-raised as-is, and LLMAPIError is raised when no image is returned.
+      def generate_image(prompt, params = {})
+        SmartPrompt.logger.info "ZhipuAIAdapter: generating image"
+        raise Error, "Prompt cannot be empty" if prompt.nil? || prompt.to_s.strip.empty?
+        model_name = params[:model] || @config["image_model"] || @config["model"]
+        raise Error, "No model configured for image generation" if model_name.nil? || model_name.to_s.strip.empty?
+        body = { "model" => model_name, "prompt" => prompt.to_s }
+        body["size"]            = params[:size]            if params[:size]
+        body["user"]            = params[:user]            if params[:user]
+        body["response_format"] = params[:response_format] if params[:response_format]
+        # Log everything except the (potentially long) prompt.
+        SmartPrompt.logger.info "Zhipu image params: #{body.except('prompt').inspect}"
+        response =
+          begin
+            http_post_json(@image_url, body)
+          rescue LLMAPIError, Error
+            raise
+          rescue => e
+            raise Error, "Failed to call Zhipu image generation: #{e.message}"
+          end
+        images = parse_image_response(response)
+        SmartPrompt.logger.info "ZhipuAIAdapter: generated #{images.size} image(s)"
+        images
+      end
+      private
+      # Zhipu image response: cogview-3-flash returns the FLAT OpenAI shape data[].url;
+      # older docs mention a NESTED data.images[].url. Handle both, plus a bare-url
+      # array and a single bare url string (e.g. data.url).
+      #
+      # Returns an Array of {url:, b64_json:}; raises LLMAPIError when nothing usable is found.
+      def parse_image_response(response)
+        container = response["data"]
+        items =
+          if container.is_a?(Hash)
+            container["images"] || container["data"] || container["url"]
+          elsif container.is_a?(Array)
+            container
+          end
+        items ||= response["images"] || response["urls"]
+        # A lone url string (data.url) is a one-image result; without this wrap
+        # it would fail the Array check below and be reported as "no images".
+        items = [items] if items.is_a?(String) && !items.empty?
+        # Some responses return images as a bare array of URLs (strings).
+        items = items.map { |x| x.is_a?(String) ? { "url" => x } : x } if items.is_a?(Array)
+        unless items.is_a?(Array) && items.any?
+          SmartPrompt.logger.error "Zhipu image response had no images: #{response.inspect}"
+          raise LLMAPIError, "No image data in Zhipu response"
+        end
+        items.map { |d| { url: d["url"], b64_json: d["b64_json"] } }
+      end
+    end
+  end
+end

data/lib/smart_prompt/adapters/zhipu/rerank.rb ADDED Viewed

@@ -0,0 +1,17 @@
+module SmartPrompt
+  module ZhipuAI
+    # Rerank (reorder documents by relevance to a query).
+    module Rerank
+      # Posts +query+ and +documents+ to "#{@base_url}/rerank" and returns an
+      # Array of {index:, relevance_score:} built from the response's "results"
+      # (empty Array when that key is absent). The score is read from
+      # "relevance_score", falling back to "score".
+      #
+      # model: overrides @config["rerank_model"], which overrides @config["model"].
+      # Unexpected failures are wrapped in LLMAPIError.
+      def rerank(query, documents, model: nil)
+        payload = {
+          "model"     => model || @config["rerank_model"] || @config["model"],
+          "query"     => query,
+          "documents" => documents,
+        }
+        reply = http_post_json("#{@base_url}/rerank", payload)
+        ranked = reply["results"] || []
+        ranked.map do |entry|
+          { index: entry["index"], relevance_score: entry["relevance_score"] || entry["score"] }
+        end
+      rescue LLMAPIError, Error
+        raise
+      rescue => err
+        raise LLMAPIError, "Failed to call Zhipu rerank: #{err.message}"
+      end
+    end
+  end
+end

data/lib/smart_prompt/adapters/zhipu/text.rb ADDED Viewed

@@ -0,0 +1,57 @@
+module SmartPrompt
+  module ZhipuAI
+    # Text chat + vision (OpenAI-compatible /chat/completions, SSE streaming,
+    # reasoning_content passthrough). CodeGeeX/coding models use a separate base.
+    module Text
+      CHAT_OPTIONAL_KEYS = %w[
+        top_p max_tokens do_sample stop presence_penalty frequency_penalty thinking
+      ].freeze
+      # Chat / multimodal. Non-streaming returns a full OpenAI-format hash (so
+      # last_response carries usage + reasoning_content); streaming calls +proc+
+      # with each OpenAI-shaped chunk.
+      def send_request(messages, model = nil, temperature = nil, tools = nil, proc = nil)
+        model_name = model || @config["model"]
+        body = build_chat_body(messages, model_name, temperature, tools)
+        SmartPrompt.logger.info "ZhipuAIAdapter: chat request model=#{model_name} stream=#{!proc.nil?}"
+        url = chat_url_for(model_name)
+        if proc
+          body["stream"] = true
+          stream_chat(url, body) { |data| proc.call(build_stream_chunk(data), 0) }
+          SmartPrompt.logger.info "ZhipuAIAdapter: streaming request finished"
+          nil
+        else
+          raw = http_post_json(url, body)
+          response = build_completion_response(raw)
+          @last_response = response
+          SmartPrompt.logger.info "ZhipuAIAdapter: received chat response"
+          response
+        end
+      rescue LLMAPIError, Error
+        raise
+      rescue => e
+        SmartPrompt.logger.error "Zhipu chat error: #{e.message}"
+        raise LLMAPIError, "Failed to call Zhipu chat: #{e.message}"
+      end
+      private
+      def chat_url_for(model_name)
+        # CodeGeeX-4 and coding models are served from the coding base.
+        (model_name.to_s.include?("codegeex") || @config["coding"]) ? "#{@coding_base}/chat/completions" : "#{@base_url}/chat/completions"
+      end
+      def build_chat_body(messages, model_name, temperature, tools)
+        body = {
+          "model"       => model_name,
+          "messages"    => process_multimodal_messages(messages),
+          "temperature" => @config["temperature"] || temperature || 0.7,
+        }
+        CHAT_OPTIONAL_KEYS.each { |k| body[k] = @config[k] if @config.key?(k) }
+        body["tools"] = tools if tools && !tools.empty?
+        body
+      end
+    end
+  end
+end

data/lib/smart_prompt/adapters/zhipu/video.rb ADDED Viewed

@@ -0,0 +1,101 @@
+module SmartPrompt
+  module ZhipuAI
+    # Text-to-video / image-to-video (CogVideoX, async submit -> poll -> download).
+    module Video
+      # Submit a text-to-video (or image-to-video) job.
+      #
+      # params - optional Hash: :model plus :quality, :fps, :duration, :with_audio,
+      #          :resolution, :request_id, :seed (forwarded when non-nil) and
+      #          :image_url (passed through normalize_image_url).
+      #
+      # Returns {task_id:, model:, raw:}. Raises Error when no model is configured
+      # or the submission fails unexpectedly, LLMAPIError when the response has no
+      # task id.
+      def generate_video(prompt, params = {})
+        SmartPrompt.logger.info "ZhipuAIAdapter: submitting video job"
+        model_name = params[:model] || @config["video_model"] || @config["model"]
+        raise Error, "No model configured for video generation" if model_name.nil? || model_name.to_s.strip.empty?
+        body = { "model" => model_name, "prompt" => prompt.to_s }
+        # `unless nil?` (not truthiness) so an explicit false, e.g. with_audio: false, is still sent.
+        %i[quality fps duration with_audio resolution request_id seed].each do |k|
+          body[k.to_s] = params[k] unless params[k].nil?
+        end
+        body["image_url"] = normalize_image_url(params[:image_url]) if params[:image_url]
+        SmartPrompt.logger.info "Zhipu video params: #{body.except('prompt').inspect}"
+        response =
+          begin
+            http_post_json(@video_url, body)
+          rescue LLMAPIError, Error
+            raise
+          rescue => e
+            raise Error, "Failed to submit Zhipu video job: #{e.message}"
+          end
+        task_id = response["id"] || response["task_id"]
+        raise LLMAPIError, "No task id in Zhipu video response: #{response.inspect}" unless task_id
+        SmartPrompt.logger.info "ZhipuAIAdapter: video task #{task_id} submitted"
+        { task_id: task_id, model: model_name, raw: response }
+      end
+      # Poll an async task. Returns the raw status hash (task_status etc.).
+      def check_video_status(task_id)
+        SmartPrompt.logger.info "ZhipuAIAdapter: polling video task #{task_id}"
+        http_get_json("#{@query_url}/#{URI.encode_www_form_component(task_id)}")
+      rescue LLMAPIError, Error
+        raise
+      rescue => e
+        raise LLMAPIError, "Failed to query Zhipu video task: #{e.message}"
+      end
+      # Block until the task finishes (or times out), then return
+      # {task_id:, status:, video_url:, cover_image_url:, raw:}.
+      #
+      # check_interval: - seconds to sleep between polls.
+      # timeout:        - seconds after which a still-pending task raises LLMAPIError.
+      #
+      # Raises LLMAPIError on failure status, timeout, or success without a url.
+      def wait_for_video_completion(task_id, check_interval: 10, timeout: 600)
+        # Monotonic clock: elapsed time must not jump when the wall clock is adjusted.
+        started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+        loop do
+          status = check_video_status(task_id)
+          case task_status_of(status)
+          when "SUCCESS"
+            url = video_url_of(status)
+            raise LLMAPIError, "Video succeeded but no url in: #{status.inspect}" unless url
+            SmartPrompt.logger.info "ZhipuAIAdapter: video ready #{url}"
+            return { task_id: task_id, status: "SUCCESS", video_url: url, cover_image_url: cover_url_of(status), raw: status }
+          when "FAIL", "FAILED"
+            raise LLMAPIError, "Zhipu video generation failed: #{status.inspect}"
+          else
+            if Process.clock_gettime(Process::CLOCK_MONOTONIC) - started > timeout
+              raise LLMAPIError, "Zhipu video generation timeout after #{timeout}s"
+            end
+            SmartPrompt.logger.info "Zhipu video task #{task_id} still processing..."
+            sleep(check_interval)
+          end
+        end
+      end
+      # Downloads +video_url+ with a plain GET and writes the body to
+      # +output_path+ (parent directory is created). Returns +output_path+.
+      # Any failure is re-raised with an "Error downloading Zhipu video" message.
+      def download_video(video_url, output_path)
+        uri = URI.parse(video_url)
+        http = Net::HTTP.new(uri.host, uri.port); http.use_ssl = (uri.scheme == "https")
+        response = http.request(Net::HTTP::Get.new(uri.request_uri))
+        raise Error, "Failed to download video: #{response.code}" unless response.is_a?(Net::HTTPSuccess)
+        FileUtils.mkdir_p(File.dirname(output_path))
+        File.binwrite(output_path, response.body)
+        SmartPrompt.logger.info "Zhipu video saved to #{output_path}"
+        output_path
+      rescue => e
+        raise e.is_a?(SmartPrompt::Error) ? e : Error, "Error downloading Zhipu video: #{e.message}"
+      end
+      private
+      # Zhipu async task status is under task_status; accept a few aliases.
+      # Defaults to "PROCESSING" when neither key is present.
+      def task_status_of(status)
+        status["task_status"] || status["status"] || "PROCESSING"
+      end
+      # video_result is an Array: [{cover_image_url:, url:}]. Pull the first video url,
+      # falling back to top-level video_url / data.video_url when the entry has none.
+      def video_url_of(status)
+        vr = status["video_result"]
+        item = vr.is_a?(Array) ? vr[0] : vr
+        nested = item.is_a?(Hash) ? (item["url"] || item["video_url"]) : nil
+        nested || status["video_url"] || status.dig("data", "video_url")
+      end
+      # Cover image url from the first video_result entry, or nil when absent.
+      def cover_url_of(status)
+        vr = status["video_result"]
+        item = vr.is_a?(Array) ? vr[0] : vr
+        item.is_a?(Hash) ? (item["cover_image_url"] || item["cover_url"]) : nil
+      end
+    end
+  end
+end

data/lib/smart_prompt/adapters/zhipu/voice.rb ADDED Viewed

@@ -0,0 +1,55 @@
+module SmartPrompt
+  module ZhipuAI
+    # Speech synthesis (GLM-TTS) + speech recognition (GLM-ASR-2512).
+    module Voice
+      # Returns a base64 data URL for the synthesized audio. GLM-TTS accepts wav/pcm only
+      # (mp3/flac are rejected), so default to wav.
+      def synthesize_speech(text, voice: nil, model: nil, response_format: "wav", **opts)
+        SmartPrompt.logger.info "ZhipuAIAdapter: TTS"
+        raise Error, "Text cannot be empty" if text.nil? || text.to_s.strip.empty?
+        model_name = model || @config["tts_model"] || "glm-tts"
+        body = { "model" => model_name, "input" => text.to_s }
+        body["voice"] = voice if voice
+        body["response_format"] = response_format
+        body["speed"] = opts[:speed] if opts[:speed]
+        body["emotion"] = opts[:emotion] if opts[:emotion]
+        audio = http_post_binary("#{@base_url}/audio/speech", body)
+        "data:audio/#{response_format};base64,#{Base64.strict_encode64(audio)}"
+      rescue LLMAPIError, Error
+        raise
+      rescue => e
+        raise Error, "Failed to call Zhipu TTS: #{e.message}"
+      end
+      def synthesize_to_file(text, output_path, voice: nil, model: nil, response_format: "wav", **opts)
+        data_url = synthesize_speech(text, voice: voice, model: model, response_format: response_format, **opts)
+        FileUtils.mkdir_p(File.dirname(output_path))
+        audio_bytes = Base64.decode64(data_url.sub(/\Adata:audio\/\w+;base64,/, ""))
+        File.binwrite(output_path, audio_bytes)
+        SmartPrompt.logger.info "Zhipu audio saved to #{output_path}"
+        { file_path: output_path, format: response_format }
+      end
+      # Transcribe an audio file (local path). Returns {text:}.
+      def transcribe_audio(audio_file, model: nil, language: nil, **opts)
+        SmartPrompt.logger.info "ZhipuAIAdapter: ASR #{File.basename(audio_file)}"
+        raise Error, "Audio file not found: #{audio_file}" unless File.exist?(audio_file)
+        model_name = model || @config["asr_model"] || "glm-asr-2512"
+        form = { "model" => model_name }
+        form["language"] = language if language
+        form["prompt"] = opts[:prompt] if opts[:prompt]
+        form["response_format"] = opts[:response_format] if opts[:response_format]
+        response = http_post_multipart("#{@base_url}/audio/transcriptions", form, audio_file)
+        { text: response["text"] }
+      rescue LLMAPIError, Error
+        raise
+      rescue => e
+        raise e.is_a?(SmartPrompt::Error) ? e : Error, "Failed to call Zhipu ASR: #{e.message}"
+      end
+    end
+  end
+end

data/lib/smart_prompt/concerns/http_client.rb ADDED Viewed

@@ -0,0 +1,147 @@
+require "json"
+require "net/http"
+require "uri"
+module SmartPrompt
+  # Shared Net::HTTP plumbing for Net::HTTP-style adapters (ZhipuAI, SenseNova,
+  # SiliconFlow). Each previously carried its own copy of post/get/binary/multipart
+  # + SSE stream helpers, differing only in the provider label sprinkled through
+  # log/exception messages — which the `provider_label` hook now supplies.
+  #
+  # http_post_multipart takes the general 5-arg shape (file_field + mime); Zhipu's
+  # ASR call site uses a 3-arg shim defined on the adapter itself.
+  #
+  # Every helper authenticates with "Bearer #{@api_key}" and raises LLMAPIError
+  # (including status code and body) on a non-2xx response.
+  module HTTPClient
+    # POSTs +body+ as JSON and returns the parsed JSON response
+    # ({} when the response body is empty).
+    def http_post_json(url, body)
+      uri = URI.parse(url)
+      http = Net::HTTP.new(uri.host, uri.port)
+      http.use_ssl = (uri.scheme == "https")
+      http.open_timeout = 30
+      http.read_timeout = 240
+      req = Net::HTTP::Post.new(uri.request_uri)
+      req["Content-Type"]  = "application/json"
+      req["Authorization"] = "Bearer #{@api_key}"
+      req.body = body.to_json
+      SmartPrompt.logger.debug "#{provider_label} POST #{uri} body=#{body.to_json}"
+      resp = http.request(req)
+      if resp.is_a?(Net::HTTPSuccess)
+        resp.body.to_s.empty? ? {} : JSON.parse(resp.body)
+      else
+        SmartPrompt.logger.error "#{provider_label} API error: #{resp.code} - #{resp.body}"
+        raise LLMAPIError, "#{provider_label} API error: #{resp.code} - #{resp.body}"
+      end
+    end
+    # GETs +url+ and returns the parsed JSON response ({} when the body is empty).
+    def http_get_json(url)
+      uri = URI.parse(url)
+      http = Net::HTTP.new(uri.host, uri.port)
+      http.use_ssl = (uri.scheme == "https")
+      http.open_timeout = 30
+      http.read_timeout = 60
+      req = Net::HTTP::Get.new(uri.request_uri)
+      req["Authorization"] = "Bearer #{@api_key}"
+      SmartPrompt.logger.debug "#{provider_label} GET #{uri}"
+      resp = http.request(req)
+      if resp.is_a?(Net::HTTPSuccess)
+        resp.body.to_s.empty? ? {} : JSON.parse(resp.body)
+      else
+        raise LLMAPIError, "#{provider_label} API error: #{resp.code} - #{resp.body}"
+      end
+    end
+    # Returns the raw response body bytes (for binary payloads like TTS audio).
+    def http_post_binary(url, body)
+      uri = URI.parse(url)
+      http = Net::HTTP.new(uri.host, uri.port)
+      http.use_ssl = (uri.scheme == "https")
+      http.open_timeout = 30
+      http.read_timeout = 120
+      req = Net::HTTP::Post.new(uri.request_uri)
+      req["Content-Type"]  = "application/json"
+      req["Authorization"] = "Bearer #{@api_key}"
+      req.body = body.to_json
+      resp = http.request(req)
+      if resp.is_a?(Net::HTTPSuccess)
+        resp.body
+      else
+        raise LLMAPIError, "#{provider_label} TTS API error: #{resp.code} - #{resp.body}"
+      end
+    end
+    # multipart/form-data POST with a file upload (ASR, voice upload). Returns parsed JSON.
+    #
+    # form       - Hash of plain text fields (name => value).
+    # file_field - form field name for the uploaded file.
+    # file_path  - local path of the file to upload.
+    # mime       - Content-Type announced for the file part.
+    def http_post_multipart(url, form, file_field, file_path, mime)
+      uri = URI.parse(url)
+      http = Net::HTTP.new(uri.host, uri.port)
+      http.use_ssl = (uri.scheme == "https")
+      http.open_timeout = 30
+      http.read_timeout = 180
+      boundary = "----SmartPrompt#{object_id}"
+      # Assemble the payload as raw bytes. Mixing a UTF-8 string that holds
+      # non-ASCII text (e.g. a Chinese voice name or transcript) with the file's
+      # binary bytes raises Encoding::CompatibilityError, so every part is
+      # appended as BINARY.
+      body = String.new(encoding: Encoding::BINARY)
+      form.each do |k, v|
+        body << "--#{boundary}\r\n".b
+        body << "Content-Disposition: form-data; name=\"#{k}\"\r\n\r\n".b
+        body << "#{v}\r\n".b
+      end
+      body << "--#{boundary}\r\n".b
+      body << "Content-Disposition: form-data; name=\"#{file_field}\"; filename=\"#{File.basename(file_path)}\"\r\n".b
+      body << "Content-Type: #{mime}\r\n\r\n".b
+      body << File.binread(file_path)
+      body << "\r\n".b
+      body << "--#{boundary}--\r\n".b
+      req = Net::HTTP::Post.new(uri.request_uri)
+      req["Content-Type"]  = "multipart/form-data; boundary=#{boundary}"
+      req["Authorization"] = "Bearer #{@api_key}"
+      req.body = body
+      resp = http.request(req)
+      if resp.is_a?(Net::HTTPSuccess)
+        resp.body.to_s.empty? ? {} : JSON.parse(resp.body)
+      else
+        raise LLMAPIError, "#{provider_label} multipart API error: #{resp.code} - #{resp.body}"
+      end
+    end
+    # POST with stream:true and yield each parsed SSE `data:` payload to the block.
+    # Stops yielding at the "[DONE]" sentinel; payloads that are not valid JSON
+    # are skipped.
+    def stream_chat(url, body)
+      uri = URI.parse(url)
+      http = Net::HTTP.new(uri.host, uri.port)
+      http.use_ssl = (uri.scheme == "https")
+      http.open_timeout = 30
+      http.read_timeout = 300
+      req = Net::HTTP::Post.new(uri.request_uri)
+      req["Content-Type"]  = "application/json"
+      req["Authorization"] = "Bearer #{@api_key}"
+      req["Accept"]        = "text/event-stream"
+      req.body = body.to_json
+      # Network segments can split a line anywhere, so accumulate and only
+      # consume complete (newline-terminated) lines.
+      buffer = +""
+      done = false
+      http.request(req) do |response|
+        unless response.is_a?(Net::HTTPSuccess)
+          raise LLMAPIError, "#{provider_label} stream error: #{response.code} - #{response.body}"
+        end
+        response.read_body do |segment|
+          break if done
+          buffer << segment
+          while (idx = buffer.index("\n"))
+            line = buffer.slice!(0, idx + 1).strip
+            # Ignore blank separators and non-data SSE fields.
+            next if line.empty? || !line.start_with?("data:")
+            payload = line.sub(/\Adata:\s*/, "")
+            if payload == "[DONE]"
+              done = true
+              break
+            end
+            begin
+              yield JSON.parse(payload)
+            rescue JSON::ParserError
+              next
+            end
+          end
+        end
+      end
+    end
+  end
+end

data/lib/smart_prompt/concerns/image_persistence.rb ADDED Viewed

@@ -0,0 +1,62 @@
+require "base64"
+require "net/http"
+require "uri"
+require "fileutils"
+module SmartPrompt
+  # Shared image-saving logic for adapters that produce generated images (ZhipuAI,
+  # SenseNova, SiliconFlow). Each previously carried a byte-identical copy of
+  # save_image / save_single_image; this concern is the single source.
+  #
+  # Adapters override two hooks:
+  #   * default_image_prefix — filename prefix when the caller passes none
+  #     (e.g. "zhipu_image", "sensenova_image", "siliconflow_image")
+  #   * provider_label       — human label for the "Saved N <label> image(s)" log line
+  module ImagePersistence
+    # Save one or many generated images to disk. Accepts the Array returned by
+    # generate_image/edit_image or a single image hash. Returns the written paths.
+    #
+    # output_dir      - target directory (created when missing).
+    # filename_prefix - base name; files are named "<prefix>_<n>" with n starting at 1.
+    def save_image(image_data, output_dir = "./output", filename_prefix = nil)
+      FileUtils.mkdir_p(output_dir)
+      images = image_data.is_a?(Array) ? image_data : [image_data]
+      saved = images.each_with_index.map do |img, index|
+        save_single_image(img, output_dir, "#{filename_prefix || default_image_prefix}_#{index + 1}")
+      end
+      SmartPrompt.logger.info "Saved #{saved.size} #{provider_label} image(s) to #{output_dir}"
+      saved
+    end
+    # Writes a single image hash to +output_dir+ and returns the file path.
+    # :b64_json is decoded and saved as .png; otherwise :url is downloaded and
+    # the extension is derived from the response Content-Type (png by default).
+    # Raises Error when the download fails or the hash has neither key.
+    def save_single_image(image_data, output_dir, filename)
+      if image_data[:b64_json]
+        file_path = File.join(output_dir, "#{filename}.png")
+        File.binwrite(file_path, Base64.decode64(image_data[:b64_json]))
+      elsif image_data[:url]
+        uri = URI.parse(image_data[:url])
+        response = Net::HTTP.get_response(uri)
+        raise Error, "Failed to download image from URL: #{response.code}" unless response.is_a?(Net::HTTPSuccess)
+        # Compare the bare media type only: servers may append parameters
+        # ("image/jpeg; charset=binary") or vary the case.
+        media_type = response["content-type"].to_s.split(";").first.to_s.strip.downcase
+        ext = case media_type
+              when "image/jpeg", "image/jpg" then "jpg"
+              when "image/png"               then "png"
+              when "image/gif"               then "gif"
+              when "image/webp"              then "webp"
+              else "png"
+              end
+        file_path = File.join(output_dir, "#{filename}.#{ext}")
+        File.binwrite(file_path, response.body)
+      else
+        raise Error, "No image data available to save"
+      end
+      file_path
+    end
+    # ---- hooks (override in adapter) -----------------------------------------
+    # Filename prefix used when save_image is called without one.
+    def default_image_prefix
+      "image"
+    end
+    # Human-readable provider name used in log lines.
+    def provider_label
+      "Adapter"
+    end
+  end
+end