RubyGems - smart_prompt - Versions diffs - 0.5.2 → 0.5.3 - Mend

smart_prompt 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

checksums.yaml +4 -4
data/config/siliconflow_config.yml +95 -0
data/examples/siliconflow_example.rb +175 -0
data/lib/smart_prompt/adapters/siliconflow/embed.rb +33 -0
data/lib/smart_prompt/adapters/siliconflow/image.rb +103 -0
data/lib/smart_prompt/adapters/siliconflow/rerank.rb +41 -0
data/lib/smart_prompt/adapters/siliconflow/text.rb +54 -0
data/lib/smart_prompt/adapters/siliconflow/video.rb +111 -0
data/lib/smart_prompt/adapters/siliconflow/voice.rb +102 -0
data/lib/smart_prompt/adapters/zhipu/embed.rb +32 -0
data/lib/smart_prompt/adapters/zhipu/image.rb +59 -0
data/lib/smart_prompt/adapters/zhipu/rerank.rb +17 -0
data/lib/smart_prompt/adapters/zhipu/text.rb +57 -0
data/lib/smart_prompt/adapters/zhipu/video.rb +101 -0
data/lib/smart_prompt/adapters/zhipu/voice.rb +55 -0
data/lib/smart_prompt/concerns/http_client.rb +147 -0
data/lib/smart_prompt/concerns/image_persistence.rb +62 -0
data/lib/smart_prompt/concerns/multimodal_messages.rb +108 -0
data/lib/smart_prompt/concerns/openai_chat_shaping.rb +87 -0
data/lib/smart_prompt/siliconflow_adapter.rb +91 -0
data/lib/smart_prompt/version.rb +1 -1
data/workers/siliconflow_workers.rb +167 -0
metadata +21 -1

data/lib/smart_prompt/concerns/multimodal_messages.rb ADDED Viewed

@@ -0,0 +1,108 @@
+require "base64"
+require "net/http"
+require "uri"
+module SmartPrompt
+  # Shared multimodal-message normalization for Net::HTTP adapters (ZhipuAI, SenseNova,
+  # SiliconFlow). Turns an OpenAI-style content array into the shape the provider expects,
+  # inlining local image/audio/video files as base64 data URLs and passing http(s)/data
+  # URLs through. Each adapter previously carried a near-identical copy of this logic.
+  #
+  # SiliconFlow's variant is the superset (image_url + video_url + audio_url, preserving
+  # detail/max_frames/fps); Zhipu/SenseNova only ever send image_url, which is a subset.
+  module MultimodalMessages
+    SUPPORTED_IMAGE_FORMATS = %w[jpg jpeg png gif bmp webp].freeze
+    def process_multimodal_messages(messages)
+      messages.map do |msg|
+        role = msg[:role] || msg["role"]
+        content = msg[:content] || msg["content"]
+        content = content.map { |item| normalize_content_item(item) } if content.is_a?(Array)
+        { "role" => role, "content" => content }
+      end
+    end
+    def normalize_content_item(item)
+      return { "type" => "text", "text" => item.to_s } unless item.is_a?(Hash)
+      case item[:type] || item["type"]
+      when "image_url"
+        normalize_media_part(item, "image_url", :image)
+      when "video_url"
+        normalize_media_part(item, "video_url", :video)
+      when "audio_url"
+        normalize_media_part(item, "audio_url", :audio)
+      else
+        stringify_hash(item)
+      end
+    end
+    # Build an image_url/video_url/audio_url part, inlining local files as data URLs and
+    # preserving any extra keys (detail, max_frames, fps) on the media hash.
+    def normalize_media_part(item, type, media_kind)
+      iu = item[type.to_sym] || item[type]
+      if iu.is_a?(Hash)
+        url = iu[:url] || iu["url"]
+        part = { "type" => type, type => { "url" => normalize_media_url(url, media_kind) } }
+        iu.each { |k, v| part[type][k.to_s] = stringify_hash(v) unless k.to_s == "url" }
+        part
+      else
+        { "type" => type, type => { "url" => normalize_media_url(iu, media_kind) } }
+      end
+    end
+    # Resolve a media URL embedded in a message: http(s)/data pass through; a local path
+    # is base64-encoded as a data URL.
+    def normalize_media_url(url, kind = :image)
+      return url if url.nil?
+      return url if url.start_with?("http://", "https://", "data:")
+      label = kind == :image ? "Image" : kind.to_s.capitalize
+      raise Error, "#{label} file not found: #{url}" unless File.exist?(url)
+      ext = File.extname(url).downcase.delete(".")
+      case kind
+      when :image
+        raise Error, "Unsupported image format: #{ext}" unless SUPPORTED_IMAGE_FORMATS.include?(ext)
+        mime = ext == "jpg" ? "jpeg" : ext
+        "data:image/#{mime};base64,#{Base64.strict_encode64(File.binread(url))}"
+      when :audio
+        "data:audio/#{ext.empty? ? 'wav' : ext};base64,#{Base64.strict_encode64(File.binread(url))}"
+      when :video
+        "data:video/#{ext.empty? ? 'mp4' : ext};base64,#{Base64.strict_encode64(File.binread(url))}"
+      end
+    end
+    # Single-arg image-only shim (call sites like generate_video pass a plain image URL).
+    def normalize_image_url(url)
+      normalize_media_url(url, :image)
+    end
+    # Accept a local path, a base64 data URL, or an http(s) URL for image-edit /
+    # image-to-video `image` fields.
+    def normalize_input_image(image)
+      return image if image.nil?
+      if image.is_a?(String)
+        return image if image.start_with?("data:")
+        return image if image.start_with?("http://", "https://")
+      end
+      raise Error, "Image file not found: #{image}" unless File.exist?(image)
+      ext = File.extname(image).downcase.delete(".")
+      raise Error, "Unsupported image format: #{ext}" unless SUPPORTED_IMAGE_FORMATS.include?(ext)
+      mime = ext == "jpg" ? "jpeg" : ext
+      "data:image/#{mime};base64,#{Base64.strict_encode64(File.binread(image))}"
+    end
+    def stringify_hash(hash)
+      case hash
+      when Hash
+        hash.each_with_object({}) { |(k, v), memo| memo[k.to_s] = stringify_hash(v) }
+      when Array
+        hash.map { |v| stringify_hash(v) }
+      else
+        hash
+      end
+    end
+  end
+end

data/lib/smart_prompt/concerns/openai_chat_shaping.rb ADDED Viewed

@@ -0,0 +1,87 @@
+module SmartPrompt
+  # Shared shaping of Net::HTTP chat responses into the OpenAI completion / stream
+  # shape that the rest of SmartPrompt (Engine#@stream_proc, Conversation) expects.
+  #
+  # Reasoning models expose a thinking trace under a provider-specific field —
+  # surfaced here uniformly as `reasoning_content`. Adapters override one hook:
+  #
+  #   reasoning_field_name  — the source field on message/delta (default
+  #     "reasoning_content"; SenseNova uses "reasoning"). Its value is remapped to
+  #     reasoning_content so Engine#@stream_proc needs no per-provider logic.
+  #
+  #   extra_top_level_fields(raw) — extra top-level keys to copy onto the shaped
+  #     response/chunk (default {}; SenseNova adds system_fingerprint).
+  module OpenAIChatShaping
+    def build_completion_response(raw)
+      msg = raw.dig("choices", 0, "message") || {}
+      message = { "role" => msg["role"] || "assistant" }
+      message["content"] = msg["content"]
+      reasoning = msg[reasoning_field_name]
+      message["reasoning_content"] = reasoning if reasoning
+      message["tool_calls"] = msg["tool_calls"] if msg["tool_calls"]
+      response = {
+        "id"      => raw["id"],
+        "object"  => raw["object"] || "chat.completion",
+        "created" => raw["created"],
+        "model"   => raw["model"],
+        "choices" => [{
+          "index"         => 0,
+          "message"       => message,
+          "finish_reason" => raw.dig("choices", 0, "finish_reason"),
+        }],
+      }
+      response["usage"] = raw["usage"] if raw["usage"]
+      merge_extra_top_level(response, raw)
+      response
+    end
+    def build_stream_chunk(data)
+      chunk = {
+        "id"      => data["id"],
+        "object"  => data["object"],
+        "created" => data["created"],
+        "model"   => data["model"],
+      }
+      chunk["usage"] = data["usage"] if data["usage"]
+      merge_extra_top_level(chunk, data)
+      choices = data["choices"] || []
+      if choices.any?
+        delta = choices[0]["delta"] || {}
+        new_delta = {}
+        new_delta["role"]              = delta["role"]        if delta["role"]
+        new_delta["content"]           = delta["content"]     if delta["content"]
+        reasoning = delta[reasoning_field_name]
+        new_delta["reasoning_content"] = reasoning if reasoning
+        new_delta["tool_calls"]        = delta["tool_calls"]  if delta["tool_calls"]
+        chunk["choices"] = [{
+          "index"         => choices[0]["index"] || 0,
+          "delta"         => new_delta,
+          "finish_reason" => choices[0]["finish_reason"],
+        }]
+      else
+        chunk["choices"] = []
+      end
+      chunk
+    end
+    # ---- hooks (override in adapter) -----------------------------------------
+    def reasoning_field_name
+      "reasoning_content"
+    end
+    def extra_top_level_fields(_raw)
+      {}
+    end
+    private
+    def merge_extra_top_level(target, raw)
+      extra_top_level_fields(raw).each do |k, v|
+        target[k] = v unless v.nil? || target.key?(k)
+      end
+    end
+  end
+end

data/lib/smart_prompt/siliconflow_adapter.rb ADDED Viewed

@@ -0,0 +1,91 @@
+require "base64"
+require "json"
+require "net/http"
+require "uri"
+require "fileutils"
+require_relative "concerns/image_persistence"
+require_relative "concerns/openai_chat_shaping"
+require_relative "concerns/multimodal_messages"
+require_relative "concerns/http_client"
+require_relative "adapters/siliconflow/text"
+require_relative "adapters/siliconflow/embed"
+require_relative "adapters/siliconflow/image"
+require_relative "adapters/siliconflow/video"
+require_relative "adapters/siliconflow/voice"
+require_relative "adapters/siliconflow/rerank"
+module SmartPrompt
+  # Adapter for 硅基流动 (SiliconFlow / SiliconCloud) — one adapter owns the whole
+  # provider: every category shares the base URL https://api.siliconflow.cn/v1 and
+  # Bearer auth.
+  #
+  # Per-modality behavior lives in capability modules under adapters/siliconflow/
+  # (Text / Embed / Image / Video / Voice / Rerank); cross-provider plumbing (HTTP,
+  # multimodal normalization, chat shaping, image saving) comes from the shared
+  # concerns. This class wires them together + holds config/credentials.
+  #
+  # Provider-specific quirks (all vs https://docs.siliconflow.cn/cn/api-reference):
+  #   chat/vision — POST {base}/chat/completions (reasoning_content, no remap)
+  #   embeddings  — POST {base}/embeddings        (dimensions only for Qwen3-Embedding)
+  #   rerank      — POST {base}/rerank            (results[].relevance_score)
+  #   image/edit  — POST {base}/images/generations (images[].url; image_size/batch_size/guidance_scale)
+  #   video       — POST {base}/video/submit -> POST {base}/video/status (async; results.videos[].url)
+  #   tts         — POST {base}/audio/speech       (binary audio response)
+  #   asr         — POST {base}/audio/transcriptions (multipart, field "file")
+  #   voice       — /uploads/audio/voice, /audio/voice/list, /audio/voice/deletions
+  class SiliconFlowAdapter < LLMAdapter
+    DEFAULT_BASE_URL = "https://api.siliconflow.cn/v1".freeze
+    # Cross-provider shared concerns
+    include ImagePersistence
+    include OpenAIChatShaping
+    include MultimodalMessages
+    include HTTPClient
+    # Per-capability modules
+    include SiliconFlow::Text
+    include SiliconFlow::Embed
+    include SiliconFlow::Image
+    include SiliconFlow::Video
+    include SiliconFlow::Voice
+    include SiliconFlow::Rerank
+    # ---- hooks for shared concerns -------------------------------------------
+    def provider_label
+      "SiliconFlow"
+    end
+    def default_image_prefix
+      "siliconflow_image"
+    end
+    def initialize(config)
+      super
+      SmartPrompt.logger.info "Start create the SmartPrompt SiliconFlowAdapter."
+      api_key = @config["api_key"]
+      if api_key.is_a?(String) && api_key.start_with?("ENV[") && api_key.end_with?("]")
+        api_key = eval(api_key)
+      end
+      # Tolerate a missing key at construction (e.g. when the ENV var isn't set yet)
+      # and let the first request fail with a clear auth error.
+      SmartPrompt.logger.warn "SiliconFlow api_key is empty — API calls will fail until it is set." if api_key.nil? || api_key.to_s.strip.empty?
+      @api_key  = api_key
+      @base_url = (@config["url"] || DEFAULT_BASE_URL).to_s.chomp("/")
+      # Optional per-method URL overrides (default to the standard paths off @base_url).
+      @image_url         = (@config["image_url"]         || "#{@base_url}/images/generations").to_s
+      @video_submit_url  = (@config["video_submit_url"]  || "#{@base_url}/video/submit").to_s
+      @video_status_url  = (@config["video_status_url"]  || "#{@base_url}/video/status").to_s
+      @speech_url        = (@config["speech_url"]        || "#{@base_url}/audio/speech").to_s
+      @transcription_url = (@config["transcription_url"] || "#{@base_url}/audio/transcriptions").to_s
+      @voice_upload_url  = (@config["voice_upload_url"]  || "#{@base_url}/uploads/audio/voice").to_s
+      @voice_list_url    = (@config["voice_list_url"]    || "#{@base_url}/audio/voice/list").to_s
+      @voice_delete_url  = (@config["voice_delete_url"]  || "#{@base_url}/audio/voice/deletions").to_s
+      SmartPrompt.logger.info "SiliconFlow base_url=#{@base_url}"
+    rescue => e
+      SmartPrompt.logger.error "Failed to initialize SiliconFlow client: #{e.message}"
+      raise e.is_a?(SmartPrompt::Error) ? e : LLMAPIError, "Invalid SiliconFlow configuration: #{e.message}"
+    end
+  end
+end

data/lib/smart_prompt/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module SmartPrompt
-  VERSION = "0.5.2"
+  VERSION = "0.5.3"
 end

data/workers/siliconflow_workers.rb ADDED Viewed

@@ -0,0 +1,167 @@
+# 硅基流动 (SiliconFlow / SiliconCloud) workers for SmartPrompt
+#
+# One worker per model category, reusing the standard DSL (`use`, `model`, `sys_msg`,
+# `prompt`, `send_msg`) and the media helpers. Chat/vision/embed/image/image-edit go
+# through Conversation-delegated methods; rerank/video/tts/asr reach the adapter
+# directly via engine.llms[...] (the methods Conversation does not delegate).
+#
+# `send_msg` transparently becomes streaming when the engine invokes the worker via
+# call_worker_by_stream — so :siliconflow_chat serves both sync and stream callers.
+# 1. Text chat (sync + stream).
+#    params[:model], params[:system] and params[:prompt] are optional; the system
+#    message and the prompt fall back to the built-in Chinese defaults below.
+SmartPrompt.define_worker :siliconflow_chat do
+  use "sf_chat"
+  model params[:model] if params[:model]
+  sys_msg(params[:system] || "你是一个有帮助的中文助手，回答简洁准确。", params)
+  prompt(params[:prompt] || "你好，请用一句话介绍硅基流动 SiliconFlow。")
+  send_msg
+end
+# 2. 多模态对话 (vision / video / audio). Accepts image_url/video_url/audio_url,
+#    a single media url, or arrays (image_urls).
+SmartPrompt.define_worker :siliconflow_vision do
+  use "sf_vision"
+  model params[:model] if params[:model]
+  sys_msg("你是一个专业的多模态分析助手，能够准确描述和分析图像/视频/音频内容。", params)
+  content = [{ type: "text", text: params[:question] || "请描述这张图片中的内容。" }]
+  ([params[:image_url]] + (params[:image_urls] || [])).compact.uniq.each do |url|
+    content << { type: "image_url", image_url: { url: url } }
+  end
+  content << { type: "video_url", video_url: { url: params[:video_url] } } if params[:video_url]
+  content << { type: "audio_url", audio_url: { url: params[:audio_url] } } if params[:audio_url]
+  add_message({ role: "user", content: content })
+  send_msg
+end
+# 3. Embedding models. Returns a normalized numeric vector of the user text.
+#    params[:text] is the text to embed (a built-in sample is used when absent);
+#    params[:length] is passed to `embeddings` and defaults to 1024.
+SmartPrompt.define_worker :siliconflow_embed do
+  use "sf_embed"
+  model params[:model] if params[:model]
+  prompt(params[:text] || "硅基流动 SiliconFlow 大模型")
+  embeddings(params[:length] || 1024)
+end
+# 4. 重排 (rerank). Reorders params[:documents] by relevance to params[:query].
+#    Conversation does not delegate rerank, so we reach the adapter directly.
+SmartPrompt.define_worker :siliconflow_rerank do
+  use "sf_rerank"
+  model params[:model] if params[:model]
+  adapter = engine.llms["sf_rerank"]
+  adapter.rerank(
+    params[:query],
+    params[:documents] || [],
+    model: params[:model],
+    top_n: params[:top_n],
+    return_documents: params[:return_documents],
+  )
+end
+# 5. 文生图 (text-to-image). Returns the generated image(s); optionally saves to disk.
+SmartPrompt.define_worker :siliconflow_image do
+  use "sf_image"
+  model params[:model] if params[:model]
+  images = generate_image(params[:prompt], {
+    model: params[:model],
+    negative_prompt: params[:negative_prompt],
+    image_size: params[:image_size] || params[:size],
+    batch_size: params[:batch_size] || params[:n],
+    seed: params[:seed],
+    num_inference_steps: params[:num_inference_steps],
+    guidance_scale: params[:guidance_scale],
+  })
+  if params[:save_to_file]
+    saved = save_image(images, params[:output_dir] || "./generated_images", params[:filename_prefix] || "siliconflow")
+    { images: images, saved_files: saved }
+  else
+    images
+  end
+end
+# 6. 图像编辑 / 图生图 (Qwen-Image-Edit). Accepts image (and image2/image3 for
+#    multi-image fusion) as local path, data URL, or http URL.
+SmartPrompt.define_worker :siliconflow_image_edit do
+  use "sf_image"
+  model params[:model] || "Qwen/Qwen-Image-Edit-2509"
+  images = edit_image(params[:prompt], {
+    model: params[:model] || "Qwen/Qwen-Image-Edit-2509",
+    image: params[:image] || params[:image_file],
+    image2: params[:image2],
+    image3: params[:image3],
+    negative_prompt: params[:negative_prompt],
+    seed: params[:seed],
+    guidance_scale: params[:guidance_scale],
+  })
+  if params[:save_to_file]
+    saved = save_image(images, params[:output_dir] || "./edited_images", params[:filename_prefix] || "siliconflow_edit")
+    { images: images, saved_files: saved }
+  else
+    images
+  end
+end
+# 7. 文生视频 / 图生视频 (async: submit -> poll -> download).
+SmartPrompt.define_worker :siliconflow_video do
+  use "sf_video"
+  model params[:model] if params[:model]
+  adapter = engine.llms["sf_video"]
+  submitted = adapter.generate_video(params[:prompt], params)
+  result = { submitted: submitted }
+  if params[:wait_for_completion]
+    completed = adapter.wait_for_video_completion(
+      submitted[:request_id],
+      check_interval: params[:check_interval] || 10,
+      timeout: params[:timeout] || 600
+    )
+    if completed[:video_url] && params[:download_to_file]
+      output_dir = params[:output_dir] || "./generated_videos"
+      prefix = params[:filename_prefix] || "siliconflow_video"
+      output_path = File.join(output_dir, "#{prefix}_#{submitted[:request_id]}.mp4")
+      downloaded = adapter.download_video(completed[:video_url], output_path)
+      result = { submitted: submitted, video: completed, downloaded_file: downloaded }
+    else
+      result = { submitted: submitted, video: completed }
+    end
+  end
+  result
+end
+# 8. 语音合成 (TTS — CosyVoice2 / MOSS-TTSD). Saves the synthesized audio to disk.
+SmartPrompt.define_worker :siliconflow_tts do
+  use "sf_tts"
+  model params[:model] if params[:model]
+  adapter = engine.llms["sf_tts"]
+  output_path = params[:output_path] || "./generated_audio/siliconflow_tts.mp3"
+  info = adapter.synthesize_to_file(
+    params[:text],
+    output_path,
+    voice: params[:voice],
+    model: params[:model],
+    response_format: params[:response_format] || "mp3",
+    speed: params[:speed],
+    language: params[:language],
+  )
+  info
+end
+# 9. 语音识别 (ASR — SenseVoiceSmall). Transcribes a local audio file.
+SmartPrompt.define_worker :siliconflow_asr do
+  use "sf_asr"
+  model params[:model] if params[:model]
+  adapter = engine.llms["sf_asr"]
+  adapter.transcribe_audio(
+    params[:audio_file],
+    model: params[:model],
+    language: params[:language],
+  )
+end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: smart_prompt
 version: !ruby/object:Gem::Version
-  version: 0.5.2
+  version: 0.5.3
 platform: ruby
 authors:
 - zhuang biaowei
@@ -152,6 +152,7 @@ files:
 - config/image_generation_config.yml
 - config/multimodal_config.yml
 - config/sensenova_config.yml
+- config/siliconflow_config.yml
 - config/zhipu_config.yml
 - docs/ANTHROPIC_EXAMPLES.md
 - docs/CONVERSATION_INTEGRATION_SUMMARY.md
@@ -176,14 +177,31 @@ files:
 - examples/multimodal_example.rb
 - examples/relevance_based_strategy_example.rb
 - examples/sensenova_example.rb
+- examples/siliconflow_example.rb
 - examples/stt_example.rb
 - examples/tts_example.rb
 - examples/video_generation_example.rb
 - examples/zhipu_example.rb
 - lib/smart_prompt.rb
+- lib/smart_prompt/adapters/siliconflow/embed.rb
+- lib/smart_prompt/adapters/siliconflow/image.rb
+- lib/smart_prompt/adapters/siliconflow/rerank.rb
+- lib/smart_prompt/adapters/siliconflow/text.rb
+- lib/smart_prompt/adapters/siliconflow/video.rb
+- lib/smart_prompt/adapters/siliconflow/voice.rb
+- lib/smart_prompt/adapters/zhipu/embed.rb
+- lib/smart_prompt/adapters/zhipu/image.rb
+- lib/smart_prompt/adapters/zhipu/rerank.rb
+- lib/smart_prompt/adapters/zhipu/text.rb
+- lib/smart_prompt/adapters/zhipu/video.rb
+- lib/smart_prompt/adapters/zhipu/voice.rb
 - lib/smart_prompt/anthropic_adapter.rb
 - lib/smart_prompt/api_handler.rb
 - lib/smart_prompt/compression_engine.rb
+- lib/smart_prompt/concerns/http_client.rb
+- lib/smart_prompt/concerns/image_persistence.rb
+- lib/smart_prompt/concerns/multimodal_messages.rb
+- lib/smart_prompt/concerns/openai_chat_shaping.rb
 - lib/smart_prompt/context_strategy.rb
 - lib/smart_prompt/conversation.rb
 - lib/smart_prompt/db_adapter.rb
@@ -202,6 +220,7 @@ files:
 - lib/smart_prompt/relevance_based_strategy.rb
 - lib/smart_prompt/sensenova_adapter.rb
 - lib/smart_prompt/session.rb
+- lib/smart_prompt/siliconflow_adapter.rb
 - lib/smart_prompt/sliding_window_strategy.rb
 - lib/smart_prompt/stt_adapter.rb
 - lib/smart_prompt/summary_based_strategy.rb
@@ -216,6 +235,7 @@ files:
 - workers/image_generation_workers.rb
 - workers/multimodal_workers.rb
 - workers/sensenova_workers.rb
+- workers/siliconflow_workers.rb
 - workers/stt_workers.rb
 - workers/tts_workers.rb
 - workers/video_generation_workers.rb