RubyGems - smart_prompt - Versions diffs - 0.5.0 → 0.5.1 - Mend

smart_prompt 0.5.0 → 0.5.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (23)

checksums.yaml +4 -4
data/CHANGELOG.md +18 -2
data/README.cn.md +55 -4
data/README.md +55 -4
data/docs/ANTHROPIC_EXAMPLES.md +559 -0
data/docs/CONVERSATION_INTEGRATION_SUMMARY.md +155 -0
data/docs/HISTORY_EXAMPLES_README.md +533 -0
data/docs/HISTORY_MANAGEMENT_GUIDE.md +797 -0
data/docs/MONITORING_GUIDE.md +278 -0
data/docs/MULTIMODAL_README.md +265 -0
data/docs/RELEVANCE_BASED_STRATEGY_IMPLEMENTATION.md +124 -0
data/docs/STT_README.md +302 -0
data/docs/TTS_README.md +303 -0
data/docs/VIDEO_GENERATION_README.md +246 -0
data/docs/delete_files_list.md +124 -0
data/lib/smart_prompt/anthropic_adapter.rb +167 -140
data/lib/smart_prompt/conversation.rb +195 -42
data/lib/smart_prompt/engine.rb +20 -10
data/lib/smart_prompt/openai_adapter.rb +25 -1
data/lib/smart_prompt/version.rb +1 -1
data/lib/smart_prompt/worker.rb +5 -2
data/lib/smart_prompt.rb +2 -1
metadata +33 -22

data/lib/smart_prompt/anthropic_adapter.rb CHANGED Viewed

@@ -1,7 +1,7 @@
-require "anthropic"
-require "base64"
-require "uri"
-require "json"
+require "anthropic"
+require "base64"
+require "uri"
+require "json"
 module SmartPrompt
   class AnthropicAdapter < LLMAdapter
@@ -78,7 +78,10 @@ module SmartPrompt
             content
           end
-        { role: role, content: [converted_content] }
+        # String/scalar content becomes a single-element block array;
+        # already-array (multimodal) content must not be double-wrapped.
+        final_content = converted_content.is_a?(Array) ? converted_content : [converted_content]
+        { role: role, content: final_content }
       end
     end
@@ -148,139 +151,163 @@ module SmartPrompt
           input_schema: parameters,
         }
       end.compact # Remove nil values from failed conversions
-    end
-    def convert_response_to_openai_format(response)
-      begin
-        # Normalize response to a Hash with symbol keys
-        raw_response = if response.respond_to?(:to_h)
-                         response.to_h
-                       elsif response.is_a?(Hash)
-                         response
-                       else
-                         JSON.parse(response.to_json)
-                       end
-        response_hash = deep_symbolize(raw_response)
-        # Handle content blocks (text, tool_use, etc.)
-        content_blocks = response_hash[:content] || []
-        text_content = ""
-        tool_calls = []
-        case content_blocks
-        when String
-          text_content = content_blocks
-        when Array
-          content_blocks.each do |block|
-            block_hash = block.respond_to?(:to_h) ? block.to_h : block
-            block_hash = deep_symbolize(block_hash)
-            next unless block_hash.is_a?(Hash)
-            case block_hash[:type]
-            when "text"
-              text_content << block_hash[:text].to_s
-            when "tool_use"
-              tool_calls << {
-                "index" => tool_calls.size,
-                "id" => block_hash[:id] || "tool_call_#{tool_calls.size}",
-                "type" => "function",
-                "function" => {
-                  "name" => block_hash[:name],
-                  "arguments" => JSON.generate(block_hash[:input] || {}),
-                },
-              }
-            end
-          end
-        else
-          text_content = content_blocks.to_s
-        end
-        # Map stop reason to OpenAI finish_reason semantics
-        stop_reason = response_hash[:stop_reason] || response_hash[:finish_reason]
-        finish_reason = case stop_reason
-                        when "tool_use"
-                          "tool_calls"
-                        when "end_turn", nil
-                          "stop"
-                        else
-                          stop_reason
-                        end
-        # Map usage information
-        usage = response_hash[:usage] || {}
-        prompt_tokens = usage[:input_tokens]
-        completion_tokens = usage[:output_tokens]
-        cache_read_tokens = usage[:cache_read_input_tokens]
-        cache_creation_tokens = usage[:cache_creation_input_tokens]
-        total_tokens = if prompt_tokens || completion_tokens
-                         [prompt_tokens, completion_tokens].compact.sum
-                       end
-        prompt_cache_hit_tokens = cache_read_tokens
-        prompt_cache_miss_tokens = if prompt_tokens && cache_read_tokens
-                                     prompt_tokens - cache_read_tokens
-                                   end
-        prompt_tokens_details = {}
-        prompt_tokens_details["cached_tokens"] = cache_read_tokens if cache_read_tokens
-        usage_hash = {}
-        usage_hash["prompt_tokens"] = prompt_tokens if prompt_tokens
-        usage_hash["completion_tokens"] = completion_tokens if completion_tokens
-        usage_hash["total_tokens"] = total_tokens if total_tokens
-        usage_hash["prompt_tokens_details"] = prompt_tokens_details unless prompt_tokens_details.empty?
-        usage_hash["prompt_cache_hit_tokens"] = prompt_cache_hit_tokens if prompt_cache_hit_tokens
-        usage_hash["prompt_cache_miss_tokens"] = prompt_cache_miss_tokens if prompt_cache_miss_tokens
-        created_ts = response_hash[:created_at] || response_hash[:created] || Time.now.to_i
-        message_role = response_hash[:role] || "assistant"
-        openai_response = {
-          "id" => response_hash[:id],
-          "object" => "chat.completion",
-          "created" => created_ts,
-          "model" => response_hash[:model],
-          "choices" => [
-            {
-              "index" => 0,
-              "message" => {
-                "role" => message_role,
-                "content" => text_content.empty? ? nil : text_content,
-              },
-              "finish_reason" => finish_reason,
-            },
-          ],
-        }
-        unless tool_calls.empty?
-          openai_response["choices"][0]["message"]["tool_calls"] = tool_calls
-        end
-        openai_response["usage"] = usage_hash unless usage_hash.empty?
-        openai_response["system_fingerprint"] = response_hash[:system_fingerprint] if response_hash[:system_fingerprint]
-        @last_response = openai_response
-        openai_response
-      rescue => e
-        SmartPrompt.logger.error "Failed to convert Anthropic response: #{e.message}"
-        raise LLMAPIError, "Failed to convert Anthropic response: #{e.message}"
-      end
-    end
-    # Deeply symbolize hash keys for consistent access
-    def deep_symbolize(obj)
-      case obj
-      when Hash
-        obj.each_with_object({}) do |(k, v), memo|
-          key = k.is_a?(String) || k.is_a?(Symbol) ? k.to_sym : k
-          memo[key] = deep_symbolize(v)
-        end
-      when Array
-        obj.map { |item| deep_symbolize(item) }
-      else
-        obj
-      end
-    end
+    end
+    # Extract plain text from an Anthropic response's `content` field.
+    # Handles a String, an Array of content blocks, nil, or an empty array.
+    # @param response [Hash] Anthropic response (or its `content` value)
+    # @return [String] Concatenated text, with multiple text blocks joined by newlines
+    def extract_content_from_response(response)
+      content = if response.is_a?(Hash)
+                  response["content"] || response[:content]
+                else
+                  response
+                end
+      case content
+      when String
+        content
+      when Array
+        content.map do |block|
+          next block unless block.is_a?(Hash)
+          block["text"] || block[:text]
+        end.compact.reject(&:empty?).join("\n")
+      else
+        content.to_s
+      end
+    end
+    def convert_response_to_openai_format(response)
+      begin
+        # Normalize response to a Hash with symbol keys
+        raw_response = if response.respond_to?(:to_h)
+                         response.to_h
+                       elsif response.is_a?(Hash)
+                         response
+                       else
+                         JSON.parse(response.to_json)
+                       end
+        response_hash = deep_symbolize(raw_response)
+        # Handle content blocks (text, tool_use, etc.)
+        content_blocks = response_hash[:content] || []
+        text_content = ""
+        tool_calls = []
+        case content_blocks
+        when String
+          text_content = content_blocks
+        when Array
+          content_blocks.each do |block|
+            block_hash = block.respond_to?(:to_h) ? block.to_h : block
+            block_hash = deep_symbolize(block_hash)
+            next unless block_hash.is_a?(Hash)
+            case block_hash[:type]
+            when "text"
+              text_content << block_hash[:text].to_s
+            when "tool_use"
+              tool_calls << {
+                "index" => tool_calls.size,
+                "id" => block_hash[:id] || "tool_call_#{tool_calls.size}",
+                "type" => "function",
+                "function" => {
+                  "name" => block_hash[:name],
+                  "arguments" => JSON.generate(block_hash[:input] || {}),
+                },
+              }
+            end
+          end
+        else
+          text_content = content_blocks.to_s
+        end
+        # Map stop reason to OpenAI finish_reason semantics
+        stop_reason = response_hash[:stop_reason] || response_hash[:finish_reason]
+        finish_reason = case stop_reason
+                        when "tool_use"
+                          "tool_calls"
+                        when "end_turn", nil
+                          "stop"
+                        else
+                          stop_reason
+                        end
+        # Map usage information
+        usage = response_hash[:usage] || {}
+        prompt_tokens = usage[:input_tokens]
+        completion_tokens = usage[:output_tokens]
+        cache_read_tokens = usage[:cache_read_input_tokens]
+        cache_creation_tokens = usage[:cache_creation_input_tokens]
+        total_tokens = if prompt_tokens || completion_tokens
+                         [prompt_tokens, completion_tokens].compact.sum
+                       end
+        prompt_cache_hit_tokens = cache_read_tokens
+        prompt_cache_miss_tokens = if prompt_tokens && cache_read_tokens
+                                     prompt_tokens - cache_read_tokens
+                                   end
+        prompt_tokens_details = {}
+        prompt_tokens_details["cached_tokens"] = cache_read_tokens if cache_read_tokens
+        usage_hash = {}
+        usage_hash["prompt_tokens"] = prompt_tokens if prompt_tokens
+        usage_hash["completion_tokens"] = completion_tokens if completion_tokens
+        usage_hash["total_tokens"] = total_tokens if total_tokens
+        usage_hash["prompt_tokens_details"] = prompt_tokens_details unless prompt_tokens_details.empty?
+        usage_hash["prompt_cache_hit_tokens"] = prompt_cache_hit_tokens if prompt_cache_hit_tokens
+        usage_hash["prompt_cache_miss_tokens"] = prompt_cache_miss_tokens if prompt_cache_miss_tokens
+        created_ts = response_hash[:created_at] || response_hash[:created] || Time.now.to_i
+        message_role = response_hash[:role] || "assistant"
+        openai_response = {
+          "id" => response_hash[:id],
+          "object" => "chat.completion",
+          "created" => created_ts,
+          "model" => response_hash[:model],
+          "choices" => [
+            {
+              "index" => 0,
+              "message" => {
+                "role" => message_role,
+                "content" => text_content.empty? ? nil : text_content,
+              },
+              "finish_reason" => finish_reason,
+            },
+          ],
+        }
+        unless tool_calls.empty?
+          openai_response["choices"][0]["message"]["tool_calls"] = tool_calls
+        end
+        openai_response["usage"] = usage_hash unless usage_hash.empty?
+        openai_response["system_fingerprint"] = response_hash[:system_fingerprint] if response_hash[:system_fingerprint]
+        @last_response = openai_response
+        openai_response
+      rescue => e
+        SmartPrompt.logger.error "Failed to convert Anthropic response: #{e.message}"
+        raise LLMAPIError, "Failed to convert Anthropic response: #{e.message}"
+      end
+    end
+    # Deeply symbolize hash keys for consistent access
+    def deep_symbolize(obj)
+      case obj
+      when Hash
+        obj.each_with_object({}) do |(k, v), memo|
+          key = k.is_a?(String) || k.is_a?(Symbol) ? k.to_sym : k
+          memo[key] = deep_symbolize(v)
+        end
+      when Array
+        obj.map { |item| deep_symbolize(item) }
+      else
+        obj
+      end
+    end
     public
@@ -290,8 +317,8 @@ module SmartPrompt
     # @param temperature [Float, nil] Temperature value (optional, uses config or 0.7 if nil)
     # @param tools [Array, nil] Array of tool definitions (optional)
     # @param proc [Proc, nil] Callback for streaming responses (optional)
-    # @return [Hash, nil] OpenAI-formatted response (nil for streaming mode)
-    def send_request(messages, model = nil, temperature = nil, tools = nil, proc = nil)
+    # @return [Hash, nil] OpenAI-formatted response (nil for streaming mode)
+    def send_request(messages, model = nil, temperature = nil, tools = nil, proc = nil)
       begin
         # Determine model name (parameter > config)
         model_name = model || @config["model"]

data/lib/smart_prompt/conversation.rb CHANGED Viewed

@@ -1,10 +1,23 @@
 require "yaml"
 require "retriable"
 require "numo/narray"
+require "base64"
 module SmartPrompt
   class Conversation
     include APIHandler
+    MODEL_REQUEST_OPTION_KEYS = %w[
+      max_tokens
+      max_completion_tokens
+      top_p
+      top_k
+      response_format
+      tool_choice
+      parallel_tool_calls
+      seed
+      stop
+    ].freeze
     attr_reader :messages, :last_response, :config_file
     attr_reader :last_call_id
     attr_reader :session_id
@@ -15,34 +28,66 @@ module SmartPrompt
       @engine = engine
       @adapters = engine.adapters
       @llms = engine.llms
+      @models = engine.models
       @current_llm_name = nil
       @templates = engine.templates
       @temperature = 0.7
       @current_adapter = engine.current_adapter
       @last_response = nil
       @tools = tools
+      @request_options = {}
+      @pending_content_parts = []
+      @thinking_enabled = nil
       @session_id = session_id
       @use_history_manager = false
     end
     def use(llm_name)
-      raise "LLM #{llm_name} not configured" unless @llms.key?(llm_name)
+      llm_name = llm_name.to_s
+      raise ConfigurationError, "LLM #{llm_name} not configured" unless @llms.key?(llm_name)
       @current_llm = @llms[llm_name]
       @current_llm_name = llm_name
       self
     end
+    def use_model(model_name)
+      model_name = model_name.to_s
+      model_config = @models[model_name] || @models[model_name.to_sym]
+      raise ConfigurationError, "Model #{model_name} not configured" unless model_config
+      llm_name = model_config["use"] || model_config[:use]
+      configured_model_name = model_config["model"] || model_config[:model]
+      raise ConfigurationError, "Model #{model_name} must define use" if llm_name.nil? || llm_name.empty?
+      raise ConfigurationError, "Model #{model_name} must define model" if configured_model_name.nil? || configured_model_name.empty?
+      use(llm_name)
+      model(configured_model_name)
+      merge_model_request_options(model_config)
+      self
+    end
     def model(model_name)
       @model_name = model_name
-      if @engine.config["better_prompt_db"]
-        BetterPrompt.add_model(@current_llm_name, @model_name)
-      end
     end
     def temperature(temperature)
       @temperature = temperature
     end
+    def request_options(options = {})
+      @request_options.merge!(options || {})
+      self
+    end
+    def thinking(enabled = true)
+      @thinking_enabled = enabled
+      if @sys_msg
+        @sys_msg = thinking_system_message(@sys_msg)
+        refresh_system_message(@sys_msg)
+      end
+      self
+    end
     def history_messages
       # If using HistoryManager, get messages from session
       if @use_history_manager && @engine.history_manager
@@ -77,32 +122,43 @@ module SmartPrompt
         SmartPrompt.logger.info "Use template #{template_name}"
         raise "Template #{template_name} not found" unless @templates.key?(template_name)
         content = @templates[template_name].render(params)
-        add_message({ role: "user", content: content }, with_history)
-        if @engine.config["better_prompt_db"]
-          BetterPrompt.add_prompt(template_name, "user", content)
-        end
+        add_user_content(content, with_history)
         self
       else
-        add_message({ role: "user", content: template_name }, with_history)
-        if @engine.config["better_prompt_db"]
-          BetterPrompt.add_prompt("NULL", "user", template_name)
-        end
+        add_user_content(template_name, with_history)
         self
       end
     end
     def sys_msg(message, params = {})
-      @sys_msg = message
-      add_message({ role: "system", content: message }, params[:with_history])
-      if @engine.config["better_prompt_db"]
-        BetterPrompt.add_prompt("NULL", "system", message)
-      end
+      @sys_msg = thinking_system_message(message)
+      add_message({ role: "system", content: @sys_msg }, params[:with_history])
+      self
+    end
+    def multimodal_prompt(parts, with_history: false)
+      add_message({ role: "user", content: normalize_content_parts(parts) }, with_history)
+      self
+    end
+    def image(source, token_budget: nil, **metadata)
+      @pending_content_parts << media_part("image", source, token_budget: token_budget, **metadata)
+      self
+    end
+    def audio(source, **metadata)
+      @pending_content_parts << media_part("audio", source, **metadata)
+      self
+    end
+    def video(source, fps: nil, max_seconds: nil, **metadata)
+      @pending_content_parts << media_part("video", source, fps: fps, max_seconds: max_seconds, **metadata)
       self
     end
     def send_msg_once
       raise "No LLM selected" if @current_llm.nil?
-      @last_response = @current_llm.send_request(@messages, @model_name, @temperature)
+      @last_response = send_llm_request(@messages, nil)
       @messages = []
       @messages << { role: "system", content: @sys_msg }
       @last_response
@@ -120,24 +176,14 @@ module SmartPrompt
     def send_msg(params = {})
       Retriable.retriable(RETRY_OPTIONS) do
         raise ConfigurationError, "No LLM selected" if @current_llm.nil?
-        if @engine.config["better_prompt_db"]
-          if params[:with_history]
-            @last_call_id = BetterPrompt.add_model_call(@current_llm_name, @model_name, history_messages, false, @temperature, 0, 0.0, 0, @tools)
-          else
-            @last_call_id = BetterPrompt.add_model_call(@current_llm_name, @model_name, @messages, false, @temperature, 0, 0.0, 0, @tools)
-          end
-        end
         if params[:with_history]
-          @last_response = @current_llm.send_request(history_messages, @model_name, @temperature, @tools, nil)
+          @last_response = send_llm_request(history_messages, nil)
         else
-          @last_response = @current_llm.send_request(@messages, @model_name, @temperature, @tools, nil)
+          @last_response = send_llm_request(@messages, nil)
         end
         if @last_response == ""
           @last_response = @current_llm.last_response
         end
-        if @engine.config["better_prompt_db"]
-          BetterPrompt.add_response(@last_call_id, @last_response, false)
-        end
         @messages = []
         @messages << { role: "system", content: @sys_msg }
         @last_response
@@ -149,20 +195,10 @@ module SmartPrompt
     def send_msg_by_stream(params = {}, &proc)
       Retriable.retriable(RETRY_OPTIONS) do
         raise ConfigurationError, "No LLM selected" if @current_llm.nil?
-        if @engine.config["better_prompt_db"]
-          if params[:with_history]
-            @last_call_id = BetterPrompt.add_model_call(@current_llm_name, @model_name, history_messages, true, @temperature, 0, 0.0, 0, @tools)
-          else
-            @last_call_id = BetterPrompt.add_model_call(@current_llm_name, @model_name, @messages, true, @temperature, 0, 0.0, 0, @tools)
-          end
-        end
         if params[:with_history]
-          @current_llm.send_request(history_messages, @model_name, @temperature, @tools, proc)
+          send_llm_request(history_messages, proc)
         else
-          @current_llm.send_request(@messages, @model_name, @temperature, @tools, proc)
-        end
-        if @engine.config["better_prompt_db"]
-          BetterPrompt.add_response(@last_call_id, @engine.stream_response, true)
+          send_llm_request(@messages, proc)
         end
         @messages = []
         @messages << { role: "system", content: @sys_msg }
@@ -197,6 +233,123 @@ module SmartPrompt
       end
     end
+    private
+    def send_llm_request(messages, proc)
+      parameters = @current_llm.method(:send_request).parameters
+      if parameters.length >= 6
+        @current_llm.send_request(messages, @model_name, @temperature, @tools, proc, @request_options)
+      else
+        @current_llm.send_request(messages, @model_name, @temperature, @tools, proc)
+      end
+    end
+    def merge_model_request_options(model_config)
+      explicit_options = model_config["request_options"] || model_config[:request_options] || {}
+      @request_options.merge!(explicit_options)
+      MODEL_REQUEST_OPTION_KEYS.each do |key|
+        value = model_config[key] || model_config[key.to_sym]
+        @request_options[key.to_sym] = value unless value.nil?
+      end
+    end
+    def add_user_content(content, with_history)
+      if @pending_content_parts.empty?
+        add_message({ role: "user", content: content }, with_history)
+      else
+        add_message({ role: "user", content: multimodal_content(content) }, with_history)
+        @pending_content_parts = []
+      end
+    end
+    def multimodal_content(text)
+      parts = @pending_content_parts
+      images_and_videos = parts.select { |part| ["image_url", "image", "video_url", "video"].include?(part[:type] || part["type"]) }
+      audio_parts = parts.select { |part| ["input_audio", "audio"].include?(part[:type] || part["type"]) }
+      other_parts = parts - images_and_videos - audio_parts
+      normalize_content_parts(images_and_videos + other_parts + [{ type: "text", text: text.to_s }] + audio_parts)
+    end
+    def normalize_content_parts(parts)
+      parts.map do |part|
+        normalized = part.transform_keys(&:to_s)
+        normalized["text"] = normalized.delete("content") if normalized["type"] == "text" && normalized.key?("content")
+        normalized
+      end
+    end
+    def media_part(type, source, **metadata)
+      case type
+      when "image"
+        mime_type = detect_image_mime(source)
+        data = File.binread(source)
+        base64_data = Base64.strict_encode64(data)
+        url = "data:#{mime_type};base64,#{base64_data}"
+        part = { type: "image_url", image_url: { url: url } }
+      when "audio"
+        format = detect_audio_format(source)
+        data = File.binread(source)
+        base64_data = Base64.strict_encode64(data)
+        part = { type: "input_audio", input_audio: { data: base64_data, format: format } }
+      when "video"
+        mime_type = detect_video_mime(source)
+        data = File.binread(source)
+        base64_data = Base64.strict_encode64(data)
+        url = "data:#{mime_type};base64,#{base64_data}"
+        part = { type: "video_url", video_url: { url: url } }
+      else
+        part = { type: type }
+      end
+      metadata.each do |key, value|
+        part[key] = value unless value.nil?
+      end
+      part
+    end
+    def detect_image_mime(path)
+      ext = File.extname(path).downcase
+      case ext
+      when ".png"  then "image/png"
+      when ".jpg", ".jpeg" then "image/jpeg"
+      when ".gif"  then "image/gif"
+      when ".webp" then "image/webp"
+      when ".bmp"  then "image/bmp"
+      when ".svg"  then "image/svg+xml"
+      else "application/octet-stream"
+      end
+    end
+    def detect_audio_format(path)
+      ext = File.extname(path).downcase.delete_prefix(".")
+      %w[wav mp3 ogg flac aac m4a].include?(ext) ? ext : "wav"
+    end
+    def detect_video_mime(path)
+      ext = File.extname(path).downcase
+      case ext
+      when ".mp4"  then "video/mp4"
+      when ".webm" then "video/webm"
+      when ".mov"  then "video/quicktime"
+      when ".avi"  then "video/x-msvideo"
+      else "application/octet-stream"
+      end
+    end
+    def thinking_system_message(message)
+      message = message.to_s.sub(/\A<\|think\|>\n?/, "")
+      return message if @thinking_enabled == false
+      return message unless @thinking_enabled == true
+      "<|think|>\n#{message}"
+    end
+    def refresh_system_message(message)
+      system_message = @messages.find { |item| (item[:role] || item["role"]) == "system" }
+      system_message[:content] = message if system_message
+    end
+    public
     def generate_image(prompt, params = {})
       @current_llm.generate_image(prompt, params)
     end