RubyGems - raif - Versions diffs - 1.3.0 → 1.5.0 - Mend

raif 1.3.0 → 1.5.0

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (206)

data/app/models/raif/model_tool.rb CHANGED Viewed

@@ -3,7 +3,7 @@
 class Raif::ModelTool
   include Raif::Concerns::JsonSchemaDefinition
-  delegate :tool_name, :tool_description, :tool_arguments_schema, :example_model_invocation, to: :class
+  delegate :tool_name, :tool_description, :example_model_invocation, to: :class
   class << self
     # The description of the tool that will be provided to the model
@@ -53,9 +53,9 @@ class Raif::ModelTool
       name.gsub("Raif::ModelTools::", "").underscore
     end
-    def tool_arguments_schema(&block)
+    def tool_arguments_schema(dynamic: false, &block)
       if block_given?
-        json_schema_definition(:tool_arguments, &block)
+        json_schema_definition(:tool_arguments, dynamic: dynamic, &block)
       elsif schema_defined?(:tool_arguments)
         schema_for(:tool_arguments)
       else
@@ -76,11 +76,14 @@ class Raif::ModelTool
       false
     end
-    def invoke_tool(tool_arguments:, source:)
+    def invoke_tool(provider_tool_call_id:, tool_arguments:, source:)
+      prepared_arguments = prepare_tool_arguments(tool_arguments)
       tool_invocation = Raif::ModelToolInvocation.new(
+        provider_tool_call_id: provider_tool_call_id,
         source: source,
         tool_type: name,
-        tool_arguments: tool_arguments
+        tool_arguments: prepared_arguments
       )
       ActiveRecord::Base.transaction do
@@ -94,6 +97,53 @@ class Raif::ModelTool
       tool_invocation.failed!
       raise e
     end
+    # Prepares tool arguments before validation and invocation. Override in subclasses
+    # to add tool-specific argument processing (e.g. type coercion, default injection).
+    # The base implementation strips keys not declared in the tool's argument schema,
+    # which handles LLMs that hallucinate extra parameters.
+    #
+    # @param arguments [Hash] The raw tool arguments from the LLM response
+    # @return [Hash] The prepared arguments ready for validation and processing
+    def prepare_tool_arguments(arguments)
+      strip_unknown_tool_arguments(arguments)
+    end
+  private
+    # Removes keys from the arguments hash that are not declared in the tool's
+    # argument schema. Logs a warning when keys are stripped so hallucination
+    # patterns can be monitored. Normalizes all keys to strings for consistent
+    # comparison since the schema builder uses symbol keys and LLM responses
+    # use string keys.
+    #
+    # @param arguments [Hash] The raw tool arguments
+    # @return [Hash] The arguments with only schema-declared keys
+    def strip_unknown_tool_arguments(arguments)
+      return arguments unless arguments.is_a?(Hash)
+      schema_properties = tool_arguments_schema[:properties] || tool_arguments_schema["properties"]
+      return arguments if schema_properties.blank?
+      normalized_arguments = arguments.deep_stringify_keys
+      allowed_keys = schema_properties.keys.map(&:to_s)
+      dropped_keys = normalized_arguments.keys - allowed_keys
+      if dropped_keys.any?
+        Rails.logger.warn(
+          "[Raif::ModelTool] Stripped unexpected tool arguments for #{name}: #{dropped_keys.join(", ")}"
+        )
+      end
+      normalized_arguments.slice(*allowed_keys)
+    end
+  end
+  # Instance method to get the tool arguments schema
+  # For instance-dependent schemas, builds the schema with this instance as context
+  # For class-level schemas, returns the class-level schema
+  def tool_arguments_schema
+    schema_for_instance(:tool_arguments)
   end
 end

data/app/models/raif/model_tool_invocation.rb CHANGED Viewed

@@ -1,5 +1,25 @@
 # frozen_string_literal: true
+# == Schema Information
+#
+# Table name: raif_model_tool_invocations
+#
+#  id                    :bigint           not null, primary key
+#  completed_at          :datetime
+#  failed_at             :datetime
+#  result                :jsonb            not null
+#  source_type           :string           not null
+#  tool_arguments        :jsonb            not null
+#  tool_type             :string           not null
+#  created_at            :datetime         not null
+#  updated_at            :datetime         not null
+#  provider_tool_call_id :string
+#  source_id             :bigint           not null
+#
+# Indexes
+#
+#  index_raif_model_tool_invocations_on_source  (source_type,source_id)
+#
 class Raif::ModelToolInvocation < Raif::ApplicationRecord
   belongs_to :source, polymorphic: true
@@ -22,24 +42,66 @@ class Raif::ModelToolInvocation < Raif::ApplicationRecord
     @tool ||= tool_type.constantize
   end
-  def as_llm_message
-    "Invoking tool: #{tool_name} with arguments: #{tool_arguments.to_json}"
+  # Returns tool call in the format expected by LLM message formatting
+  # @param assistant_message [String, nil] Optional assistant message accompanying the tool call
+  # @return [Hash] Hash representation for JSONB storage and LLM APIs
+  def as_tool_call_message(assistant_message: nil)
+    Raif::Messages::ToolCall.new(
+      provider_tool_call_id: provider_tool_call_id,
+      name: tool_name,
+      arguments: tool_arguments,
+      assistant_message: assistant_message
+    ).to_h
   end
-  def result_llm_message
-    return unless tool.respond_to?(:observation_for_invocation)
-    tool.observation_for_invocation(self)
+  # Returns tool result in the format expected by LLM message formatting
+  # @return [Hash] Hash representation for JSONB storage and LLM APIs
+  def as_tool_call_result_message(result: self.result)
+    Raif::Messages::ToolCallResult.new(
+      provider_tool_call_id: provider_tool_call_id,
+      name: tool_name,
+      result: result
+    ).to_h
   end
   def to_partial_path
     "raif/model_tool_invocations/#{tool.invocation_partial_name}"
   end
+  def admin_observation
+    admin_observation_result[:observation]
+  end
+  def admin_observation_error
+    admin_observation_result[:error]
+  end
+  def admin_observation_available?
+    admin_observation.present? || admin_observation_error.present?
+  end
   def ensure_valid_tool_argument_schema
     unless JSON::Validator.validate(tool_arguments_schema, tool_arguments)
       errors.add(:tool_arguments, "does not match schema")
     end
   end
+private
+  # Best-effort reconstruction of the observation shown in admin. This uses the
+  # current formatter code against persisted invocation data, so failures are
+  # captured for display instead of breaking the page render.
+  def admin_observation_result
+    @admin_observation_result ||= if completed? && triggers_observation_to_model?
+      begin
+        observation = tool.observation_for_invocation(self)
+        { observation: observation.presence, error: nil }
+      rescue StandardError => e
+        { observation: nil, error: e.message }
+      end
+    else
+      { observation: nil, error: nil }
+    end
+  end
 end

data/app/models/raif/model_tools/agent_final_answer.rb CHANGED Viewed

@@ -20,16 +20,11 @@ class Raif::ModelTools::AgentFinalAnswer < Raif::ModelTool
     def observation_for_invocation(tool_invocation)
       return "No answer provided" unless tool_invocation.result.present?
-      tool_invocation.result["final_answer"]
+      tool_invocation.result
     end
     def process_invocation(tool_invocation)
-      tool_invocation.update!(
-        result: {
-          final_answer: tool_invocation.tool_arguments["final_answer"]
-        }
-      )
+      tool_invocation.update!(result: tool_invocation.tool_arguments["final_answer"])
       tool_invocation.result
     end
   end

data/app/models/raif/model_tools/provider_managed/code_execution.rb CHANGED Viewed

@@ -2,4 +2,8 @@
 class Raif::ModelTools::ProviderManaged::CodeExecution < Raif::ModelTools::ProviderManaged::Base
+  tool_description do
+    "Utilizes the model provider's built-in code execution capabilities."
+  end
 end

data/app/models/raif/model_tools/provider_managed/image_generation.rb CHANGED Viewed

@@ -2,4 +2,8 @@
 class Raif::ModelTools::ProviderManaged::ImageGeneration < Raif::ModelTools::ProviderManaged::Base
+  tool_description do
+    "Utilizes the model provider's built-in image generation capabilities."
+  end
 end

data/app/models/raif/model_tools/provider_managed/web_search.rb CHANGED Viewed

@@ -2,4 +2,8 @@
 class Raif::ModelTools::ProviderManaged::WebSearch < Raif::ModelTools::ProviderManaged::Base
+  tool_description do
+    "Utilizes the model provider's built-in web search capabilities."
+  end
 end

data/app/models/raif/prompt_studio_batch_run.rb ADDED Viewed

@@ -0,0 +1,155 @@
+# frozen_string_literal: true
+# == Schema Information
+#
+# Table name: raif_prompt_studio_batch_runs
+#
+#  id                  :bigint           not null, primary key
+#  completed_at        :datetime
+#  completed_count     :integer          default(0), not null
+#  failed_at           :datetime
+#  failed_count        :integer          default(0), not null
+#  judge_config        :jsonb            not null
+#  judge_llm_model_key :string
+#  judge_type          :string
+#  llm_model_key       :string           not null
+#  started_at          :datetime
+#  task_type           :string           not null
+#  total_count         :integer          default(0), not null
+#  created_at          :datetime         not null
+#  updated_at          :datetime         not null
+#
+module Raif
+  class PromptStudioBatchRun < Raif::ApplicationRecord
+    ALLOWED_JUDGE_TYPES = [
+      "Raif::Evals::LlmJudges::Binary",
+      "Raif::Evals::LlmJudges::Scored",
+      "Raif::Evals::LlmJudges::Comparative",
+      "Raif::Evals::LlmJudges::Summarization"
+    ].freeze
+    after_initialize -> { self.judge_config ||= {} }
+    has_many :items,
+      class_name: "Raif::PromptStudioBatchRunItem",
+      foreign_key: :batch_run_id,
+      dependent: :destroy,
+      inverse_of: :batch_run
+    boolean_timestamp :started_at
+    boolean_timestamp :completed_at
+    boolean_timestamp :failed_at
+    validates :task_type, presence: true
+    validates :llm_model_key, presence: true
+    validates :judge_type, inclusion: { in: ALLOWED_JUDGE_TYPES }, allow_nil: true
+    def status
+      if completed_at?
+        :completed
+      elsif failed_at?
+        :failed
+      elsif started_at?
+        :in_progress
+      else
+        :pending
+      end
+    end
+    def progress_percentage
+      return 0 if total_count.zero?
+      ((completed_count + failed_count).to_f / total_count * 100).round
+    end
+    def has_judge?
+      judge_type.present?
+    end
+    def judge_class
+      judge_type&.safe_constantize
+    end
+    def judge_pass_rate
+      judge_tasks = completed_judge_tasks
+      return if judge_tasks.empty?
+      pass_count = judge_tasks.count(&:passes?)
+      percentage = ((pass_count.to_f / judge_tasks.size) * 100).round
+      "#{percentage}% (#{pass_count}/#{judge_tasks.size})"
+    end
+    def judge_average_score
+      scores = completed_judge_tasks.filter_map(&:judgment_score)
+      return if scores.empty?
+      (scores.sum.to_f / scores.size).round(1)
+    end
+    def judge_comparative_summary
+      completed_items = items.where.not(judge_task_id: nil).includes(:judge_task)
+      return if completed_items.empty?
+      new_wins = 0
+      original_wins = 0
+      ties = 0
+      completed_items.each do |item|
+        next unless item.judge_task&.completed?
+        parsed = item.judge_task.parsed_response
+        next unless parsed.is_a?(Hash)
+        winner = parsed["winner"]
+        if winner == "tie"
+          ties += 1
+        elsif winner == item.metadata&.dig("new_response_letter")
+          new_wins += 1
+        else
+          original_wins += 1
+        end
+      end
+      total = new_wins + original_wins + ties
+      return if total.zero?
+      {
+        new_wins: new_wins,
+        original_wins: original_wins,
+        ties: ties,
+        total: total,
+        new_win_pct: ((new_wins.to_f / total) * 100).round,
+        original_win_pct: ((original_wins.to_f / total) * 100).round,
+        tie_pct: ((ties.to_f / total) * 100).round
+      }
+    end
+  private
+    def completed_judge_tasks
+      Raif::Task.where(
+        id: items.where.not(judge_task_id: nil).select(:judge_task_id)
+      ).where.not(completed_at: nil)
+    end
+  public
+    def check_completion!
+      reload
+      remaining = items.where(status: %w[pending running judging]).count
+      self.completed_count = items.where(status: "completed").count
+      self.failed_count = items.where(status: "failed").count
+      if remaining.zero?
+        if failed_count > 0 && completed_count == 0
+          self.failed_at = Time.current
+        else
+          self.completed_at = Time.current
+        end
+      end
+      save!
+    end
+  end
+end

data/app/models/raif/prompt_studio_batch_run_item.rb ADDED Viewed

@@ -0,0 +1,220 @@
+# frozen_string_literal: true
+# == Schema Information
+#
+# Table name: raif_prompt_studio_batch_run_items
+#
+#  id             :bigint           not null, primary key
+#  metadata       :jsonb
+#  status         :string           default("pending"), not null
+#  created_at     :datetime         not null
+#  updated_at     :datetime         not null
+#  batch_run_id   :bigint           not null
+#  judge_task_id  :bigint
+#  result_task_id :bigint
+#  source_task_id :bigint           not null
+#
+# Indexes
+#
+#  index_raif_prompt_studio_batch_run_items_on_batch_run_id    (batch_run_id)
+#  index_raif_prompt_studio_batch_run_items_on_judge_task_id   (judge_task_id)
+#  index_raif_prompt_studio_batch_run_items_on_result_task_id  (result_task_id)
+#  index_raif_prompt_studio_batch_run_items_on_source_task_id  (source_task_id)
+#  index_raif_prompt_studio_batch_run_items_on_status          (status)
+#
+# Foreign Keys
+#
+#  fk_rails_...  (batch_run_id => raif_prompt_studio_batch_runs.id)
+#  fk_rails_...  (judge_task_id => raif_tasks.id)
+#  fk_rails_...  (result_task_id => raif_tasks.id)
+#  fk_rails_...  (source_task_id => raif_tasks.id)
+#
+module Raif
+  class PromptStudioBatchRunItem < Raif::ApplicationRecord
+    include ActionView::RecordIdentifier
+    STATUSES = %w[pending running judging completed failed].freeze
+    after_initialize -> { self.metadata ||= {} }
+    belongs_to :batch_run,
+      class_name: "Raif::PromptStudioBatchRun",
+      inverse_of: :items
+    belongs_to :source_task,
+      class_name: "Raif::Task"
+    belongs_to :result_task,
+      class_name: "Raif::Task",
+      optional: true
+    belongs_to :judge_task,
+      class_name: "Raif::Task",
+      optional: true
+    validates :status, inclusion: { in: STATUSES }
+    def execute!
+      update!(status: "running")
+      broadcast_item
+      new_task = create_and_run_task
+      run_judge_if_configured(new_task)
+      update!(status: "completed")
+    rescue StandardError => e
+      Rails.logger.error "Error running batch run item ##{id}: #{e.message}"
+      Rails.logger.error e.backtrace&.join("\n")
+      update!(status: "failed")
+    ensure
+      broadcast_item
+      batch_run.check_completion!
+      broadcast_progress
+    end
+    def judge_summary
+      return unless judge_task&.completed?
+      parsed = judge_task.parsed_response
+      return unless parsed.is_a?(Hash)
+      case batch_run.judge_type
+      when "Raif::Evals::LlmJudges::Binary"
+        parsed["passes"] ? "PASS" : "FAIL"
+      when "Raif::Evals::LlmJudges::Scored"
+        "Score: #{parsed["score"]}"
+      when "Raif::Evals::LlmJudges::Comparative"
+        if parsed["winner"] == "tie"
+          I18n.t("raif.admin.prompt_studio.batch_runs.judge.tie")
+        else
+          winner_label = comparative_winner_label(parsed["winner"])
+          I18n.t("raif.admin.prompt_studio.batch_runs.judge.winner", name: winner_label)
+        end
+      when "Raif::Evals::LlmJudges::Summarization"
+        "Overall: #{parsed.dig("overall", "score")}/5"
+      end
+    end
+    def judge_reasoning
+      return unless judge_task&.completed?
+      parsed = judge_task.parsed_response
+      return unless parsed.is_a?(Hash)
+      parsed["reasoning"]
+    end
+    def comparative_winner_label(winner_letter)
+      new_response_letter = metadata&.dig("new_response_letter")
+      return winner_letter unless new_response_letter
+      if winner_letter == new_response_letter
+        I18n.t("raif.admin.prompt_studio.batch_runs.judge.new_response")
+      else
+        I18n.t("raif.admin.prompt_studio.batch_runs.judge.original_response")
+      end
+    end
+  private
+    def create_and_run_task
+      new_task = source_task.class.new(
+        creator: source_task.creator,
+        source: source_task,
+        llm_model_key: batch_run.llm_model_key,
+        available_model_tools: source_task.available_model_tools,
+        run_with: source_task.run_with,
+        prompt_studio_run: true,
+        started_at: Time.current
+      )
+      new_task.assign_attributes(source_task.prompt_studio_task_attributes)
+      new_task.save!
+      update!(result_task_id: new_task.id)
+      new_task.run
+      new_task
+    end
+    def run_judge_if_configured(new_task)
+      return unless batch_run.has_judge? && new_task.completed?
+      update!(status: "judging")
+      broadcast_item
+      judge_result = invoke_judge(new_task)
+      update!(judge_task_id: judge_result.id)
+    end
+    def invoke_judge(new_task)
+      judge_class = batch_run.judge_class
+      config = batch_run.judge_config
+      judge_args = {
+        creator: source_task.creator,
+        prompt_studio_run: true,
+        llm_model_key: batch_run.judge_llm_model_key
+      }
+      judge_args.merge!(source_task.prompt_studio_task_attributes)
+      if config["include_original_prompt_as_context"]
+        judge_args[:additional_context] =
+          "The content being evaluated was generated in response to the following prompt:\n\n#{source_task.prompt}"
+      end
+      case batch_run.judge_type
+      when "Raif::Evals::LlmJudges::Binary"
+        judge_class.run(
+          content_to_judge: new_task.raw_response,
+          criteria: config["criteria"],
+          strict_mode: config["strict_mode"],
+          **judge_args
+        )
+      when "Raif::Evals::LlmJudges::Scored"
+        rubric = Raif::Evals::ScoringRubric.send(config["scoring_rubric"])
+        judge_class.run(
+          content_to_judge: new_task.raw_response,
+          scoring_rubric: rubric,
+          **judge_args
+        )
+      when "Raif::Evals::LlmJudges::Comparative"
+        result = judge_class.run(
+          content_to_judge: new_task.raw_response,
+          over_content: source_task.raw_response,
+          comparison_criteria: config["comparison_criteria"],
+          **judge_args
+        )
+        # Store which letter was assigned to the new response so we can display
+        # "Winner: New Response" / "Winner: Original Response" instead of "A"/"B"
+        update!(metadata: metadata.merge("new_response_letter" => result.expected_winner))
+        result
+      when "Raif::Evals::LlmJudges::Summarization"
+        judge_class.run(
+          original_content: source_task.prompt,
+          summary: new_task.raw_response,
+          **judge_args
+        )
+      end
+    end
+    def broadcast_item
+      Turbo::StreamsChannel.broadcast_replace_to(
+        batch_run,
+        target: dom_id(self),
+        partial: "raif/admin/prompt_studio/batch_runs/batch_run_item",
+        locals: { item: self }
+      )
+    end
+    def broadcast_progress
+      batch_run.reload
+      Turbo::StreamsChannel.broadcast_replace_to(
+        batch_run,
+        target: dom_id(batch_run, :progress),
+        partial: "raif/admin/prompt_studio/batch_runs/progress",
+        locals: { batch_run: batch_run }
+      )
+    end
+  end
+end

data/app/models/raif/streaming_responses/bedrock.rb CHANGED Viewed

@@ -3,6 +3,8 @@
 class Raif::StreamingResponses::Bedrock
   def initialize_new_message
+    @reasoning_content_blocks = {}
     # Initialize empty AWS response object
     @message = Aws::BedrockRuntime::Types::Message.new(
       role: "assistant",
@@ -62,9 +64,12 @@ class Raif::StreamingResponses::Bedrock
         )
         @message.content[index].tool_use.input += event.delta.tool_use.input
+      elsif event.delta.is_a?(Aws::BedrockRuntime::Types::ContentBlockDelta::ReasoningContent)
+        accumulate_reasoning_content(index, event.delta.reasoning_content)
       end
     when :content_block_stop
-      content_block = @message.content[event.content_block_index]
+      index = event.content_block_index
+      content_block = @message.content[index]
       if content_block&.tool_use&.input.is_a?(String)
         begin
@@ -73,6 +78,8 @@ class Raif::StreamingResponses::Bedrock
           # If parsing fails, leave as a string
         end
       end
+      finalize_reasoning_content(index)
     when :message_stop
       @response.stop_reason = event.stop_reason
     when :metadata
@@ -86,4 +93,56 @@ class Raif::StreamingResponses::Bedrock
     @response
   end
+private
+  def accumulate_reasoning_content(index, reasoning_delta)
+    reasoning_content = reasoning_content_for(index)
+    reasoning_content[:seen] = true
+    case reasoning_delta
+    when Aws::BedrockRuntime::Types::ReasoningContentBlockDelta::Text
+      reasoning_content[:text] << reasoning_delta.text.to_s
+    when Aws::BedrockRuntime::Types::ReasoningContentBlockDelta::Signature
+      reasoning_content[:signature] = reasoning_delta.signature
+    when Aws::BedrockRuntime::Types::ReasoningContentBlockDelta::RedactedContent
+      reasoning_content[:redacted_content] << reasoning_delta.redacted_content.to_s
+    else
+      reasoning_content[:unknown] = true
+    end
+  end
+  def finalize_reasoning_content(index)
+    reasoning_content = @reasoning_content_blocks.delete(index)
+    return unless reasoning_content&.dig(:seen)
+    @message.content[index] = Aws::BedrockRuntime::Types::ContentBlock::ReasoningContent.new(
+      reasoning_content: build_reasoning_content(reasoning_content)
+    )
+  end
+  def build_reasoning_content(reasoning_content)
+    if reasoning_content[:text].blank? && reasoning_content[:signature].blank? && reasoning_content[:redacted_content].present?
+      return Aws::BedrockRuntime::Types::ReasoningContentBlock::RedactedContent.new(
+        redacted_content: reasoning_content[:redacted_content]
+      )
+    end
+    Aws::BedrockRuntime::Types::ReasoningContentBlock::ReasoningText.new(
+      reasoning_text: Aws::BedrockRuntime::Types::ReasoningTextBlock.new(
+        text: reasoning_content[:text],
+        signature: reasoning_content[:signature]
+      )
+    )
+  end
+  def reasoning_content_for(index)
+    @reasoning_content_blocks[index] ||= {
+      seen: false,
+      text: +"",
+      signature: nil,
+      redacted_content: +"",
+      unknown: false
+    }
+  end
 end