RubyGems - raif - Versions diffs - 1.3.0 → 1.5.0 - Mend

raif 1.3.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (206) hide show

data/app/models/raif/evals/llm_judge.rb ADDED Viewed

@@ -0,0 +1,70 @@
+# frozen_string_literal: true
+# == Schema Information
+#
+# Table name: raif_tasks
+#
+#  id                     :bigint           not null, primary key
+#  available_model_tools  :jsonb            not null
+#  completed_at           :datetime
+#  creator_type           :string
+#  failed_at              :datetime
+#  llm_model_key          :string           not null
+#  prompt                 :text
+#  prompt_studio_run      :boolean          default(FALSE), not null
+#  raw_response           :text
+#  requested_language_key :string
+#  response_format        :integer          default("text"), not null
+#  run_with               :jsonb
+#  source_type            :string
+#  started_at             :datetime
+#  system_prompt          :text
+#  type                   :string           not null
+#  created_at             :datetime         not null
+#  updated_at             :datetime         not null
+#  creator_id             :bigint
+#  source_id              :bigint
+#
+# Indexes
+#
+#  index_raif_tasks_on_completed_at           (completed_at)
+#  index_raif_tasks_on_created_at             (created_at)
+#  index_raif_tasks_on_creator                (creator_type,creator_id)
+#  index_raif_tasks_on_failed_at              (failed_at)
+#  index_raif_tasks_on_source                 (source_type,source_id)
+#  index_raif_tasks_on_started_at             (started_at)
+#  index_raif_tasks_on_type                   (type)
+#  index_raif_tasks_on_type_and_completed_at  (type,completed_at)
+#  index_raif_tasks_on_type_and_failed_at     (type,failed_at)
+#  index_raif_tasks_on_type_and_started_at    (type,started_at)
+#
+module Raif
+  module Evals
+    class LlmJudge < Raif::Task # task whose LLM response is a judgment about content_to_judge
+      # Set default temperature for consistent judging
+      llm_temperature 0.0
+      # Default to JSON response format for structured output
+      llm_response_format :json
+      run_with :content_to_judge # the content to judge
+      run_with :additional_context # additional context to be provided to the judge
+      def default_llm_model_key # prefers Raif.config.evals_default_llm_judge_model_key, else falls back to super
+        Raif.config.evals_default_llm_judge_model_key || super
+      end
+      def judgment_reasoning # "reasoning" entry of parsed_response; nil unless completed?
+        parsed_response["reasoning"] if completed?
+      end
+      def judgment_confidence # "confidence" entry of parsed_response; nil unless completed?
+        parsed_response["confidence"] if completed?
+      end
+      def low_confidence? # truthy only when a confidence is present and below 0.5; falsy (nil) when none is available
+        judgment_confidence && judgment_confidence < 0.5
+      end
+    end
+  end
+end

data/{lib → app/models}/raif/evals/llm_judges/binary.rb RENAMED Viewed

@@ -1,12 +1,50 @@
 # frozen_string_literal: true
+# == Schema Information
+#
+# Table name: raif_tasks
+#
+#  id                     :bigint           not null, primary key
+#  available_model_tools  :jsonb            not null
+#  completed_at           :datetime
+#  creator_type           :string
+#  failed_at              :datetime
+#  llm_model_key          :string           not null
+#  prompt                 :text
+#  prompt_studio_run      :boolean          default(FALSE), not null
+#  raw_response           :text
+#  requested_language_key :string
+#  response_format        :integer          default("text"), not null
+#  run_with               :jsonb
+#  source_type            :string
+#  started_at             :datetime
+#  system_prompt          :text
+#  type                   :string           not null
+#  created_at             :datetime         not null
+#  updated_at             :datetime         not null
+#  creator_id             :bigint
+#  source_id              :bigint
+#
+# Indexes
+#
+#  index_raif_tasks_on_completed_at           (completed_at)
+#  index_raif_tasks_on_created_at             (created_at)
+#  index_raif_tasks_on_creator                (creator_type,creator_id)
+#  index_raif_tasks_on_failed_at              (failed_at)
+#  index_raif_tasks_on_source                 (source_type,source_id)
+#  index_raif_tasks_on_started_at             (started_at)
+#  index_raif_tasks_on_type                   (type)
+#  index_raif_tasks_on_type_and_completed_at  (type,completed_at)
+#  index_raif_tasks_on_type_and_failed_at     (type,failed_at)
+#  index_raif_tasks_on_type_and_started_at    (type,started_at)
+#
 module Raif
   module Evals
     module LlmJudges
       class Binary < Raif::Evals::LlmJudge
-        task_run_arg :criteria
-        task_run_arg :examples
-        task_run_arg :strict_mode
+        run_with :criteria
+        run_with :examples
+        run_with :strict_mode
         json_response_schema do
           boolean :passes, description: "Whether the content passes the criteria"

data/{lib → app/models}/raif/evals/llm_judges/comparative.rb RENAMED Viewed

@@ -1,12 +1,50 @@
 # frozen_string_literal: true
+# == Schema Information
+#
+# Table name: raif_tasks
+#
+#  id                     :bigint           not null, primary key
+#  available_model_tools  :jsonb            not null
+#  completed_at           :datetime
+#  creator_type           :string
+#  failed_at              :datetime
+#  llm_model_key          :string           not null
+#  prompt                 :text
+#  prompt_studio_run      :boolean          default(FALSE), not null
+#  raw_response           :text
+#  requested_language_key :string
+#  response_format        :integer          default("text"), not null
+#  run_with               :jsonb
+#  source_type            :string
+#  started_at             :datetime
+#  system_prompt          :text
+#  type                   :string           not null
+#  created_at             :datetime         not null
+#  updated_at             :datetime         not null
+#  creator_id             :bigint
+#  source_id              :bigint
+#
+# Indexes
+#
+#  index_raif_tasks_on_completed_at           (completed_at)
+#  index_raif_tasks_on_created_at             (created_at)
+#  index_raif_tasks_on_creator                (creator_type,creator_id)
+#  index_raif_tasks_on_failed_at              (failed_at)
+#  index_raif_tasks_on_source                 (source_type,source_id)
+#  index_raif_tasks_on_started_at             (started_at)
+#  index_raif_tasks_on_type                   (type)
+#  index_raif_tasks_on_type_and_completed_at  (type,completed_at)
+#  index_raif_tasks_on_type_and_failed_at     (type,failed_at)
+#  index_raif_tasks_on_type_and_started_at    (type,started_at)
+#
 module Raif
   module Evals
     module LlmJudges
       class Comparative < Raif::Evals::LlmJudge
-        task_run_arg :over_content # the content to compare against
-        task_run_arg :comparison_criteria # the criteria to use when comparing content_to_judge to over_content
-        task_run_arg :allow_ties # whether to allow ties in the comparison
+        run_with :over_content # the content to compare against
+        run_with :comparison_criteria # the criteria to use when comparing content_to_judge to over_content
+        run_with :allow_ties # whether to allow ties in the comparison
         attr_accessor :content_a, :content_b, :expected_winner

data/{lib → app/models}/raif/evals/llm_judges/scored.rb RENAMED Viewed

@@ -1,10 +1,48 @@
 # frozen_string_literal: true
+# == Schema Information
+#
+# Table name: raif_tasks
+#
+#  id                     :bigint           not null, primary key
+#  available_model_tools  :jsonb            not null
+#  completed_at           :datetime
+#  creator_type           :string
+#  failed_at              :datetime
+#  llm_model_key          :string           not null
+#  prompt                 :text
+#  prompt_studio_run      :boolean          default(FALSE), not null
+#  raw_response           :text
+#  requested_language_key :string
+#  response_format        :integer          default("text"), not null
+#  run_with               :jsonb
+#  source_type            :string
+#  started_at             :datetime
+#  system_prompt          :text
+#  type                   :string           not null
+#  created_at             :datetime         not null
+#  updated_at             :datetime         not null
+#  creator_id             :bigint
+#  source_id              :bigint
+#
+# Indexes
+#
+#  index_raif_tasks_on_completed_at           (completed_at)
+#  index_raif_tasks_on_created_at             (created_at)
+#  index_raif_tasks_on_creator                (creator_type,creator_id)
+#  index_raif_tasks_on_failed_at              (failed_at)
+#  index_raif_tasks_on_source                 (source_type,source_id)
+#  index_raif_tasks_on_started_at             (started_at)
+#  index_raif_tasks_on_type                   (type)
+#  index_raif_tasks_on_type_and_completed_at  (type,completed_at)
+#  index_raif_tasks_on_type_and_failed_at     (type,failed_at)
+#  index_raif_tasks_on_type_and_started_at    (type,started_at)
+#
 module Raif
   module Evals
     module LlmJudges
       class Scored < Raif::Evals::LlmJudge
-        task_run_arg :scoring_rubric # the scoring rubric to use when evaluating the content
+        run_with :scoring_rubric # the scoring rubric to use when evaluating the content
         json_response_schema do
           number :score, description: "Numerical score based on the rubric"

data/{lib → app/models}/raif/evals/llm_judges/summarization.rb RENAMED Viewed

@@ -1,11 +1,49 @@
 # frozen_string_literal: true
+# == Schema Information
+#
+# Table name: raif_tasks
+#
+#  id                     :bigint           not null, primary key
+#  available_model_tools  :jsonb            not null
+#  completed_at           :datetime
+#  creator_type           :string
+#  failed_at              :datetime
+#  llm_model_key          :string           not null
+#  prompt                 :text
+#  prompt_studio_run      :boolean          default(FALSE), not null
+#  raw_response           :text
+#  requested_language_key :string
+#  response_format        :integer          default("text"), not null
+#  run_with               :jsonb
+#  source_type            :string
+#  started_at             :datetime
+#  system_prompt          :text
+#  type                   :string           not null
+#  created_at             :datetime         not null
+#  updated_at             :datetime         not null
+#  creator_id             :bigint
+#  source_id              :bigint
+#
+# Indexes
+#
+#  index_raif_tasks_on_completed_at           (completed_at)
+#  index_raif_tasks_on_created_at             (created_at)
+#  index_raif_tasks_on_creator                (creator_type,creator_id)
+#  index_raif_tasks_on_failed_at              (failed_at)
+#  index_raif_tasks_on_source                 (source_type,source_id)
+#  index_raif_tasks_on_started_at             (started_at)
+#  index_raif_tasks_on_type                   (type)
+#  index_raif_tasks_on_type_and_completed_at  (type,completed_at)
+#  index_raif_tasks_on_type_and_failed_at     (type,failed_at)
+#  index_raif_tasks_on_type_and_started_at    (type,started_at)
+#
 module Raif
   module Evals
     module LlmJudges
       class Summarization < Raif::Evals::LlmJudge
-        task_run_arg :original_content # the original content to evaluate the summary against
-        task_run_arg :summary # the summary to evaluate against the original content
+        run_with :original_content # the original content to evaluate the summary against
+        run_with :summary # the summary to evaluate against the original content
         json_response_schema do
           object :coverage do

data/app/models/raif/llm.rb CHANGED Viewed

@@ -7,6 +7,7 @@ module Raif
     attr_accessor :key,
       :api_name,
+      :display_name,
       :default_temperature,
       :default_max_completion_tokens,
       :supports_native_tool_use,
@@ -25,6 +26,7 @@ module Raif
     def initialize(
       key:,
       api_name:,
+      display_name: nil,
       model_provider_settings: {},
       supported_provider_managed_tools: [],
       supports_native_tool_use: true,
@@ -35,6 +37,7 @@ module Raif
     )
       @key = key
       @api_name = api_name
+      @display_name = display_name
       @provider_settings = model_provider_settings
       @supports_native_tool_use = supports_native_tool_use
       @default_temperature = temperature || 0.7
@@ -45,11 +48,11 @@ module Raif
     end
     def name
-      I18n.t("raif.model_names.#{key}")
+      I18n.t("raif.model_names.#{key}", default: display_name || key.to_s.humanize)
     end
     def chat(message: nil, messages: nil, response_format: :text, available_model_tools: [], source: nil, system_prompt: nil, temperature: nil,
-      max_completion_tokens: nil, &block)
+      max_completion_tokens: nil, tool_choice: nil, anthropic_prompt_caching_enabled: false, bedrock_prompt_caching_enabled: false, &block)
       unless response_format.is_a?(Symbol)
         raise ArgumentError,
           "Raif::Llm#chat - Invalid response format: #{response_format}. Must be a symbol (you passed #{response_format.class}) and be one of: #{VALID_RESPONSE_FORMATS.join(", ")}" # rubocop:disable Layout/LineLength
@@ -67,6 +70,19 @@ module Raif
         raise ArgumentError, "Raif::Llm#chat - You must provide either a message: or messages: argument, not both"
       end
+      # Normalize :required / "required" to the symbol form for validation
+      tool_choice = :required if tool_choice.to_s == "required"
+      if tool_choice == :required
+        if available_model_tools.blank?
+          raise ArgumentError,
+            "Raif::Llm#chat - tool_choice: :required requires at least one available model tool"
+        end
+      elsif tool_choice.present? && !available_model_tools.map(&:to_s).include?(tool_choice.to_s)
+        raise ArgumentError,
+          "Raif::Llm#chat - Invalid tool choice: #{tool_choice} is not included in the available model tools: #{available_model_tools.join(", ")}"
+      end
       unless Raif.config.llm_api_requests_enabled
         Raif.logger.warn("LLM API requests are disabled. Skipping request to #{api_name}.")
         return
@@ -87,20 +103,33 @@ module Raif
         temperature: temperature,
         max_completion_tokens: max_completion_tokens,
         available_model_tools: available_model_tools,
+        tool_choice: tool_choice&.to_s,
         stream_response: block_given?
       )
+      model_completion.anthropic_prompt_caching_enabled = anthropic_prompt_caching_enabled
+      model_completion.bedrock_prompt_caching_enabled = bedrock_prompt_caching_enabled
+      model_completion.started!
       retry_with_backoff(model_completion) do
         perform_model_completion!(model_completion, &block)
+        ensure_model_completion_present!(model_completion)
       end
+      model_completion.completed!
       model_completion
     rescue Raif::Errors::StreamingError => e
       Rails.logger.error("Raif streaming error -- code: #{e.code} -- type: #{e.type} -- message: #{e.message} -- event: #{e.event}")
+      model_completion&.record_failure!(e) unless model_completion&.failed?
       raise e
     rescue Faraday::Error => e
       Raif.logger.error("LLM API request failed (status: #{e.response_status}): #{e.message}")
       Raif.logger.error(e.response_body)
+      model_completion&.record_failure!(e) unless model_completion&.failed?
+      raise e
+    rescue StandardError => e
+      model_completion&.record_failure!(e) unless model_completion&.failed?
       raise e
     end
@@ -112,10 +141,52 @@ module Raif
       VALID_RESPONSE_FORMATS
     end
+    # Override in subclasses to indicate whether prompt_tokens reported by the
+    # provider already include cached tokens as a subset (OpenAI, Google,
+    # OpenRouter) or whether cached tokens are reported separately and are
+    # additive to prompt_tokens (Anthropic, Bedrock).
+    def self.prompt_tokens_include_cached_tokens?
+      true
+    end
+    # Multiplier applied to the base input_token_cost to derive the per-token
+    # cost for cache reads.  Return nil when the provider has no cache pricing.
+    def self.cache_read_input_token_cost_multiplier
+      nil
+    end
+    # Multiplier applied to the base input_token_cost to derive the per-token
+    # cost for cache creation writes.  Return nil when there is no write surcharge.
+    def self.cache_creation_input_token_cost_multiplier
+      nil
+    end
     def supports_provider_managed_tool?(tool_klass) # matches on tool_klass.to_s; nil when supported_provider_managed_tools is nil
       supported_provider_managed_tools&.include?(tool_klass.to_s)
     end
+    # Build the tool_choice parameter to force a specific tool to be called.
+    # Each provider implements this to return the correct format.
+    # @param tool_name [String] The name of the tool to force
+    # @return [Hash] The tool_choice parameter for the provider's API
+    def build_forced_tool_choice(tool_name)
+      raise NotImplementedError, "#{self.class.name} must implement #build_forced_tool_choice"
+    end
+    # Build the tool_choice parameter to require the model to call any tool (but not a specific one).
+    # Each provider implements this to return the correct format.
+    # @return [Hash, String] The tool_choice parameter for the provider's API
+    def build_required_tool_choice
+      raise NotImplementedError, "#{self.class.name} must implement #build_required_tool_choice"
+    end
+    # Whether the provider can faithfully enforce tool_choice: :required for
+    # the given tool set. Override in subclasses when a provider can only
+    # enforce required tool use for some tool types.
+    def supports_faithful_required_tool_choice?(available_model_tools)
+      available_model_tools.present?
+    end
     def validate_provider_managed_tool_support!(tool)
       unless supports_provider_managed_tool?(tool)
         raise Raif::Errors::UnsupportedFeatureError,
@@ -125,6 +196,10 @@ module Raif
   private
+    def retriable_exceptions # exception classes splatted into retry_with_backoff's rescue clause; read from Raif.config
+      Raif.config.llm_request_retriable_exceptions
+    end
     def retry_with_backoff(model_completion)
       retries = 0
       max_retries = Raif.config.llm_request_max_retries
@@ -133,11 +208,11 @@ module Raif
       begin
         yield
-      rescue *Raif.config.llm_request_retriable_exceptions => e
+      rescue *retriable_exceptions => e
         retries += 1
         if retries <= max_retries
           delay = [base_delay * (2**(retries - 1)), max_delay].min
-          Raif.logger.warn("Retrying LLM API request after error: #{e.message}. Attempt #{retries}/#{max_retries}. Waiting #{delay} seconds...")
+          log_retry(e, model_completion, retries, max_retries, delay)
           model_completion.increment!(:retry_count)
           sleep delay
           retry
@@ -148,10 +223,35 @@ module Raif
       end
     end
+    def log_retry(error, model_completion, attempt, max_retries, delay) # emits one warning describing the upcoming retry
+      if error.is_a?(Raif::Errors::BlankResponseError) # blank responses get extra diagnostics about the completion
+        has_reasoning = model_completion.response_array&.any? do |block| # nil when response_array is nil
+          block.is_a?(Hash) ? block.key?("reasoning_content") : block.respond_to?(:reasoning_content)
+        end
+        Raif.logger.warn(
+          "Blank response retry #{attempt}/#{max_retries} for #{api_name} " \
+            "(ModelCompletion##{model_completion.id}, source: #{model_completion.source_type}##{model_completion.source_id}, " \
+            "completion_tokens: #{model_completion.completion_tokens}, reasoning_content_present: #{has_reasoning}). " \
+            "Waiting #{delay} seconds..."
+        )
+      else # any other error logs only its message plus attempt/delay
+        Raif.logger.warn("Retrying LLM API request after error: #{error.message}. Attempt #{attempt}/#{max_retries}. Waiting #{delay} seconds...")
+      end
+    end
     def streaming_response_type
       raise NotImplementedError, "#{self.class.name} must implement #streaming_response_type"
     end
+    def ensure_model_completion_present!(model_completion) # raises BlankResponseError when both raw_response and response_tool_calls are blank
+      # response_array/raw provider data may still be present for debugging even when
+      # the normalized response has no text or tool calls.
+      return if model_completion.raw_response.present? || model_completion.response_tool_calls.present?
+      raise Raif::Errors::BlankResponseError,
+        "Model completion #{model_completion.id} returned no text response and no tool calls"
+    end
     def streaming_chunk_handler(model_completion, &block)
       return unless model_completion.stream_response?

data/app/models/raif/llms/anthropic.rb CHANGED Viewed

@@ -3,6 +3,19 @@
 class Raif::Llms::Anthropic < Raif::Llm
   include Raif::Concerns::Llms::Anthropic::MessageFormatting
   include Raif::Concerns::Llms::Anthropic::ToolFormatting
+  include Raif::Concerns::Llms::Anthropic::ResponseToolCalls
+  def self.prompt_tokens_include_cached_tokens? # false: cached tokens are reported separately from prompt_tokens for this provider
+    false
+  end
+  def self.cache_read_input_token_cost_multiplier # cache reads are priced at 0.1x the base input token cost
+    0.1
+  end
+  def self.cache_creation_input_token_cost_multiplier # cache writes are priced at 1.25x the base input token cost
+    1.25
+  end
   def perform_model_completion!(model_completion, &block)
     params = build_request_parameters(model_completion)
@@ -21,7 +34,7 @@ class Raif::Llms::Anthropic < Raif::Llm
 private
   def connection
-    @connection ||= Faraday.new(url: "https://api.anthropic.com/v1") do |f|
+    @connection ||= Faraday.new(url: "https://api.anthropic.com/v1", request: Raif.default_request_options) do |f|
       f.headers["x-api-key"] = Raif.config.anthropic_api_key
       f.headers["anthropic-version"] = "2023-06-01"
       f.request :json
@@ -48,22 +61,33 @@ private
     model_completion.completion_tokens = response_json&.dig("usage", "output_tokens")
     model_completion.prompt_tokens = response_json&.dig("usage", "input_tokens")
     model_completion.total_tokens = model_completion.completion_tokens.to_i + model_completion.prompt_tokens.to_i
+    model_completion.cache_read_input_tokens = response_json&.dig("usage", "cache_read_input_tokens")
+    model_completion.cache_creation_input_tokens = response_json&.dig("usage", "cache_creation_input_tokens")
     model_completion.save!
   end
   def build_request_parameters(model_completion)
     params = {
       model: model_completion.model_api_name,
-      messages: model_completion.messages,
-      temperature: (model_completion.temperature || default_temperature).to_f,
-      max_tokens: model_completion.max_completion_tokens || default_max_completion_tokens
+      messages: model_completion.messages
     }
+    params[:temperature] = (model_completion.temperature || default_temperature).to_f if supports_temperature?
+    params[:max_tokens] = model_completion.max_completion_tokens || default_max_completion_tokens
     params[:system] = model_completion.system_prompt if model_completion.system_prompt.present?
+    params[:cache_control] = { type: "ephemeral" } if model_completion.anthropic_prompt_caching_enabled
     if supports_native_tool_use?
       tools = build_tools_parameter(model_completion)
       params[:tools] = tools unless tools.blank?
+      if model_completion.tool_choice == "required"
+        params[:tool_choice] = build_required_tool_choice
+      elsif model_completion.tool_choice.present?
+        tool_klass = model_completion.tool_choice.constantize
+        params[:tool_choice] = build_forced_tool_choice(tool_klass.tool_name)
+      end
     end
     params[:stream] = true if model_completion.stream_response?
@@ -71,6 +95,10 @@ private
     params
   end
+  def supports_temperature? # true unless provider_settings carries an explicit :supports_temperature value
+    provider_settings.key?(:supports_temperature) ? provider_settings[:supports_temperature] : true
+  end
   def extract_text_response(resp)
     return if resp&.dig("content").blank?
@@ -92,24 +120,6 @@ private
     end
   end
-  def extract_response_tool_calls(resp)
-    return if resp&.dig("content").nil?
-    # Find any tool_use content blocks
-    tool_uses = resp&.dig("content")&.select do |content|
-      content["type"] == "tool_use"
-    end
-    return if tool_uses.blank?
-    tool_uses.map do |tool_use|
-      {
-        "name" => tool_use["name"],
-        "arguments" => tool_use["input"]
-      }
-    end
-  end
   def extract_citations(resp)
     return [] if resp&.dig("content").nil?