RubyGems - raif - Versions diffs - 1.4.0 → 1.5.0 - Mend

raif 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (137)

data/{lib → app/models}/raif/evals/llm_judges/binary.rb RENAMED Viewed

@@ -1,5 +1,43 @@
 # frozen_string_literal: true
+# == Schema Information
+#
+# Table name: raif_tasks
+#
+#  id                     :bigint           not null, primary key
+#  available_model_tools  :jsonb            not null
+#  completed_at           :datetime
+#  creator_type           :string
+#  failed_at              :datetime
+#  llm_model_key          :string           not null
+#  prompt                 :text
+#  prompt_studio_run      :boolean          default(FALSE), not null
+#  raw_response           :text
+#  requested_language_key :string
+#  response_format        :integer          default("text"), not null
+#  run_with               :jsonb
+#  source_type            :string
+#  started_at             :datetime
+#  system_prompt          :text
+#  type                   :string           not null
+#  created_at             :datetime         not null
+#  updated_at             :datetime         not null
+#  creator_id             :bigint
+#  source_id              :bigint
+#
+# Indexes
+#
+#  index_raif_tasks_on_completed_at           (completed_at)
+#  index_raif_tasks_on_created_at             (created_at)
+#  index_raif_tasks_on_creator                (creator_type,creator_id)
+#  index_raif_tasks_on_failed_at              (failed_at)
+#  index_raif_tasks_on_source                 (source_type,source_id)
+#  index_raif_tasks_on_started_at             (started_at)
+#  index_raif_tasks_on_type                   (type)
+#  index_raif_tasks_on_type_and_completed_at  (type,completed_at)
+#  index_raif_tasks_on_type_and_failed_at     (type,failed_at)
+#  index_raif_tasks_on_type_and_started_at    (type,started_at)
+#
 module Raif
   module Evals
     module LlmJudges

data/{lib → app/models}/raif/evals/llm_judges/comparative.rb RENAMED Viewed

@@ -1,5 +1,43 @@
 # frozen_string_literal: true
+# == Schema Information
+#
+# Table name: raif_tasks
+#
+#  id                     :bigint           not null, primary key
+#  available_model_tools  :jsonb            not null
+#  completed_at           :datetime
+#  creator_type           :string
+#  failed_at              :datetime
+#  llm_model_key          :string           not null
+#  prompt                 :text
+#  prompt_studio_run      :boolean          default(FALSE), not null
+#  raw_response           :text
+#  requested_language_key :string
+#  response_format        :integer          default("text"), not null
+#  run_with               :jsonb
+#  source_type            :string
+#  started_at             :datetime
+#  system_prompt          :text
+#  type                   :string           not null
+#  created_at             :datetime         not null
+#  updated_at             :datetime         not null
+#  creator_id             :bigint
+#  source_id              :bigint
+#
+# Indexes
+#
+#  index_raif_tasks_on_completed_at           (completed_at)
+#  index_raif_tasks_on_created_at             (created_at)
+#  index_raif_tasks_on_creator                (creator_type,creator_id)
+#  index_raif_tasks_on_failed_at              (failed_at)
+#  index_raif_tasks_on_source                 (source_type,source_id)
+#  index_raif_tasks_on_started_at             (started_at)
+#  index_raif_tasks_on_type                   (type)
+#  index_raif_tasks_on_type_and_completed_at  (type,completed_at)
+#  index_raif_tasks_on_type_and_failed_at     (type,failed_at)
+#  index_raif_tasks_on_type_and_started_at    (type,started_at)
+#
 module Raif
   module Evals
     module LlmJudges

data/{lib → app/models}/raif/evals/llm_judges/scored.rb RENAMED Viewed

@@ -1,5 +1,43 @@
 # frozen_string_literal: true
+# == Schema Information
+#
+# Table name: raif_tasks
+#
+#  id                     :bigint           not null, primary key
+#  available_model_tools  :jsonb            not null
+#  completed_at           :datetime
+#  creator_type           :string
+#  failed_at              :datetime
+#  llm_model_key          :string           not null
+#  prompt                 :text
+#  prompt_studio_run      :boolean          default(FALSE), not null
+#  raw_response           :text
+#  requested_language_key :string
+#  response_format        :integer          default("text"), not null
+#  run_with               :jsonb
+#  source_type            :string
+#  started_at             :datetime
+#  system_prompt          :text
+#  type                   :string           not null
+#  created_at             :datetime         not null
+#  updated_at             :datetime         not null
+#  creator_id             :bigint
+#  source_id              :bigint
+#
+# Indexes
+#
+#  index_raif_tasks_on_completed_at           (completed_at)
+#  index_raif_tasks_on_created_at             (created_at)
+#  index_raif_tasks_on_creator                (creator_type,creator_id)
+#  index_raif_tasks_on_failed_at              (failed_at)
+#  index_raif_tasks_on_source                 (source_type,source_id)
+#  index_raif_tasks_on_started_at             (started_at)
+#  index_raif_tasks_on_type                   (type)
+#  index_raif_tasks_on_type_and_completed_at  (type,completed_at)
+#  index_raif_tasks_on_type_and_failed_at     (type,failed_at)
+#  index_raif_tasks_on_type_and_started_at    (type,started_at)
+#
 module Raif
   module Evals
     module LlmJudges

data/{lib → app/models}/raif/evals/llm_judges/summarization.rb RENAMED Viewed

@@ -1,5 +1,43 @@
 # frozen_string_literal: true
+# == Schema Information
+#
+# Table name: raif_tasks
+#
+#  id                     :bigint           not null, primary key
+#  available_model_tools  :jsonb            not null
+#  completed_at           :datetime
+#  creator_type           :string
+#  failed_at              :datetime
+#  llm_model_key          :string           not null
+#  prompt                 :text
+#  prompt_studio_run      :boolean          default(FALSE), not null
+#  raw_response           :text
+#  requested_language_key :string
+#  response_format        :integer          default("text"), not null
+#  run_with               :jsonb
+#  source_type            :string
+#  started_at             :datetime
+#  system_prompt          :text
+#  type                   :string           not null
+#  created_at             :datetime         not null
+#  updated_at             :datetime         not null
+#  creator_id             :bigint
+#  source_id              :bigint
+#
+# Indexes
+#
+#  index_raif_tasks_on_completed_at           (completed_at)
+#  index_raif_tasks_on_created_at             (created_at)
+#  index_raif_tasks_on_creator                (creator_type,creator_id)
+#  index_raif_tasks_on_failed_at              (failed_at)
+#  index_raif_tasks_on_source                 (source_type,source_id)
+#  index_raif_tasks_on_started_at             (started_at)
+#  index_raif_tasks_on_type                   (type)
+#  index_raif_tasks_on_type_and_completed_at  (type,completed_at)
+#  index_raif_tasks_on_type_and_failed_at     (type,failed_at)
+#  index_raif_tasks_on_type_and_started_at    (type,started_at)
+#
 module Raif
   module Evals
     module LlmJudges

data/app/models/raif/llm.rb CHANGED Viewed

@@ -52,7 +52,7 @@ module Raif
     end
     def chat(message: nil, messages: nil, response_format: :text, available_model_tools: [], source: nil, system_prompt: nil, temperature: nil,
-      max_completion_tokens: nil, tool_choice: nil, &block)
+      max_completion_tokens: nil, tool_choice: nil, anthropic_prompt_caching_enabled: false, bedrock_prompt_caching_enabled: false, &block)
       unless response_format.is_a?(Symbol)
         raise ArgumentError,
           "Raif::Llm#chat - Invalid response format: #{response_format}. Must be a symbol (you passed #{response_format.class}) and be one of: #{VALID_RESPONSE_FORMATS.join(", ")}" # rubocop:disable Layout/LineLength
@@ -70,7 +70,15 @@ module Raif
         raise ArgumentError, "Raif::Llm#chat - You must provide either a message: or messages: argument, not both"
       end
-      if tool_choice.present? && !available_model_tools.map(&:to_s).include?(tool_choice.to_s)
+      # Normalize :required / "required" to the symbol form for validation
+      tool_choice = :required if tool_choice.to_s == "required"
+      if tool_choice == :required
+        if available_model_tools.blank?
+          raise ArgumentError,
+            "Raif::Llm#chat - tool_choice: :required requires at least one available model tool"
+        end
+      elsif tool_choice.present? && !available_model_tools.map(&:to_s).include?(tool_choice.to_s)
         raise ArgumentError,
           "Raif::Llm#chat - Invalid tool choice: #{tool_choice} is not included in the available model tools: #{available_model_tools.join(", ")}"
       end
@@ -99,25 +107,29 @@ module Raif
         stream_response: block_given?
       )
+      model_completion.anthropic_prompt_caching_enabled = anthropic_prompt_caching_enabled
+      model_completion.bedrock_prompt_caching_enabled = bedrock_prompt_caching_enabled
       model_completion.started!
       retry_with_backoff(model_completion) do
         perform_model_completion!(model_completion, &block)
+        ensure_model_completion_present!(model_completion)
       end
       model_completion.completed!
       model_completion
     rescue Raif::Errors::StreamingError => e
       Rails.logger.error("Raif streaming error -- code: #{e.code} -- type: #{e.type} -- message: #{e.message} -- event: #{e.event}")
-      model_completion&.record_failure!(e)
+      model_completion&.record_failure!(e) unless model_completion&.failed?
       raise e
     rescue Faraday::Error => e
       Raif.logger.error("LLM API request failed (status: #{e.response_status}): #{e.message}")
       Raif.logger.error(e.response_body)
-      model_completion&.record_failure!(e)
+      model_completion&.record_failure!(e) unless model_completion&.failed?
       raise e
     rescue StandardError => e
-      model_completion&.record_failure!(e)
+      model_completion&.record_failure!(e) unless model_completion&.failed?
       raise e
     end
@@ -129,6 +141,26 @@ module Raif
       VALID_RESPONSE_FORMATS
     end
+    # Override in subclasses to indicate whether prompt_tokens reported by the
+    # provider already include cached tokens as a subset (OpenAI, Google,
+    # OpenRouter) or whether cached tokens are reported separately and are
+    # additive to prompt_tokens (Anthropic, Bedrock).
+    def self.prompt_tokens_include_cached_tokens?
+      true
+    end
+    # Multiplier applied to the base input_token_cost to derive the per-token
+    # cost for cache reads.  Return nil when the provider has no cache pricing.
+    def self.cache_read_input_token_cost_multiplier
+      nil
+    end
+    # Multiplier applied to the base input_token_cost to derive the per-token
+    # cost for cache creation writes.  Return nil when there is no write surcharge.
+    def self.cache_creation_input_token_cost_multiplier
+      nil
+    end
     def supports_provider_managed_tool?(tool_klass)
       supported_provider_managed_tools&.include?(tool_klass.to_s)
     end
@@ -141,6 +173,20 @@ module Raif
       raise NotImplementedError, "#{self.class.name} must implement #build_forced_tool_choice"
     end
+    # Build the tool_choice parameter to require the model to call any tool (but not a specific one).
+    # Each provider implements this to return the correct format.
+    # @return [Hash, String] The tool_choice parameter for the provider's API
+    def build_required_tool_choice
+      raise NotImplementedError, "#{self.class.name} must implement #build_required_tool_choice"
+    end
+    # Whether the provider can faithfully enforce tool_choice: :required for
+    # the given tool set. Override in subclasses when a provider can only
+    # enforce required tool use for some tool types.
+    def supports_faithful_required_tool_choice?(available_model_tools)
+      available_model_tools.present?
+    end
     def validate_provider_managed_tool_support!(tool)
       unless supports_provider_managed_tool?(tool)
         raise Raif::Errors::UnsupportedFeatureError,
@@ -150,6 +196,10 @@ module Raif
   private
+    def retriable_exceptions
+      Raif.config.llm_request_retriable_exceptions
+    end
     def retry_with_backoff(model_completion)
       retries = 0
       max_retries = Raif.config.llm_request_max_retries
@@ -158,11 +208,11 @@ module Raif
       begin
         yield
-      rescue *Raif.config.llm_request_retriable_exceptions => e
+      rescue *retriable_exceptions => e
         retries += 1
         if retries <= max_retries
           delay = [base_delay * (2**(retries - 1)), max_delay].min
-          Raif.logger.warn("Retrying LLM API request after error: #{e.message}. Attempt #{retries}/#{max_retries}. Waiting #{delay} seconds...")
+          log_retry(e, model_completion, retries, max_retries, delay)
           model_completion.increment!(:retry_count)
           sleep delay
           retry
@@ -173,10 +223,35 @@ module Raif
       end
     end
+    def log_retry(error, model_completion, attempt, max_retries, delay)
+      if error.is_a?(Raif::Errors::BlankResponseError)
+        has_reasoning = model_completion.response_array&.any? do |block|
+          block.is_a?(Hash) ? block.key?("reasoning_content") : block.respond_to?(:reasoning_content)
+        end
+        Raif.logger.warn(
+          "Blank response retry #{attempt}/#{max_retries} for #{api_name} " \
+            "(ModelCompletion##{model_completion.id}, source: #{model_completion.source_type}##{model_completion.source_id}, " \
+            "completion_tokens: #{model_completion.completion_tokens}, reasoning_content_present: #{has_reasoning}). " \
+            "Waiting #{delay} seconds..."
+        )
+      else
+        Raif.logger.warn("Retrying LLM API request after error: #{error.message}. Attempt #{attempt}/#{max_retries}. Waiting #{delay} seconds...")
+      end
+    end
     def streaming_response_type
       raise NotImplementedError, "#{self.class.name} must implement #streaming_response_type"
     end
+    def ensure_model_completion_present!(model_completion)
+      # response_array/raw provider data may still be present for debugging even when
+      # the normalized response has no text or tool calls.
+      return if model_completion.raw_response.present? || model_completion.response_tool_calls.present?
+      raise Raif::Errors::BlankResponseError,
+        "Model completion #{model_completion.id} returned no text response and no tool calls"
+    end
     def streaming_chunk_handler(model_completion, &block)
       return unless model_completion.stream_response?

data/app/models/raif/llms/anthropic.rb CHANGED Viewed

@@ -5,6 +5,18 @@ class Raif::Llms::Anthropic < Raif::Llm
   include Raif::Concerns::Llms::Anthropic::ToolFormatting
   include Raif::Concerns::Llms::Anthropic::ResponseToolCalls
+  def self.prompt_tokens_include_cached_tokens?
+    false
+  end
+  def self.cache_read_input_token_cost_multiplier
+    0.1
+  end
+  def self.cache_creation_input_token_cost_multiplier
+    1.25
+  end
   def perform_model_completion!(model_completion, &block)
     params = build_request_parameters(model_completion)
     response = connection.post("messages") do |req|
@@ -49,24 +61,30 @@ private
     model_completion.completion_tokens = response_json&.dig("usage", "output_tokens")
     model_completion.prompt_tokens = response_json&.dig("usage", "input_tokens")
     model_completion.total_tokens = model_completion.completion_tokens.to_i + model_completion.prompt_tokens.to_i
+    model_completion.cache_read_input_tokens = response_json&.dig("usage", "cache_read_input_tokens")
+    model_completion.cache_creation_input_tokens = response_json&.dig("usage", "cache_creation_input_tokens")
     model_completion.save!
   end
   def build_request_parameters(model_completion)
     params = {
       model: model_completion.model_api_name,
-      messages: model_completion.messages,
-      temperature: (model_completion.temperature || default_temperature).to_f,
-      max_tokens: model_completion.max_completion_tokens || default_max_completion_tokens
+      messages: model_completion.messages
     }
+    params[:temperature] = (model_completion.temperature || default_temperature).to_f if supports_temperature?
+    params[:max_tokens] = model_completion.max_completion_tokens || default_max_completion_tokens
     params[:system] = model_completion.system_prompt if model_completion.system_prompt.present?
+    params[:cache_control] = { type: "ephemeral" } if model_completion.anthropic_prompt_caching_enabled
     if supports_native_tool_use?
       tools = build_tools_parameter(model_completion)
       params[:tools] = tools unless tools.blank?
-      if model_completion.tool_choice.present?
+      if model_completion.tool_choice == "required"
+        params[:tool_choice] = build_required_tool_choice
+      elsif model_completion.tool_choice.present?
         tool_klass = model_completion.tool_choice.constantize
         params[:tool_choice] = build_forced_tool_choice(tool_klass.tool_name)
       end
@@ -77,6 +95,10 @@ private
     params
   end
+  def supports_temperature?
+    provider_settings.key?(:supports_temperature) ? provider_settings[:supports_temperature] : true
+  end
   def extract_text_response(resp)
     return if resp&.dig("content").blank?

data/app/models/raif/llms/bedrock.rb CHANGED Viewed

@@ -5,10 +5,20 @@ class Raif::Llms::Bedrock < Raif::Llm
   include Raif::Concerns::Llms::Bedrock::ToolFormatting
   include Raif::Concerns::Llms::Bedrock::ResponseToolCalls
+  def self.prompt_tokens_include_cached_tokens?
+    false
+  end
+  def self.cache_read_input_token_cost_multiplier
+    0.1
+  end
+  def self.cache_creation_input_token_cost_multiplier
+    1.25
+  end
   def perform_model_completion!(model_completion, &block)
-    if Raif.config.aws_bedrock_model_name_prefix.present?
-      model_completion.model_api_name = "#{Raif.config.aws_bedrock_model_name_prefix}.#{model_completion.model_api_name}"
-    end
+    model_completion.model_api_name = resolve_model_api_name(model_completion.model_api_name)
     params = build_request_parameters(model_completion)
@@ -39,10 +49,29 @@ class Raif::Llms::Bedrock < Raif::Llm
 private
   def bedrock_client
-    @bedrock_client ||= Aws::BedrockRuntime::Client.new(region: Raif.config.aws_bedrock_region)
+    @bedrock_client ||= begin
+      client_options = {
+        region: Raif.config.aws_bedrock_region,
+        max_attempts: 1
+      }
+      client_options[:http_read_timeout] = Raif.config.request_read_timeout if Raif.config.request_read_timeout
+      client_options[:http_open_timeout] = Raif.config.request_open_timeout if Raif.config.request_open_timeout
+      Aws::BedrockRuntime::Client.new(client_options)
+    end
+  end
+  def retriable_exceptions
+    super + [
+      Aws::BedrockRuntime::Errors::ServiceError,
+      Seahorse::Client::NetworkingError
+    ]
   end
   def update_model_completion(model_completion, resp)
+    return if resp.nil?
     model_completion.raw_response = if model_completion.response_format_json?
       extract_json_response(resp)
     else
@@ -54,6 +83,8 @@ private
     model_completion.completion_tokens = resp.usage.output_tokens
     model_completion.prompt_tokens = resp.usage.input_tokens
     model_completion.total_tokens = resp.usage.total_tokens
+    model_completion.cache_read_input_tokens = resp.usage.try(:cache_read_input_tokens)
+    model_completion.cache_creation_input_tokens = resp.usage.try(:cache_write_input_tokens)
     model_completion.save!
   end
@@ -74,12 +105,20 @@ private
       tools = build_tools_parameter(model_completion)
       params[:tool_config] = tools unless tools.blank?
-      if model_completion.tool_choice.present?
+      if model_completion.tool_choice == "required"
+        params[:tool_config][:tool_choice] = build_required_tool_choice
+      elsif model_completion.tool_choice.present?
         tool_klass = model_completion.tool_choice.constantize
         params[:tool_config][:tool_choice] = build_forced_tool_choice(tool_klass.tool_name)
       end
     end
+    if model_completion.bedrock_prompt_caching_enabled
+      cache_point = { cache_point: { type: "default" } }
+      params[:system] << cache_point if params[:system].present?
+      messages_param.last[:content] << cache_point.dup if messages_param.last.present?
+    end
     params
   end
@@ -148,4 +187,19 @@ private
     end
   end
+  def resolve_model_api_name(model_api_name)
+    api_name = model_api_name.to_s
+    prefix = Raif.config.aws_bedrock_model_name_prefix.to_s.presence
+    return api_name if prefix.blank?
+    return api_name if api_name.start_with?("#{prefix}.")
+    # Some Bedrock model IDs are provider IDs (not inference profile IDs),
+    # so they should not be prefixed.
+    return api_name if api_name.start_with?("openai.gpt-oss-")
+    return api_name if api_name.start_with?("deepseek.")
+    "#{prefix}.#{api_name}"
+  end
 end

data/app/models/raif/llms/google.rb CHANGED Viewed

@@ -5,6 +5,10 @@ class Raif::Llms::Google < Raif::Llm
   include Raif::Concerns::Llms::Google::ToolFormatting
   include Raif::Concerns::Llms::Google::ResponseToolCalls
+  def self.cache_read_input_token_cost_multiplier
+    0.25
+  end
   def perform_model_completion!(model_completion, &block)
     params = build_request_parameters(model_completion)
     endpoint = build_endpoint(model_completion)
@@ -21,10 +25,17 @@ class Raif::Llms::Google < Raif::Llm
     model_completion
   end
+  def supports_faithful_required_tool_choice?(available_model_tools)
+    super && Array(available_model_tools).none? do |tool|
+      tool_class = tool.is_a?(String) ? tool.constantize : tool
+      tool_class.provider_managed?
+    end
+  end
 private
   def connection
-    @connection ||= Faraday.new(url: "https://generativelanguage.googleapis.com/v1beta") do |f|
+    @connection ||= Faraday.new(url: "https://generativelanguage.googleapis.com/v1beta", request: Raif.default_request_options) do |f|
       f.headers["x-goog-api-key"] = Raif.config.google_api_key
       f.request :json
       f.response :json
@@ -58,6 +69,7 @@ private
     model_completion.prompt_tokens = response_json&.dig("usageMetadata", "promptTokenCount")
     model_completion.total_tokens = response_json&.dig("usageMetadata", "totalTokenCount") ||
       (model_completion.completion_tokens.to_i + model_completion.prompt_tokens.to_i)
+    model_completion.cache_read_input_tokens = response_json&.dig("usageMetadata", "cachedContentTokenCount")
     model_completion.save!
   end
@@ -76,7 +88,13 @@ private
       tools = build_tools_parameter(model_completion)
       params[:tools] = tools unless tools.blank?
-      if model_completion.tool_choice.present?
+      if model_completion.tool_choice == "required"
+        if supports_faithful_required_tool_choice?(model_completion.available_model_tools)
+          params[:toolConfig] = { functionCallingConfig: build_required_tool_choice }
+        else
+          log_required_tool_choice_fallback(model_completion)
+        end
+      elsif model_completion.tool_choice.present?
         tool_klass = model_completion.tool_choice.constantize
         params[:toolConfig] = { functionCallingConfig: build_forced_tool_choice(tool_klass.tool_name) }
       end
@@ -85,6 +103,14 @@ private
     params
   end
+  def log_required_tool_choice_fallback(model_completion)
+    Raif.logger.warn(
+      "Google AI cannot faithfully enforce tool_choice: :required when provider-managed tools are present. " \
+        "Falling back to runtime validation for #{model_completion.model_api_name} " \
+        "(tools: #{model_completion.available_model_tools_map.keys.join(", ")})"
+    )
+  end
   def build_generation_config(model_completion)
     config = {}

data/app/models/raif/llms/open_ai_base.rb CHANGED Viewed

@@ -3,6 +3,10 @@
 class Raif::Llms::OpenAiBase < Raif::Llm
   include Raif::Concerns::Llms::OpenAi::JsonSchemaValidation
+  def self.cache_read_input_token_cost_multiplier
+    0.5
+  end
   def perform_model_completion!(model_completion, &block)
     if supports_temperature?
       model_completion.temperature ||= default_temperature

data/app/models/raif/llms/open_ai_completions.rb CHANGED Viewed

@@ -16,6 +16,8 @@ private
   end
   def update_model_completion(model_completion, response_json)
+    return if response_json.nil?
     model_completion.update!(
       response_id: response_json["id"],
       response_tool_calls: extract_response_tool_calls(response_json),
@@ -23,7 +25,8 @@ private
       response_array: response_json["choices"],
       completion_tokens: response_json.dig("usage", "completion_tokens"),
       prompt_tokens: response_json.dig("usage", "prompt_tokens"),
-      total_tokens: response_json.dig("usage", "total_tokens")
+      total_tokens: response_json.dig("usage", "total_tokens"),
+      cache_read_input_tokens: response_json.dig("usage", "prompt_tokens_details", "cached_tokens")
     )
   end
@@ -51,9 +54,13 @@ private
       tools = build_tools_parameter(model_completion)
       parameters[:tools] = tools unless tools.blank?
-      if model_completion.tool_choice.present?
+      if model_completion.tool_choice == "required"
+        parameters[:tool_choice] = build_required_tool_choice
+        parameters[:parallel_tool_calls] = false unless tools.blank?
+      elsif model_completion.tool_choice.present?
         tool_klass = model_completion.tool_choice.constantize
         parameters[:tool_choice] = build_forced_tool_choice(tool_klass.tool_name)
+        parameters[:parallel_tool_calls] = false unless tools.blank?
       end
     end

data/app/models/raif/llms/open_ai_responses.rb CHANGED Viewed

@@ -16,6 +16,8 @@ private
   end
   def update_model_completion(model_completion, response_json)
+    return if response_json.nil?
     model_completion.update!(
       response_id: response_json["id"],
       response_tool_calls: extract_response_tool_calls(response_json),
@@ -24,7 +26,8 @@ private
       citations: extract_citations(response_json),
       completion_tokens: response_json.dig("usage", "output_tokens"),
       prompt_tokens: response_json.dig("usage", "input_tokens"),
-      total_tokens: response_json.dig("usage", "total_tokens")
+      total_tokens: response_json.dig("usage", "total_tokens"),
+      cache_read_input_tokens: response_json.dig("usage", "input_tokens_details", "cached_tokens")
     )
   end
@@ -96,9 +99,13 @@ private
       tools = build_tools_parameter(model_completion)
       parameters[:tools] = tools unless tools.blank?
-      if model_completion.tool_choice.present?
+      if model_completion.tool_choice == "required"
+        parameters[:tool_choice] = build_required_tool_choice
+        parameters[:parallel_tool_calls] = false unless tools.blank?
+      elsif model_completion.tool_choice.present?
         tool_klass = model_completion.tool_choice.constantize
         parameters[:tool_choice] = build_forced_tool_choice(tool_klass.tool_name)
+        parameters[:parallel_tool_calls] = false unless tools.blank?
       end
     end