RubyGems - raif - Versions diffs - 1.3.0 → 1.5.0 - Mend

raif 1.3.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (206)

data/app/models/raif/llms/bedrock.rb CHANGED

@@ -3,11 +3,22 @@
 class Raif::Llms::Bedrock < Raif::Llm
   include Raif::Concerns::Llms::Bedrock::MessageFormatting
   include Raif::Concerns::Llms::Bedrock::ToolFormatting
+  include Raif::Concerns::Llms::Bedrock::ResponseToolCalls
+  def self.prompt_tokens_include_cached_tokens?
+    false
+  end
+  def self.cache_read_input_token_cost_multiplier
+    0.1
+  end
+  def self.cache_creation_input_token_cost_multiplier
+    1.25
+  end
   def perform_model_completion!(model_completion, &block)
-    if Raif.config.aws_bedrock_model_name_prefix.present?
-      model_completion.model_api_name = "#{Raif.config.aws_bedrock_model_name_prefix}.#{model_completion.model_api_name}"
-    end
+    model_completion.model_api_name = resolve_model_api_name(model_completion.model_api_name)
     params = build_request_parameters(model_completion)
@@ -38,10 +49,29 @@ class Raif::Llms::Bedrock < Raif::Llm
 private
   def bedrock_client
-    @bedrock_client ||= Aws::BedrockRuntime::Client.new(region: Raif.config.aws_bedrock_region)
+    @bedrock_client ||= begin
+      client_options = {
+        region: Raif.config.aws_bedrock_region,
+        max_attempts: 1
+      }
+      client_options[:http_read_timeout] = Raif.config.request_read_timeout if Raif.config.request_read_timeout
+      client_options[:http_open_timeout] = Raif.config.request_open_timeout if Raif.config.request_open_timeout
+      Aws::BedrockRuntime::Client.new(client_options)
+    end
+  end
+  def retriable_exceptions
+    super + [
+      Aws::BedrockRuntime::Errors::ServiceError,
+      Seahorse::Client::NetworkingError
+    ]
   end
   def update_model_completion(model_completion, resp)
+    return if resp.nil?
     model_completion.raw_response = if model_completion.response_format_json?
       extract_json_response(resp)
     else
@@ -53,6 +83,8 @@ private
     model_completion.completion_tokens = resp.usage.output_tokens
     model_completion.prompt_tokens = resp.usage.input_tokens
     model_completion.total_tokens = resp.usage.total_tokens
+    model_completion.cache_read_input_tokens = resp.usage.try(:cache_read_input_tokens)
+    model_completion.cache_creation_input_tokens = resp.usage.try(:cache_write_input_tokens)
     model_completion.save!
   end
@@ -72,6 +104,19 @@ private
     if supports_native_tool_use?
       tools = build_tools_parameter(model_completion)
       params[:tool_config] = tools unless tools.blank?
+      if model_completion.tool_choice == "required"
+        params[:tool_config][:tool_choice] = build_required_tool_choice
+      elsif model_completion.tool_choice.present?
+        tool_klass = model_completion.tool_choice.constantize
+        params[:tool_config][:tool_choice] = build_forced_tool_choice(tool_klass.tool_name)
+      end
+    end
+    if model_completion.bedrock_prompt_caching_enabled
+      cache_point = { cache_point: { type: "default" } }
+      params[:system] << cache_point if params[:system].present?
+      messages_param.last[:content] << cache_point.dup if messages_param.last.present?
     end
     params
@@ -121,26 +166,6 @@ private
     end
   end
-  def extract_response_tool_calls(resp)
-    # Get the message from the response object
-    message = resp.output.message
-    return if message.content.nil?
-    # Find any tool_use blocks in the content array
-    tool_uses = message.content.select do |content|
-      content.respond_to?(:tool_use) && content.tool_use.present?
-    end
-    return if tool_uses.blank?
-    tool_uses.map do |content|
-      {
-        "name" => content.tool_use.name,
-        "arguments" => content.tool_use.input
-      }
-    end
-  end
   def streaming_chunk_handler(model_completion, &block)
     return unless model_completion.stream_response?
@@ -162,4 +187,19 @@ private
     end
   end
+  def resolve_model_api_name(model_api_name)
+    api_name = model_api_name.to_s
+    prefix = Raif.config.aws_bedrock_model_name_prefix.to_s.presence
+    return api_name if prefix.blank?
+    return api_name if api_name.start_with?("#{prefix}.")
+    # Some Bedrock model IDs are provider IDs (not inference profile IDs),
+    # so they should not be prefixed.
+    return api_name if api_name.start_with?("openai.gpt-oss-")
+    return api_name if api_name.start_with?("deepseek.")
+    "#{prefix}.#{api_name}"
+  end
 end

data/app/models/raif/llms/google.rb ADDED

@@ -0,0 +1,166 @@
+# frozen_string_literal: true
+class Raif::Llms::Google < Raif::Llm
+  include Raif::Concerns::Llms::Google::MessageFormatting
+  include Raif::Concerns::Llms::Google::ToolFormatting
+  include Raif::Concerns::Llms::Google::ResponseToolCalls
+  def self.cache_read_input_token_cost_multiplier
+    0.25
+  end
+  def perform_model_completion!(model_completion, &block)
+    params = build_request_parameters(model_completion)
+    endpoint = build_endpoint(model_completion)
+    response = connection.post(endpoint) do |req|
+      req.body = params
+      req.options.on_data = streaming_chunk_handler(model_completion, &block) if model_completion.stream_response?
+    end
+    unless model_completion.stream_response?
+      update_model_completion(model_completion, response.body)
+    end
+    model_completion
+  end
+  def supports_faithful_required_tool_choice?(available_model_tools)
+    super && Array(available_model_tools).none? do |tool|
+      tool_class = tool.is_a?(String) ? tool.constantize : tool
+      tool_class.provider_managed?
+    end
+  end
+private
+  def connection
+    @connection ||= Faraday.new(url: "https://generativelanguage.googleapis.com/v1beta", request: Raif.default_request_options) do |f|
+      f.headers["x-goog-api-key"] = Raif.config.google_api_key
+      f.request :json
+      f.response :json
+      f.response :raise_error
+    end
+  end
+  def build_endpoint(model_completion)
+    if model_completion.stream_response?
+      "models/#{model_completion.model_api_name}:streamGenerateContent?alt=sse"
+    else
+      "models/#{model_completion.model_api_name}:generateContent"
+    end
+  end
+  def streaming_response_type
+    Raif::StreamingResponses::Google
+  end
+  def update_model_completion(model_completion, response_json)
+    model_completion.raw_response = if model_completion.response_format_json?
+      extract_json_response(response_json)
+    else
+      extract_text_response(response_json)
+    end
+    model_completion.response_array = response_json&.dig("candidates", 0, "content", "parts")
+    model_completion.response_tool_calls = extract_response_tool_calls(response_json)
+    model_completion.citations = extract_citations(response_json)
+    model_completion.completion_tokens = response_json&.dig("usageMetadata", "candidatesTokenCount")
+    model_completion.prompt_tokens = response_json&.dig("usageMetadata", "promptTokenCount")
+    model_completion.total_tokens = response_json&.dig("usageMetadata", "totalTokenCount") ||
+      (model_completion.completion_tokens.to_i + model_completion.prompt_tokens.to_i)
+    model_completion.cache_read_input_tokens = response_json&.dig("usageMetadata", "cachedContentTokenCount")
+    model_completion.save!
+  end
+  def build_request_parameters(model_completion)
+    params = {
+      contents: model_completion.messages
+    }
+    if model_completion.system_prompt.present?
+      params[:system_instruction] = { parts: [{ text: model_completion.system_prompt }] }
+    end
+    params[:generationConfig] = build_generation_config(model_completion)
+    if supports_native_tool_use?
+      tools = build_tools_parameter(model_completion)
+      params[:tools] = tools unless tools.blank?
+      if model_completion.tool_choice == "required"
+        if supports_faithful_required_tool_choice?(model_completion.available_model_tools)
+          params[:toolConfig] = { functionCallingConfig: build_required_tool_choice }
+        else
+          log_required_tool_choice_fallback(model_completion)
+        end
+      elsif model_completion.tool_choice.present?
+        tool_klass = model_completion.tool_choice.constantize
+        params[:toolConfig] = { functionCallingConfig: build_forced_tool_choice(tool_klass.tool_name) }
+      end
+    end
+    params
+  end
+  def log_required_tool_choice_fallback(model_completion)
+    Raif.logger.warn(
+      "Google AI cannot faithfully enforce tool_choice: :required when provider-managed tools are present. " \
+        "Falling back to runtime validation for #{model_completion.model_api_name} " \
+        "(tools: #{model_completion.available_model_tools_map.keys.join(", ")})"
+    )
+  end
+  def build_generation_config(model_completion)
+    config = {}
+    temperature = model_completion.temperature || default_temperature
+    config[:temperature] = temperature.to_f if temperature.present?
+    max_tokens = model_completion.max_completion_tokens || default_max_completion_tokens
+    config[:maxOutputTokens] = max_tokens if max_tokens.present?
+    # Use native JSON schema support for structured output
+    if model_completion.response_format_json? && model_completion.json_response_schema.present?
+      config[:responseMimeType] = "application/json"
+      config[:responseSchema] = sanitize_schema_for_google(model_completion.json_response_schema)
+    end
+    config
+  end
+  def extract_text_response(resp)
+    parts = resp&.dig("candidates", 0, "content", "parts")
+    return if parts.blank?
+    parts.select { |p| p.key?("text") }.map { |p| p["text"] }.join
+  end
+  def extract_json_response(resp)
+    # Google AI supports native JSON schema output, so the response should be in the text field
+    extract_text_response(resp)
+  end
+  def extract_citations(resp)
+    # Google AI returns grounding metadata for search results
+    grounding_metadata = resp&.dig("candidates", 0, "groundingMetadata")
+    return [] if grounding_metadata.blank?
+    citations = []
+    # Extract from grounding chunks
+    grounding_chunks = grounding_metadata["groundingChunks"] || []
+    grounding_chunks.each do |chunk|
+      web = chunk["web"]
+      next unless web.present?
+      citations << {
+        "url" => Raif::Utils::HtmlFragmentProcessor.strip_tracking_parameters(web["uri"]),
+        "title" => web["title"]
+      }
+    end
+    citations.uniq { |citation| citation["url"] }
+  end
+end

data/app/models/raif/llms/open_ai_base.rb CHANGED

@@ -3,6 +3,10 @@
 class Raif::Llms::OpenAiBase < Raif::Llm
   include Raif::Concerns::Llms::OpenAi::JsonSchemaValidation
+  def self.cache_read_input_token_cost_multiplier
+    0.5
+  end
   def perform_model_completion!(model_completion, &block)
     if supports_temperature?
       model_completion.temperature ||= default_temperature
@@ -28,11 +32,25 @@ class Raif::Llms::OpenAiBase < Raif::Llm
 private
   def connection
-    @connection ||= Faraday.new(url: "https://api.openai.com/v1") do |f|
-      f.headers["Authorization"] = "Bearer #{Raif.config.open_ai_api_key}"
-      f.request :json
-      f.response :json
-      f.response :raise_error
+    @connection ||= begin
+      conn = Faraday.new(url: Raif.config.open_ai_base_url, request: Raif.default_request_options) do |f|
+        case Raif.config.open_ai_auth_header_style
+        when :bearer
+          f.headers["Authorization"] = "Bearer #{Raif.config.open_ai_api_key}"
+        when :api_key
+          f.headers["api-key"] = Raif.config.open_ai_api_key
+        else
+          raise Raif::Errors::InvalidConfigError,
+            "Raif.config.open_ai_auth_header_style must be either :bearer or :api_key"
+        end
+        f.request :json
+        f.response :json
+        f.response :raise_error
+      end
+      conn.params["api-version"] = Raif.config.open_ai_api_version if Raif.config.open_ai_api_version.present?
+      conn
     end
   end

data/app/models/raif/llms/open_ai_completions.rb CHANGED

@@ -3,6 +3,7 @@
 class Raif::Llms::OpenAiCompletions < Raif::Llms::OpenAiBase
   include Raif::Concerns::Llms::OpenAiCompletions::MessageFormatting
   include Raif::Concerns::Llms::OpenAiCompletions::ToolFormatting
+  include Raif::Concerns::Llms::OpenAiCompletions::ResponseToolCalls
 private
@@ -15,6 +16,8 @@ private
   end
   def update_model_completion(model_completion, response_json)
+    return if response_json.nil?
     model_completion.update!(
       response_id: response_json["id"],
       response_tool_calls: extract_response_tool_calls(response_json),
@@ -22,21 +25,11 @@ private
       response_array: response_json["choices"],
       completion_tokens: response_json.dig("usage", "completion_tokens"),
       prompt_tokens: response_json.dig("usage", "prompt_tokens"),
-      total_tokens: response_json.dig("usage", "total_tokens")
+      total_tokens: response_json.dig("usage", "total_tokens"),
+      cache_read_input_tokens: response_json.dig("usage", "prompt_tokens_details", "cached_tokens")
     )
   end
-  def extract_response_tool_calls(resp)
-    return if resp.dig("choices", 0, "message", "tool_calls").blank?
-    resp.dig("choices", 0, "message", "tool_calls").map do |tool_call|
-      {
-        "name" => tool_call["function"]["name"],
-        "arguments" => JSON.parse(tool_call["function"]["arguments"])
-      }
-    end
-  end
   def build_request_parameters(model_completion)
     formatted_system_prompt = format_system_prompt(model_completion)
@@ -60,6 +53,15 @@ private
     if supports_native_tool_use?
       tools = build_tools_parameter(model_completion)
       parameters[:tools] = tools unless tools.blank?
+      if model_completion.tool_choice == "required"
+        parameters[:tool_choice] = build_required_tool_choice
+        parameters[:parallel_tool_calls] = false unless tools.blank?
+      elsif model_completion.tool_choice.present?
+        tool_klass = model_completion.tool_choice.constantize
+        parameters[:tool_choice] = build_forced_tool_choice(tool_klass.tool_name)
+        parameters[:parallel_tool_calls] = false unless tools.blank?
+      end
     end
     if model_completion.stream_response?

data/app/models/raif/llms/open_ai_responses.rb CHANGED

@@ -3,6 +3,7 @@
 class Raif::Llms::OpenAiResponses < Raif::Llms::OpenAiBase
   include Raif::Concerns::Llms::OpenAiResponses::MessageFormatting
   include Raif::Concerns::Llms::OpenAiResponses::ToolFormatting
+  include Raif::Concerns::Llms::OpenAiResponses::ResponseToolCalls
 private
@@ -15,6 +16,8 @@ private
   end
   def update_model_completion(model_completion, response_json)
+    return if response_json.nil?
     model_completion.update!(
       response_id: response_json["id"],
       response_tool_calls: extract_response_tool_calls(response_json),
@@ -23,26 +26,11 @@ private
       citations: extract_citations(response_json),
       completion_tokens: response_json.dig("usage", "output_tokens"),
       prompt_tokens: response_json.dig("usage", "input_tokens"),
-      total_tokens: response_json.dig("usage", "total_tokens")
+      total_tokens: response_json.dig("usage", "total_tokens"),
+      cache_read_input_tokens: response_json.dig("usage", "input_tokens_details", "cached_tokens")
     )
   end
-  def extract_response_tool_calls(resp)
-    return if resp["output"].blank?
-    tool_calls = []
-    resp["output"].each do |output_item|
-      next unless output_item["type"] == "function_call"
-      tool_calls << {
-        "name" => output_item["name"],
-        "arguments" => JSON.parse(output_item["arguments"])
-      }
-    end
-    tool_calls.any? ? tool_calls : nil
-  end
   def extract_raw_response(resp)
     text_outputs = []
@@ -110,6 +98,15 @@ private
     if supports_native_tool_use?
       tools = build_tools_parameter(model_completion)
       parameters[:tools] = tools unless tools.blank?
+      if model_completion.tool_choice == "required"
+        parameters[:tool_choice] = build_required_tool_choice
+        parameters[:parallel_tool_calls] = false unless tools.blank?
+      elsif model_completion.tool_choice.present?
+        tool_klass = model_completion.tool_choice.constantize
+        parameters[:tool_choice] = build_forced_tool_choice(tool_klass.tool_name)
+        parameters[:parallel_tool_calls] = false unless tools.blank?
+      end
     end
     # Add response format if needed. Default will be { "type": "text" }

data/app/models/raif/llms/open_router.rb CHANGED

@@ -3,6 +3,7 @@
 class Raif::Llms::OpenRouter < Raif::Llm
   include Raif::Concerns::Llms::OpenAiCompletions::MessageFormatting
   include Raif::Concerns::Llms::OpenAiCompletions::ToolFormatting
+  include Raif::Concerns::Llms::OpenAiCompletions::ResponseToolCalls
   include Raif::Concerns::Llms::OpenAi::JsonSchemaValidation
   def perform_model_completion!(model_completion, &block)
@@ -23,7 +24,7 @@ class Raif::Llms::OpenRouter < Raif::Llm
 private
   def connection
-    @connection ||= Faraday.new(url: "https://openrouter.ai/api/v1") do |f|
+    @connection ||= Faraday.new(url: "https://openrouter.ai/api/v1", request: Raif.default_request_options) do |f|
       f.headers["Authorization"] = "Bearer #{Raif.config.open_router_api_key}"
       f.headers["HTTP-Referer"] = Raif.config.open_router_site_url if Raif.config.open_router_site_url.present?
       f.headers["X-Title"] = Raif.config.open_router_app_name if Raif.config.open_router_app_name.present?
@@ -38,6 +39,8 @@ private
   end
   def update_model_completion(model_completion, response_json)
+    return if response_json.nil?
     raw_response = if model_completion.response_format_json?
       extract_json_response(response_json)
     else
@@ -51,7 +54,8 @@ private
       response_array: response_json["choices"],
       completion_tokens: response_json.dig("usage", "completion_tokens"),
       prompt_tokens: response_json.dig("usage", "prompt_tokens"),
-      total_tokens: response_json.dig("usage", "total_tokens")
+      total_tokens: response_json.dig("usage", "total_tokens"),
+      cache_read_input_tokens: response_json.dig("usage", "prompt_tokens_details", "cached_tokens")
     )
   end
@@ -85,6 +89,15 @@ private
       end
       params[:tools] = tools unless tools.blank?
+      if model_completion.tool_choice == "required"
+        params[:tool_choice] = build_required_tool_choice
+        params[:parallel_tool_calls] = false unless tools.blank?
+      elsif model_completion.tool_choice.present?
+        tool_klass = model_completion.tool_choice.constantize
+        params[:tool_choice] = build_forced_tool_choice(tool_klass.tool_name)
+        params[:parallel_tool_calls] = false unless tools.blank?
+      end
     end
     if model_completion.stream_response?
@@ -108,7 +121,7 @@ private
   end
   def extract_json_response(resp)
-    tool_calls = resp.dig("choices", 0, "message", "tool_calls")
+    tool_calls = resp&.dig("choices", 0, "message", "tool_calls")
     return extract_text_response(resp) if tool_calls.blank?
     tool_response = tool_calls.find do |tool_call|
@@ -121,16 +134,4 @@ private
       extract_text_response(resp)
     end
   end
-  def extract_response_tool_calls(resp)
-    tool_calls = resp.dig("choices", 0, "message", "tool_calls")
-    return if tool_calls.blank?
-    tool_calls.map do |tool_call|
-      {
-        "name" => tool_call["function"]["name"],
-        "arguments" => JSON.parse(tool_call["function"]["arguments"])
-      }
-    end
-  end
 end

data/app/models/raif/model_completion.rb CHANGED

@@ -1,14 +1,74 @@
 # frozen_string_literal: true
+# == Schema Information
+#
+# Table name: raif_model_completions
+#
+#  id                          :bigint           not null, primary key
+#  available_model_tools       :jsonb            not null
+#  cache_creation_input_tokens :integer
+#  cache_read_input_tokens     :integer
+#  citations                   :jsonb
+#  completed_at                :datetime
+#  completion_tokens           :integer
+#  failed_at                   :datetime
+#  failure_error               :string
+#  failure_reason              :text
+#  llm_model_key               :string           not null
+#  max_completion_tokens       :integer
+#  messages                    :jsonb            not null
+#  model_api_name              :string           not null
+#  output_token_cost           :decimal(10, 6)
+#  prompt_token_cost           :decimal(10, 6)
+#  prompt_tokens               :integer
+#  raw_response                :text
+#  response_array              :jsonb
+#  response_format             :integer          default("text"), not null
+#  response_format_parameter   :string
+#  response_tool_calls         :jsonb
+#  retry_count                 :integer          default(0), not null
+#  source_type                 :string
+#  started_at                  :datetime
+#  stream_response             :boolean          default(FALSE), not null
+#  system_prompt               :text
+#  temperature                 :decimal(5, 3)
+#  tool_choice                 :string
+#  total_cost                  :decimal(10, 6)
+#  total_tokens                :integer
+#  created_at                  :datetime         not null
+#  updated_at                  :datetime         not null
+#  response_id                 :string
+#  source_id                   :bigint
+#
+# Indexes
+#
+#  index_raif_model_completions_on_completed_at  (completed_at)
+#  index_raif_model_completions_on_created_at    (created_at)
+#  index_raif_model_completions_on_failed_at     (failed_at)
+#  index_raif_model_completions_on_source        (source_type,source_id)
+#  index_raif_model_completions_on_started_at    (started_at)
+#
 class Raif::ModelCompletion < Raif::ApplicationRecord
   include Raif::Concerns::LlmResponseParsing
   include Raif::Concerns::HasAvailableModelTools
+  include Raif::Concerns::HasRuntimeDuration
+  include Raif::Concerns::ProviderManagedToolCalls
+  include Raif::Concerns::BooleanTimestamp
+  attr_accessor :anthropic_prompt_caching_enabled, :bedrock_prompt_caching_enabled
+  boolean_timestamp :started_at
+  boolean_timestamp :completed_at
+  boolean_timestamp :failed_at
   belongs_to :source, polymorphic: true, optional: true
   validates :llm_model_key, presence: true, inclusion: { in: ->{ Raif.available_llm_keys.map(&:to_s) } }
   validates :model_api_name, presence: true
+  # Scope to find completions that have response tool calls
+  scope :with_response_tool_calls, -> { where_json_not_blank(:response_tool_calls) }
   delegate :json_response_schema, to: :source, allow_nil: true
   before_save :set_total_tokens
@@ -28,8 +88,12 @@ class Raif::ModelCompletion < Raif::ApplicationRecord
   end
   def calculate_costs
+    # Each retry resends the same prompt, so the provider charges input tokens
+    # for every attempt. Factor in retry_count to reflect actual billing.
+    total_attempts = (retry_count || 0) + 1
     if prompt_tokens.present? && llm_config[:input_token_cost].present?
-      self.prompt_token_cost = llm_config[:input_token_cost] * prompt_tokens
+      self.prompt_token_cost = calculate_prompt_token_cost(total_attempts)
     end
     if completion_tokens.present? && llm_config[:output_token_cost].present?
@@ -41,8 +105,46 @@ class Raif::ModelCompletion < Raif::ApplicationRecord
     end
   end
+  def record_failure!(exception)
+    self.failed_at = Time.current
+    self.failure_error = exception.class.name
+    self.failure_reason = exception.message.truncate(255)
+    save!
+  end
 private
+  def calculate_prompt_token_cost(total_attempts)
+    input_cost = llm_config[:input_token_cost]
+    llm_class = llm_config[:llm_class]
+    cache_read_multiplier = llm_class&.cache_read_input_token_cost_multiplier
+    cache_creation_multiplier = llm_class&.cache_creation_input_token_cost_multiplier
+    cached_reads = cache_read_input_tokens.to_i
+    cached_writes = cache_creation_input_tokens.to_i
+    if cached_reads > 0 && cache_read_multiplier.present?
+      cache_read_cost = input_cost * cache_read_multiplier
+      if llm_class.prompt_tokens_include_cached_tokens?
+        # OpenAI / Google / OpenRouter: cached tokens are a subset of prompt_tokens
+        non_cached = prompt_tokens - cached_reads
+        cost = (non_cached * input_cost) + (cached_reads * cache_read_cost)
+      else
+        # Anthropic / Bedrock: cached tokens are separate from prompt_tokens
+        cost = (prompt_tokens * input_cost) + (cached_reads * cache_read_cost)
+      end
+    else
+      cost = prompt_tokens * input_cost
+    end
+    # Cache creation surcharge (Anthropic / Bedrock)
+    if cached_writes > 0 && cache_creation_multiplier.present?
+      cost += cached_writes * input_cost * cache_creation_multiplier
+    end
+    cost * total_attempts
+  end
   def llm_config
     @llm_config ||= Raif.llm_config(llm_model_key.to_sym)
   end