RubyGems - raif - Versions diffs - 1.4.0 → 1.5.0 - Mend

raif 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (137) hide show

data/app/models/raif/concerns/has_runtime_duration.rb ADDED Viewed

@@ -0,0 +1,41 @@
+# frozen_string_literal: true
+module Raif::Concerns::HasRuntimeDuration
+  extend ActiveSupport::Concern
+  def runtime_ended_at
+    completed_at || failed_at
+  end
+  def runtime_duration_seconds
+    return if started_at.blank? || runtime_ended_at.blank?
+    duration_in_seconds = runtime_ended_at - started_at
+    return if duration_in_seconds.negative?
+    duration_in_seconds
+  end
+  def runtime_duration
+    duration_in_seconds = runtime_duration_seconds
+    return "-" if duration_in_seconds.nil?
+    if duration_in_seconds < 1
+      "#{(duration_in_seconds * 1000).round}ms"
+    elsif duration_in_seconds < 60
+      seconds = (duration_in_seconds * 100).round / 100.0
+      "#{seconds.to_s.sub(/\.0+\z/, "").sub(/(\.\d*[1-9])0+\z/, "\\1")}s"
+    else
+      total_seconds = duration_in_seconds.round
+      hours = total_seconds / 3600
+      minutes = (total_seconds % 3600) / 60
+      seconds = total_seconds % 60
+      parts = []
+      parts << "#{hours}h" if hours.positive?
+      parts << "#{minutes}m" if minutes.positive? || hours.positive?
+      parts << "#{seconds}s"
+      parts.join(" ")
+    end
+  end
+end

data/app/models/raif/concerns/json_schema_definition.rb CHANGED Viewed

@@ -6,7 +6,7 @@ module Raif
       extend ActiveSupport::Concern
       class_methods do
-        def json_schema_definition(schema_name, &block)
+        def json_schema_definition(schema_name, dynamic: false, &block)
           raise ArgumentError, "A block must be provided to define the JSON schema" unless block_given?
           # Check if block expects an instance parameter (arity == 1)
@@ -16,6 +16,10 @@ module Raif
             # Store block for instance-dependent schema building
             @schema_blocks ||= {}
             @schema_blocks[schema_name] = block
+          elsif dynamic
+            # Store block for class-level dynamic schema (re-evaluated each call)
+            @dynamic_schema_blocks ||= {}
+            @dynamic_schema_blocks[schema_name] = block
           else
             # Build schema immediately for class-level (backward compatible)
             @schemas ||= {}
@@ -25,7 +29,9 @@ module Raif
         end
         def schema_defined?(schema_name)
-          @schemas&.dig(schema_name).present? || @schema_blocks&.dig(schema_name).present?
+          @schemas&.dig(schema_name).present? ||
+            @schema_blocks&.dig(schema_name).present? ||
+            @dynamic_schema_blocks&.dig(schema_name).present?
         end
         def schema_for(schema_name)
@@ -36,6 +42,13 @@ module Raif
                 "Call this method on an instance instead."
           end
+          # Check if this is a dynamic schema (re-evaluate each call)
+          if @dynamic_schema_blocks&.dig(schema_name).present?
+            builder = Raif::JsonSchemaBuilder.new
+            builder.instance_eval(&@dynamic_schema_blocks[schema_name])
+            return builder.to_schema
+          end
           @schemas[schema_name].to_schema
         end
@@ -54,7 +67,7 @@ module Raif
           builder.build_with_instance(self, &block)
           builder.to_schema
         elsif self.class.schema_defined?(schema_name)
-          # Fall back to class-level schema
+          # Fall back to class-level schema (handles both static and dynamic)
           self.class.schema_for(schema_name)
         end
       end

data/app/models/raif/concerns/llm_prompt_caching.rb ADDED Viewed

@@ -0,0 +1,20 @@
+# frozen_string_literal: true
+# Class-level opt-in switches for provider prompt caching.
+#
+# Including this concern declares two boolean class attributes, both defaulting
+# to false, plus macro-style class methods that set them to true.
+module Raif::Concerns::LlmPromptCaching
+  extend ActiveSupport::Concern
+  included do
+    # instance_writer: false -- no instance-level setter is generated for these flags.
+    class_attribute :anthropic_prompt_caching_enabled, instance_writer: false, default: false
+    class_attribute :bedrock_prompt_caching_enabled, instance_writer: false, default: false
+  end
+  class_methods do
+    # Sets `anthropic_prompt_caching_enabled` to true on the class this is called on.
+    def enable_anthropic_prompt_caching
+      self.anthropic_prompt_caching_enabled = true
+    end
+    # Sets `bedrock_prompt_caching_enabled` to true on the class this is called on.
+    def enable_bedrock_prompt_caching
+      self.bedrock_prompt_caching_enabled = true
+    end
+  end
+end

data/app/models/raif/concerns/llms/anthropic/message_formatting.rb CHANGED Viewed

@@ -3,6 +3,12 @@
 module Raif::Concerns::Llms::Anthropic::MessageFormatting
   extend ActiveSupport::Concern
+  # Formats `messages` with the inherited implementation (`super`), then passes
+  # the result through `consolidate_consecutive_role_messages`, merging on the
+  # "content" key.
+  def format_messages(messages)
+    # Anthropic tool results come back as user-role content blocks, so conversation
+    # continuations may need adjacent user messages collapsed after formatting.
+    consolidate_consecutive_role_messages(super, content_key: "content")
+  end
   def format_model_image_input_message(image_input)
     if image_input.source_type == :url
       {

data/app/models/raif/concerns/llms/anthropic/tool_formatting.rb CHANGED Viewed

@@ -55,6 +55,10 @@ module Raif::Concerns::Llms::Anthropic::ToolFormatting
   end
   def build_forced_tool_choice(tool_name)
-    { "type" => "tool", "name" => tool_name }
+    { "type" => "tool", "name" => tool_name, "disable_parallel_tool_use" => true }
+  end
+  def build_required_tool_choice
+    { "type" => "any", "disable_parallel_tool_use" => true }
   end
 end

data/app/models/raif/concerns/llms/bedrock/message_formatting.rb CHANGED Viewed

@@ -3,6 +3,13 @@
 module Raif::Concerns::Llms::Bedrock::MessageFormatting
   extend ActiveSupport::Concern
+  # Formats `messages` with the inherited implementation (`super`), then passes
+  # the result through `consolidate_consecutive_role_messages`, merging on the
+  # "content" key.
+  def format_messages(messages)
+    # Bedrock tool results are represented as user-role content blocks, so a
+    # tool_result followed by the next user prompt must be merged into one user
+    # message before sending it to the provider.
+    consolidate_consecutive_role_messages(super, content_key: "content")
+  end
   def format_string_message(content, role: nil)
     { "text" => content }
   end

data/app/models/raif/concerns/llms/bedrock/tool_formatting.rb CHANGED Viewed

@@ -38,4 +38,8 @@ module Raif::Concerns::Llms::Bedrock::ToolFormatting
   def build_forced_tool_choice(tool_name)
     { tool: { name: tool_name } }
   end
+  # Tool-choice payload for when a tool call is required but no specific tool is
+  # named: an `any` entry with no options (contrast with the `tool: { name: }` form above).
+  def build_required_tool_choice
+    { any: {} }
+  end
 end

data/app/models/raif/concerns/llms/google/message_formatting.rb CHANGED Viewed

@@ -3,9 +3,10 @@
 module Raif::Concerns::Llms::Google::MessageFormatting
   extend ActiveSupport::Concern
-  # Override the base format_messages to use Google's message format
+  # Google uses a different envelope ("parts") and also represents tool results as
+  # user-role messages, so we normalize adjacent same-role messages after formatting.
   def format_messages(messages)
-    messages.map do |message|
+    formatted_messages = messages.map do |message|
       if message.is_a?(Hash) && message["type"] == "tool_call"
         format_tool_call_message(message)
       elsif message.is_a?(Hash) && message["type"] == "tool_call_result"
@@ -20,6 +21,8 @@ module Raif::Concerns::Llms::Google::MessageFormatting
         }
       end
     end
+    consolidate_consecutive_role_messages(formatted_messages, content_key: "parts")
   end
   def format_string_message(content, role: nil)

data/app/models/raif/concerns/llms/google/tool_formatting.rb CHANGED Viewed

@@ -49,6 +49,10 @@ module Raif::Concerns::Llms::Google::ToolFormatting
     { mode: "ANY", allowedFunctionNames: [tool_name] }
   end
+  # Tool-choice payload for when a tool call is required but no specific tool is
+  # named: the same "ANY" mode as the forced variant, without `allowedFunctionNames`.
+  def build_required_tool_choice
+    { mode: "ANY" }
+  end
 private
   # Google's API doesn't support additionalProperties in JSON schemas

data/app/models/raif/concerns/llms/message_formatting.rb CHANGED Viewed

@@ -45,4 +45,34 @@ module Raif::Concerns::Llms::MessageFormatting
     { "type" => "text", "text" => content }
   end
+  # Collapses runs of adjacent messages that share a role into a single message
+  # by concatenating the Arrays stored under `content_key`.
+  #
+  # @param messages [Array] already-formatted messages
+  # @param content_key [String] key whose Array values are joined when merging
+  #   (callers in this diff pass "content" or "parts")
+  # @return [Array] `messages` itself when it has 0 or 1 elements; otherwise a new
+  #   array built from deep-duplicated messages
+  def consolidate_consecutive_role_messages(messages, content_key:)
+    # Bedrock, Anthropic, and Google all model tool results as normal role-based
+    # message content blocks. After formatting, a tool result can therefore be a
+    # "user" message immediately followed by the next user turn. Those providers
+    # expect alternating roles, so their adapters collapse adjacent same-role blocks.
+    return messages if messages.size <= 1
+    messages.each_with_object([]) do |message, consolidated|
+      # Work on a copy so merging never writes into the caller's message objects.
+      candidate = message.deep_dup
+      previous_message = consolidated.last
+      if mergeable_consecutive_role_messages?(previous_message, candidate, content_key:)
+        previous_message[content_key] += candidate[content_key]
+      else
+        consolidated << candidate
+      end
+    end
+  end
+private
+  # True only when both arguments are Hashes carrying the same non-blank "role"
+  # and both hold an Array under `content_key`. Anything else (for example a nil
+  # `previous_message`, or String content) is reported as not mergeable.
+  def mergeable_consecutive_role_messages?(previous_message, message, content_key:)
+    previous_message.is_a?(Hash) &&
+      message.is_a?(Hash) &&
+      previous_message["role"].present? &&
+      previous_message["role"] == message["role"] &&
+      previous_message[content_key].is_a?(Array) &&
+      message[content_key].is_a?(Array)
+  end
 end

data/app/models/raif/concerns/llms/open_ai_completions/response_tool_calls.rb CHANGED Viewed

@@ -4,7 +4,7 @@ module Raif::Concerns::Llms::OpenAiCompletions::ResponseToolCalls
   extend ActiveSupport::Concern
   def extract_response_tool_calls(resp)
-    tool_calls = resp.dig("choices", 0, "message", "tool_calls")
+    tool_calls = resp&.dig("choices", 0, "message", "tool_calls")
     return if tool_calls.blank?
     tool_calls.map do |tool_call|

data/app/models/raif/concerns/llms/open_ai_completions/tool_formatting.rb CHANGED Viewed

@@ -27,4 +27,8 @@ module Raif::Concerns::Llms::OpenAiCompletions::ToolFormatting
   def build_forced_tool_choice(tool_name)
     { "type" => "function", "function" => { "name" => tool_name } }
   end
+  # Tool-choice value for when a tool call is required but no specific tool is
+  # named: the plain string "required" rather than a function-selecting Hash.
+  def build_required_tool_choice
+    "required"
+  end
 end

data/app/models/raif/concerns/llms/open_ai_responses/tool_formatting.rb CHANGED Viewed

@@ -43,4 +43,8 @@ module Raif::Concerns::Llms::OpenAiResponses::ToolFormatting
   def build_forced_tool_choice(tool_name)
     { "type" => "function", "name" => tool_name }
   end
+  # Tool-choice value for when a tool call is required but no specific tool is
+  # named: the plain string "required" rather than a function-selecting Hash.
+  def build_required_tool_choice
+    "required"
+  end
 end

data/app/models/raif/concerns/provider_managed_tool_calls.rb ADDED Viewed

@@ -0,0 +1,162 @@
+# frozen_string_literal: true
+# Normalizes provider-managed tool activity recorded on a model completion into
+# one list of plain Hashes, and exposes URL-sanitized citations.
+#
+# The host class must provide `response_array`, `citations` and
+# `available_model_tools_map`; none of them is defined in this module.
+module Raif::Concerns::ProviderManagedToolCalls
+  extend ActiveSupport::Concern
+  # Provider-managed tool data is not normalized by the provider SDKs the same
+  # way developer-managed tool calls are. This method smooths those differences
+  # into one admin-friendly structure for the model completion page.
+  #
+  # Each element is a Hash with the string keys "tool_name",
+  # "provider_tool_call_id", "status", "arguments", "sources", "raw_result" and
+  # "inferred". Calls are read from `response_array` first; the citation-based
+  # inference only runs when that yields nothing.
+  def provider_managed_tool_calls
+    # Memoized for repeated reads during a request/render. This assumes the
+    # completion's response payload is not mutated after first access.
+    @provider_managed_tool_calls ||= begin
+      tool_calls = extract_provider_managed_tool_calls
+      tool_calls = inferred_provider_managed_tool_calls if tool_calls.empty?
+      tool_calls.map do |tool_call|
+        # Non-search tool calls pass through unchanged.
+        next tool_call unless tool_call["tool_name"] == "web_search"
+        # Search sources can come from explicit provider result blocks
+        # (Anthropic) or from top-level citations (OpenAI / Google), so we
+        # merge both.
+        tool_call.merge("sources" => merge_provider_managed_sources(tool_call["sources"], citations))
+      end
+    end
+  end
+  # Returns citations with URLs sanitized to only allow http/https schemes.
+  #
+  # Any other URL (or a blank one) is replaced with nil; the remaining citation
+  # keys are kept. Assumes each citation is a Hash -- `[]` and `merge` are called
+  # on it without a type check.
+  def sanitized_citations
+    @sanitized_citations ||= Array(citations).map do |citation|
+      url = citation["url"]
+      safe_url = url.present? && url.match?(%r{\Ahttps?://}i) ? url : nil
+      citation.merge("url" => safe_url)
+    end
+  end
+private
+  # Scans the Hash entries of `response_array` and builds one normalized tool
+  # call per recognized block type; unrecognized types are dropped by `filter_map`.
+  def extract_provider_managed_tool_calls
+    response_blocks = Array(response_array).select { |block| block.is_a?(Hash) }
+    # Index every block that carries a "tool_use_id" so results can be looked up
+    # by the id of the invocation they belong to.
+    result_blocks_by_tool_use_id = response_blocks.each_with_object(Hash.new { |hash, key| hash[key] = [] }) do |block, hash|
+      next if block["tool_use_id"].blank?
+      hash[block["tool_use_id"]] << block
+    end
+    response_blocks.filter_map do |block|
+      case block["type"]
+      when "server_tool_use"
+        # Anthropic stores the tool invocation in one block and the result in a
+        # separate block keyed by `tool_use_id`.
+        build_provider_managed_server_tool_call(block, result_blocks_by_tool_use_id)
+      when "web_search_call", "web_search_preview",
+           "code_interpreter_call", "code_interpreter",
+           "image_generation_call", "image_generation"
+        # OpenAI Responses persists provider-managed calls as top-level typed
+        # blocks like `web_search_call`, `code_interpreter`, etc.
+        build_provider_managed_tool_call_from_type(block)
+      end
+    end
+  end
+  # Builds a normalized call from a "server_tool_use" block, pairing it with the
+  # result blocks indexed under the block's "id".
+  #
+  # @return [Hash, nil] nil when the tool name is unrecognized or the tool is
+  #   not an available provider-managed tool
+  def build_provider_managed_server_tool_call(block, result_blocks_by_tool_use_id)
+    tool_name = normalize_provider_managed_tool_name(block["name"])
+    return unless provider_managed_tool_available?(tool_name)
+    # The caller builds this index with an empty-array default, so a missing id
+    # yields [] and `.presence` turns that into nil.
+    raw_result = result_blocks_by_tool_use_id[block["id"]].presence
+    {
+      "tool_name" => tool_name,
+      "provider_tool_call_id" => block["id"],
+      "status" => block["status"],
+      "arguments" => block["input"].presence,
+      "sources" => extract_provider_managed_sources(raw_result),
+      "raw_result" => raw_result,
+      "inferred" => false
+    }
+  end
+  # Builds a normalized call from a block whose "type" itself names the tool.
+  # Everything except "id", "type" and "status" is reported as both the
+  # arguments and the raw result.
+  #
+  # @return [Hash, nil] nil when the tool name is unrecognized or the tool is
+  #   not an available provider-managed tool
+  def build_provider_managed_tool_call_from_type(block)
+    tool_name = normalize_provider_managed_tool_name(block["type"])
+    return unless provider_managed_tool_available?(tool_name)
+    payload = block.except("id", "type", "status").presence
+    {
+      "tool_name" => tool_name,
+      "provider_tool_call_id" => block["id"],
+      "status" => block["status"],
+      "arguments" => payload,
+      "sources" => [],
+      "raw_result" => payload,
+      "inferred" => false
+    }
+  end
+  # Fallback used when no explicit tool-call block was found.
+  #
+  # @return [Array<Hash>] a single synthetic web_search call flagged
+  #   "inferred" => true, or [] when inference does not apply
+  def inferred_provider_managed_tool_calls
+    # Google currently gives us citations for provider-managed web search, but
+    # not a first-class tool call block in `response_array`, so we infer a
+    # single search invocation when web search was available and citations exist.
+    return [] unless provider_managed_tool_available?("web_search") && citations.present?
+    [{
+      "tool_name" => "web_search",
+      "provider_tool_call_id" => nil,
+      "status" => "completed",
+      "arguments" => nil,
+      "sources" => merge_provider_managed_sources([], citations),
+      "raw_result" => nil,
+      "inferred" => true
+    }]
+  end
+  # Collects "web_search_result" entries from the "content" of each result block
+  # as { "title", "url", "page_age" } Hashes with nil values removed,
+  # de-duplicated by URL (falling back to title when the URL is absent).
+  def extract_provider_managed_sources(result_blocks)
+    Array(result_blocks).flat_map do |result_block|
+      Array(result_block["content"]).filter_map do |content_block|
+        next unless content_block.is_a?(Hash) && content_block["type"] == "web_search_result"
+        {
+          "title" => content_block["title"],
+          "url" => normalize_provider_managed_source_url(content_block["url"]),
+          "page_age" => content_block["page_age"]
+        }.compact
+      end
+    end.uniq { |source| source["url"].presence || source["title"] }
+  end
+  # Concatenates two source lists, re-normalizes every Hash entry to the
+  # { "title", "url", "page_age" } shape, drops non-Hash and empty entries, and
+  # de-duplicates by URL (falling back to title). Earlier entries win on conflict.
+  def merge_provider_managed_sources(existing_sources, extra_sources)
+    (Array(existing_sources) + Array(extra_sources)).filter_map do |source|
+      next unless source.is_a?(Hash)
+      {
+        "title" => source["title"],
+        "url" => normalize_provider_managed_source_url(source["url"]),
+        "page_age" => source["page_age"]
+      }.compact.presence
+    end.uniq { |source| source["url"].presence || source["title"] }
+  end
+  # Maps provider-specific tool/block names onto "web_search", "code_execution"
+  # or "image_generation". Returns nil for any other name (the `case` has no `else`).
+  def normalize_provider_managed_tool_name(name)
+    case name.to_s
+    when "web_search", "web_search_call", "web_search_preview"
+      "web_search"
+    when "code_execution", "code_interpreter", "code_interpreter_call"
+      "code_execution"
+    when "image_generation", "image_generation_call"
+      "image_generation"
+    end
+  end
+  # Whether `tool_name` is present in `available_model_tools_map` and reports
+  # itself as provider-managed. Returns false for a blank name and nil when the
+  # map has no entry for the name.
+  # NOTE(review): assumes the map is keyed by these normalized names -- confirm.
+  def provider_managed_tool_available?(tool_name)
+    return false if tool_name.blank?
+    available_model_tools_map[tool_name]&.provider_managed?
+  end
+  # Runs the URL through `HtmlFragmentProcessor.strip_tracking_parameters`, then
+  # returns it only if it starts with http:// or https:// (case-insensitive).
+  #
+  # @return [String, nil] nil for blank input or any other scheme
+  def normalize_provider_managed_source_url(url)
+    return if url.blank?
+    url = Raif::Utils::HtmlFragmentProcessor.strip_tracking_parameters(url)
+    return unless url.match?(%r{\Ahttps?://}i)
+    url
+  end
+end

data/app/models/raif/conversation.rb CHANGED Viewed

@@ -29,10 +29,13 @@
 #  index_raif_conversations_on_source      (source_type,source_id)
 #
 class Raif::Conversation < Raif::ApplicationRecord
+  prepend Raif::Concerns::HasPromptTemplates
   include Raif::Concerns::HasLlm
   include Raif::Concerns::HasRequestedLanguage
   include Raif::Concerns::HasAvailableModelTools
   include Raif::Concerns::LlmResponseParsing
+  include Raif::Concerns::LlmPromptCaching
   belongs_to :creator, polymorphic: true
   belongs_to :source, polymorphic: true, optional: true
@@ -103,6 +106,8 @@ class Raif::Conversation < Raif::ApplicationRecord
       response_format: response_format.to_sym,
       system_prompt: system_prompt,
       available_model_tools: available_model_tools,
+      anthropic_prompt_caching_enabled: self.class.anthropic_prompt_caching_enabled,
+      bedrock_prompt_caching_enabled: self.class.bedrock_prompt_caching_enabled,
       &block
     )
@@ -153,15 +158,18 @@ class Raif::Conversation < Raif::ApplicationRecord
       tool_invocations = entry.raif_model_tool_invocations.to_a
       if tool_invocations.any?
-        # First tool call includes the assistant's message (if any)
+        # First tool call includes the assistant's message (if any).
+        # For the result payload we send the model-facing observation when the tool
+        # opts into observations, while keeping the raw invocation.result persisted
+        # for admin/UI rendering.
         first_invocation = tool_invocations.shift
         messages << first_invocation.as_tool_call_message(assistant_message: entry.model_response_message.presence)
-        messages << first_invocation.as_tool_call_result_message
+        messages << first_invocation.as_tool_call_result_message(result: tool_result_for_llm(first_invocation))
         # Remaining tool calls (if multiple)
         tool_invocations.each do |tool_invocation|
           messages << tool_invocation.as_tool_call_message
-          messages << tool_invocation.as_tool_call_result_message
+          messages << tool_invocation.as_tool_call_result_message(result: tool_result_for_llm(tool_invocation))
         end
       elsif entry.model_response_message.present?
         # No tool calls, just a regular assistant response
@@ -176,4 +184,17 @@ class Raif::Conversation < Raif::ApplicationRecord
     available_user_tools.map(&:constantize)
   end
+private
+  # Picks the payload sent back to the model as the result of a tool invocation.
+  #
+  # Returns the tool's `observation_for_invocation` value when the invocation
+  # triggers an observation, the tool responds to that method, and the value is
+  # present; in every other case returns `tool_invocation.result`.
+  def tool_result_for_llm(tool_invocation)
+    # Some tools persist a compact structured result for display/admin purposes but
+    # need to send richer text/XML back to the model for the continuation turn.
+    return tool_invocation.result unless tool_invocation.triggers_observation_to_model?
+    tool = tool_invocation.tool
+    return tool_invocation.result unless tool.respond_to?(:observation_for_invocation)
+    tool.observation_for_invocation(tool_invocation).presence || tool_invocation.result
+  end
 end

data/app/models/raif/conversation_entry.rb CHANGED Viewed

@@ -59,8 +59,7 @@ class Raif::ConversationEntry < Raif::ApplicationRecord
   def add_user_tool_invocation_to_user_message
     return unless raif_user_tool_invocation.present?
-    separator = response_format == "html" ? "<br>" : "\n\n"
-    self.user_message = [user_message, raif_user_tool_invocation.as_user_message].join(separator)
+    self.user_message = [user_message, raif_user_tool_invocation.as_user_message].join("\n\n")
   end
   def response_format
@@ -74,7 +73,7 @@ class Raif::ConversationEntry < Raif::ApplicationRecord
   def process_entry!
     self.model_response_message = ""
-    self.raif_model_completion = raif_conversation.prompt_model_for_entry_response(entry: self) do |model_completion, _delta, _sse_event|
+    model_completion = raif_conversation.prompt_model_for_entry_response(entry: self) do |model_completion, _delta, _sse_event|
       self.raw_response = model_completion.raw_response
       self.model_response_message = raif_conversation.process_model_response_message(
         message: model_completion.parsed_response(force_reparse: true),
@@ -90,6 +89,10 @@ class Raif::ConversationEntry < Raif::ApplicationRecord
       broadcast_replace_to raif_conversation
     end
+    # Failed prompt attempts can still persist a model completion for debugging.
+    # Avoid clearing the has_one association with nil, which would delete that row.
+    self.raif_model_completion = model_completion if model_completion.present?
     if raif_model_completion.present? && (raif_model_completion.parsed_response.present? || raif_model_completion.response_tool_calls.present?)
       extract_message_and_invoke_tools!
       create_entry_for_observation! if triggers_observation_to_model?

data/app/models/raif/embedding_models/bedrock.rb CHANGED Viewed

@@ -29,6 +29,15 @@ private
   end
   def bedrock_client
-    @bedrock_client ||= Aws::BedrockRuntime::Client.new(region: Raif.config.aws_bedrock_region)
+    @bedrock_client ||= begin
+      client_options = {
+        region: Raif.config.aws_bedrock_region
+      }
+      client_options[:http_read_timeout] = Raif.config.request_read_timeout if Raif.config.request_read_timeout
+      client_options[:http_open_timeout] = Raif.config.request_open_timeout if Raif.config.request_open_timeout
+      Aws::BedrockRuntime::Client.new(client_options)
+    end
   end
 end

data/app/models/raif/embedding_models/google.rb ADDED Viewed

@@ -0,0 +1,37 @@
+# frozen_string_literal: true
+# Embedding model adapter that POSTs to the `embedContent` endpoint under
+# https://generativelanguage.googleapis.com/v1beta.
+class Raif::EmbeddingModels::Google < Raif::EmbeddingModel
+  # Requests an embedding for a single string.
+  #
+  # @param input [String] text to embed
+  # @param dimensions [Integer, nil] sent as `outputDimensionality` when present
+  # @return the value found at "embedding" -> "values" in the response body
+  # @raise [ArgumentError] when `input` is not a String
+  #
+  # `api_name` is not defined here; presumably it comes from Raif::EmbeddingModel.
+  def generate_embedding!(input, dimensions: nil)
+    unless input.is_a?(String)
+      raise ArgumentError, "Raif::EmbeddingModels::Google#generate_embedding! input must be a string"
+    end
+    response = connection.post("models/#{api_name}:embedContent") do |req|
+      req.body = build_request_parameters(input, dimensions:)
+    end
+    response.body.dig("embedding", "values")
+  end
+private
+  # Builds the request body: the input wrapped as a single text part, plus
+  # `outputDimensionality` only when `dimensions` is present.
+  def build_request_parameters(input, dimensions: nil)
+    params = {
+      content: {
+        parts: [{ text: input }]
+      }
+    }
+    params[:outputDimensionality] = dimensions if dimensions.present?
+    params
+  end
+  # Memoized Faraday connection: JSON request and response middleware, the API
+  # key sent in the `x-goog-api-key` header, and the `raise_error` response
+  # middleware enabled.
+  def connection
+    @connection ||= Faraday.new(url: "https://generativelanguage.googleapis.com/v1beta", request: Raif.default_request_options) do |f|
+      f.headers["x-goog-api-key"] = Raif.config.google_api_key
+      f.request :json
+      f.response :json
+      f.response :raise_error
+    end
+  end
+end

data/app/models/raif/evals/llm_judge.rb ADDED Viewed

@@ -0,0 +1,70 @@
+# frozen_string_literal: true
+# == Schema Information
+#
+# Table name: raif_tasks
+#
+#  id                     :bigint           not null, primary key
+#  available_model_tools  :jsonb            not null
+#  completed_at           :datetime
+#  creator_type           :string
+#  failed_at              :datetime
+#  llm_model_key          :string           not null
+#  prompt                 :text
+#  prompt_studio_run      :boolean          default(FALSE), not null
+#  raw_response           :text
+#  requested_language_key :string
+#  response_format        :integer          default("text"), not null
+#  run_with               :jsonb
+#  source_type            :string
+#  started_at             :datetime
+#  system_prompt          :text
+#  type                   :string           not null
+#  created_at             :datetime         not null
+#  updated_at             :datetime         not null
+#  creator_id             :bigint
+#  source_id              :bigint
+#
+# Indexes
+#
+#  index_raif_tasks_on_completed_at           (completed_at)
+#  index_raif_tasks_on_created_at             (created_at)
+#  index_raif_tasks_on_creator                (creator_type,creator_id)
+#  index_raif_tasks_on_failed_at              (failed_at)
+#  index_raif_tasks_on_source                 (source_type,source_id)
+#  index_raif_tasks_on_started_at             (started_at)
+#  index_raif_tasks_on_type                   (type)
+#  index_raif_tasks_on_type_and_completed_at  (type,completed_at)
+#  index_raif_tasks_on_type_and_failed_at     (type,failed_at)
+#  index_raif_tasks_on_type_and_started_at    (type,started_at)
+#
+module Raif
+  module Evals
+    # Raif::Task subclass for LLM-as-judge evaluations. It fixes the temperature
+    # at 0.0, requests a JSON response, and declares the `content_to_judge` and
+    # `additional_context` inputs. The judgment readers below expect the parsed
+    # response to expose "reasoning" and "confidence" keys.
+    class LlmJudge < Raif::Task
+      # Set default temperature for consistent judging
+      llm_temperature 0.0
+      # Default to JSON response format for structured output
+      llm_response_format :json
+      run_with :content_to_judge # the content to judge
+      run_with :additional_context # additional context to be provided to the judge
+      # Uses `Raif.config.evals_default_llm_judge_model_key` when it is set,
+      # otherwise falls back to the inherited default.
+      def default_llm_model_key
+        Raif.config.evals_default_llm_judge_model_key || super
+      end
+      # The "reasoning" entry of the parsed response; nil unless the task is completed.
+      def judgment_reasoning
+        parsed_response["reasoning"] if completed?
+      end
+      # The "confidence" entry of the parsed response; nil unless the task is completed.
+      def judgment_confidence
+        parsed_response["confidence"] if completed?
+      end
+      # Truthy when a confidence value exists and is below 0.5. Returns nil, not
+      # false, when no confidence is available.
+      def low_confidence?
+        judgment_confidence && judgment_confidence < 0.5
+      end
+    end
+  end
+end