RubyGems - raif - Versions diffs - 1.2.1 → 1.3.0 - Mend

raif 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (71)

checksums.yaml +4 -4
data/README.md +29 -935
data/app/assets/builds/raif_admin.css +5 -1
data/app/assets/images/raif-logo-white.svg +8 -0
data/app/assets/stylesheets/raif_admin.scss +4 -0
data/app/jobs/raif/conversation_entry_job.rb +1 -1
data/app/models/raif/agents/re_act_step.rb +1 -2
data/app/models/raif/concerns/has_llm.rb +1 -1
data/app/models/raif/concerns/task_run_args.rb +62 -0
data/app/models/raif/conversation.rb +8 -0
data/app/models/raif/conversation_entry.rb +6 -9
data/app/models/raif/llm.rb +1 -1
data/app/models/raif/llms/open_router.rb +47 -4
data/app/models/raif/task.rb +22 -9
data/app/views/layouts/raif/admin.html.erb +3 -1
data/app/views/raif/conversation_entries/_form.html.erb +1 -1
data/app/views/raif/conversations/_full_conversation.html.erb +3 -6
data/app/views/raif/conversations/_initial_chat_message.html.erb +5 -0
data/config/locales/en.yml +8 -0
data/db/migrate/20250804013843_add_task_run_args_to_raif_tasks.rb +13 -0
data/db/migrate/20250811171150_make_raif_task_creator_optional.rb +8 -0
data/exe/raif +7 -0
data/lib/generators/raif/agent/agent_generator.rb +22 -7
data/lib/generators/raif/agent/templates/agent.rb.tt +20 -24
data/lib/generators/raif/agent/templates/agent_eval_set.rb.tt +48 -0
data/lib/generators/raif/agent/templates/application_agent.rb.tt +0 -2
data/lib/generators/raif/base_generator.rb +19 -0
data/lib/generators/raif/conversation/conversation_generator.rb +21 -2
data/lib/generators/raif/conversation/templates/application_conversation.rb.tt +0 -2
data/lib/generators/raif/conversation/templates/conversation.rb.tt +29 -33
data/lib/generators/raif/conversation/templates/conversation_eval_set.rb.tt +70 -0
data/lib/generators/raif/eval_set/eval_set_generator.rb +28 -0
data/lib/generators/raif/eval_set/templates/eval_set.rb.tt +21 -0
data/lib/generators/raif/evals/setup/setup_generator.rb +47 -0
data/lib/generators/raif/install/install_generator.rb +15 -0
data/lib/generators/raif/install/templates/initializer.rb +14 -3
data/lib/generators/raif/model_tool/model_tool_generator.rb +5 -2
data/lib/generators/raif/model_tool/templates/model_tool.rb.tt +78 -76
data/lib/generators/raif/model_tool/templates/model_tool_invocation_partial.html.erb.tt +10 -0
data/lib/generators/raif/task/task_generator.rb +22 -3
data/lib/generators/raif/task/templates/application_task.rb.tt +0 -2
data/lib/generators/raif/task/templates/task.rb.tt +55 -59
data/lib/generators/raif/task/templates/task_eval_set.rb.tt +54 -0
data/lib/raif/cli/base.rb +39 -0
data/lib/raif/cli/evals.rb +47 -0
data/lib/raif/cli/evals_setup.rb +27 -0
data/lib/raif/cli.rb +67 -0
data/lib/raif/configuration.rb +23 -9
data/lib/raif/engine.rb +2 -1
data/lib/raif/evals/eval.rb +30 -0
data/lib/raif/evals/eval_set.rb +111 -0
data/lib/raif/evals/eval_sets/expectations.rb +53 -0
data/lib/raif/evals/eval_sets/llm_judge_expectations.rb +255 -0
data/lib/raif/evals/expectation_result.rb +39 -0
data/lib/raif/evals/llm_judge.rb +32 -0
data/lib/raif/evals/llm_judges/binary.rb +94 -0
data/lib/raif/evals/llm_judges/comparative.rb +89 -0
data/lib/raif/evals/llm_judges/scored.rb +63 -0
data/lib/raif/evals/llm_judges/summarization.rb +166 -0
data/lib/raif/evals/run.rb +201 -0
data/lib/raif/evals/scoring_rubric.rb +174 -0
data/lib/raif/evals.rb +26 -0
data/lib/raif/llm_registry.rb +33 -0
data/lib/raif/migration_checker.rb +3 -3
data/lib/raif/utils/colors.rb +23 -0
data/lib/raif/utils.rb +1 -0
data/lib/raif/version.rb +1 -1
data/lib/raif.rb +4 -0
data/spec/support/current_temperature_test_tool.rb +34 -0
data/spec/support/test_conversation.rb +1 -1
metadata +37 -3

data/lib/generators/raif/model_tool/templates/model_tool.rb.tt CHANGED

@@ -1,87 +1,89 @@
-# frozen_string_literal: true
+<% raif_module_namespacing(["ModelTools"]) do -%>
+  class <%= class_name.demodulize %> < Raif::ModelTool
+    # For example tool implementations, see:
+    # Wikipedia Search Tool: https://github.com/CultivateLabs/raif/blob/main/app/models/raif/model_tools/wikipedia_search.rb
+    # Fetch URL Tool: https://github.com/CultivateLabs/raif/blob/main/app/models/raif/model_tools/fetch_url.rb
-class Raif::ModelTools::<%= class_name %> < Raif::ModelTool
-  # For example tool implementations, see:
-  # Wikipedia Search Tool: https://github.com/CultivateLabs/raif/blob/main/app/models/raif/model_tools/wikipedia_search.rb
-  # Fetch URL Tool: https://github.com/CultivateLabs/raif/blob/main/app/models/raif/model_tools/fetch_url.rb
-  tool_description do
-    "Description of your tool that will be provided to the LLM so it knows when to invoke it"
-  end
+    tool_description do
+      "Description of your tool that will be provided to the LLM so it knows when to invoke it"
+    end
-  # Define the schema for the arguments that the LLM should use when invoking your tool.
-  # It should be a valid JSON schema. When the model invokes your tool,
-  # the arguments it provides will be validated against this schema using JSON::Validator from the json-schema gem.
-  #
-  # All attributes will be required and additionalProperties will be set to false.
-  tool_arguments_schema do
-    # string :title, description: "The title of the operation", minLength: 3
+    # Define the schema for the arguments that the LLM should use when invoking your tool.
+    # It should be a valid JSON schema. When the model invokes your tool,
+    # the arguments it provides will be validated against this schema using JSON::Validator from the json-schema gem.
     #
-    # object :widget, description: "A widget's description" do
-    #   boolean :is_red, description: "Whether the widget is red"
-    #   integer :rating, description: "A rating of the widget from 1 to 10", minimum: 1, maximum: 10
-    #   array :tags, description: "Associated tags" do
-    #     items type: "string"
-    #   end
-    # end
+    # All attributes will be required and additionalProperties will be set to false.
     #
-    # array :products, description: "List of products" do
-    #   object do
-    #     integer :id, description: "Product identifier"
-    #     string :name, description: "Product name"
-    #     number :price, description: "Product price", minimum: 0
-    #   end
-    # end
-  end
+    # See https://docs.raif.ai/learn_more/json_schemas for more information about defining JSON schemas.
+    tool_arguments_schema do
+      # string :title, description: "The title of the operation", minLength: 3
+      #
+      # object :widget, description: "A widget's description" do
+      #   boolean :is_red, description: "Whether the widget is red"
+      #   integer :rating, description: "A rating of the widget from 1 to 10", minimum: 1, maximum: 10
+      #   array :tags, description: "Associated tags" do
+      #     items type: "string"
+      #   end
+      # end
+      #
+      # array :products, description: "List of products" do
+      #   object do
+      #     integer :id, description: "Product identifier"
+      #     string :name, description: "Product name"
+      #     number :price, description: "Product price", minimum: 0
+      #   end
+      # end
+    end
-  # An example of how the LLM should invoke your tool. This should return a hash with name and arguments keys.
-  # `to_json` will be called on it and provided to the LLM as an example of how to invoke your tool.
-  example_model_invocation do
-    {
-      "name": tool_name,
-      "arguments": { }
-    }
-  end
+    # An example of how the LLM should invoke your tool. This should return a hash with name and arguments keys.
+    # `to_json` will be called on it and provided to the LLM as an example of how to invoke your tool.
+    example_model_invocation do
+      {
+        "name": tool_name,
+        "arguments": {}
+      }
+    end
-  class << self
-    # When your tool is invoked by the LLM in a Raif::Agent loop,
-    # the results of the tool invocation are provided back to the LLM as an observation.
-    # This method should return whatever you want provided to the LLM.
-    # For example, if you were implementing a GoogleSearch tool, this might return a JSON
-    # object containing search results for the query.
-    def observation_for_invocation(tool_invocation)
-      return "No results found" unless tool_invocation.result.present?
+    class << self
+      # When your tool is invoked by the LLM in a Raif::Agent loop,
+      # the results of the tool invocation are provided back to the LLM as an observation.
+      # This method should return whatever you want provided to the LLM.
+      # For example, if you were implementing a GoogleSearch tool, this might return a JSON
+      # object containing search results for the query.
+      def observation_for_invocation(tool_invocation)
+        return "No results found" unless tool_invocation.result.present?
-      JSON.pretty_generate(tool_invocation.result)
-    end
+        JSON.pretty_generate(tool_invocation.result)
+      end
-    # When your tool is invoked in a Raif::Conversation, should the result be automatically provided back to the model?
-    # When true, observation_for_invocation will be used to produce the observation provided to the model
-    def triggers_observation_to_model?
-      false
-    end
+      # When your tool is invoked in a Raif::Conversation, should the result be automatically provided back to the model?
+      # When true, observation_for_invocation will be used to produce the observation provided to the model
+      def triggers_observation_to_model?
+        false
+      end
+      # When the LLM invokes your tool, this method will be called with a `Raif::ModelToolInvocation` record as an argument.
+      # It should handle the actual execution of the tool.
+      # For example, if you are implementing a GoogleSearch tool, this method should run the actual search
+      # and store the results in the tool_invocation's result JSON column.
+      def process_invocation(tool_invocation)
+        # Extract arguments from tool_invocation.tool_arguments
+        # query = tool_invocation.tool_arguments["query"]
+        #
+        # Process the invocation and perform the desired action
+        # ...
+        #
+        # Store the results in the tool_invocation
+        # tool_invocation.update!(
+        #   result: {
+        #     # Your result data structure
+        #   }
+        # )
+        #
+        # Return the result
+        # tool_invocation.result
+      end
-    # When the LLM invokes your tool, this method will be called with a `Raif::ModelToolInvocation` record as an argument.
-    # It should handle the actual execution of the tool.
-    # For example, if you are implementing a GoogleSearch tool, this method should run the actual search
-    # and store the results in the tool_invocation's result JSON column.
-    def process_invocation(tool_invocation)
-      # Extract arguments from tool_invocation.tool_arguments
-      # query = tool_invocation.tool_arguments["query"]
-      #
-      # Process the invocation and perform the desired action
-      # ...
-      #
-      # Store the results in the tool_invocation
-      # tool_invocation.update!(
-      #   result: {
-      #     # Your result data structure
-      #   }
-      # )
-      #
-      # Return the result
-      # tool_invocation.result
     end
   end
-end
+<% end -%>

data/lib/generators/raif/model_tool/templates/model_tool_invocation_partial.html.erb.tt ADDED

@@ -0,0 +1,10 @@
+<%%#
+This partial is used to render a model tool invocation to the user in the conversation interface.
+If you don't want the tool invocation to be displayed to the user, you can override the `renderable?` method in your model tool class to return false
+%>
+<div class="raif-model-tool-invocation">
+  <h5><%%= <%= file_name %>.tool_type.demodulize.titleize %> Result</h5>
+  <pre><%%= JSON.pretty_generate(<%= file_name %>.result || {}) %></pre>
+  <p>Edit this file in <code><%%= __FILE__ %></code> to customize the display of the tool invocation.</p>
+</div>

data/lib/generators/raif/task/task_generator.rb CHANGED

@@ -1,8 +1,10 @@
 # frozen_string_literal: true
+require_relative "../base_generator"
 module Raif
   module Generators
-    class TaskGenerator < Rails::Generators::NamedBase
+    class TaskGenerator < BaseGenerator
       source_root File.expand_path("templates", __dir__)
       class_option :response_format,
@@ -10,6 +12,11 @@ module Raif
         default: "text",
         desc: "Response format for the task (text, html, or json)"
+      class_option :skip_eval_set,
+        type: :boolean,
+        default: false,
+        desc: "Skip generating the corresponding eval set"
       def create_application_task
         template "application_task.rb.tt", "app/models/raif/application_task.rb" unless File.exist?("app/models/raif/application_task.rb")
       end
@@ -18,11 +25,23 @@ module Raif
         template "task.rb.tt", File.join("app/models/raif/tasks", class_path, "#{file_name}.rb")
       end
+      def create_eval_set
+        return if options[:skip_eval_set]
+        template "task_eval_set.rb.tt", eval_set_file_path
+      end
+      def show_instructions
+        say "\nTask created!"
+        say ""
+      end
     private
-      def task_class_name
-        class_name
+      def eval_set_file_path
+        File.join("raif_evals", "eval_sets", "tasks", class_path, "#{file_name}_eval_set.rb")
       end
     end
   end
 end

data/lib/generators/raif/task/templates/application_task.rb.tt CHANGED

@@ -1,5 +1,3 @@
-# frozen_string_literal: true
 module Raif
   class ApplicationTask < Raif::Task
     # Add any shared task behavior here

data/lib/generators/raif/task/templates/task.rb.tt CHANGED

@@ -1,63 +1,59 @@
-# frozen_string_literal: true
-module Raif
-  module Tasks
-    class <%= task_class_name %> < Raif::ApplicationTask
-      # Set the response format for the task. Options are :html, :text, or :json.
-      llm_response_format :<%= options[:response_format] %>
-      # Set the temperature for the task
-      # llm_temperature 0.7
-      # Optional: Set the allowed tags for the task. Only relevant if response_format is :html.
-      # Defaults to Rails::HTML5::SafeListSanitizer.allowed_tags
-      # llm_response_allowed_tags %w[p b i div strong]
-      # Optional: Set the allowed attributes for the task. Only relevant if response_format is :html.
-      # Defaults to Rails::HTML5::SafeListSanitizer.allowed_attributes
-      # llm_response_allowed_attributes %w[style]
-      # Define any attributes that are needed for the task.
-      # You can then pass them when running the task and they will be available in build_prompt:
-      # Raif::Tasks::<%= task_class_name %>.run(your_attribute: "some value")
-      # attr_accessor :your_attribute
-      <%- if options[:response_format] == "json" -%>
-      # Define a JSON schema that the model's response should adhere to
+<% raif_module_namespacing(["Tasks"]) do -%>
+  class <%= class_name.demodulize %> < Raif::ApplicationTask
+    # Set the response format for the task. Options are :html, :text, or :json.
+    llm_response_format :<%= options[:response_format] %>
+    # Set the temperature for the task
+    # llm_temperature 0.7
+    # Optional: Set the allowed tags for the task. Only relevant if response_format is :html.
+    # Defaults to Rails::HTML5::SafeListSanitizer.allowed_tags
+    # llm_response_allowed_tags %w[p b i div strong]
+    # Optional: Set the allowed attributes for the task. Only relevant if response_format is :html.
+    # Defaults to Rails::HTML5::SafeListSanitizer.allowed_attributes
+    # llm_response_allowed_attributes %w[style]
+    # Define any attributes that are needed for the task.
+    # You can then pass them when running the task and they will be available in build_prompt:
+    # Raif::Tasks::<%= class_name %>.run(your_attribute: "some value")
+    # task_run_arg :your_attribute
+    <%- if options[:response_format] == "json" -%>
+    # Define a JSON schema that the model's response should adhere to
+    #
+    # All attributes will be required and additionalProperties will be set to false.
+    json_response_schema do
+      # string :title, description: "The title of the operation", minLength: 3
       #
-      # All attributes will be required and additionalProperties will be set to false.
-      json_response_schema do
-        # string :title, description: "The title of the operation", minLength: 3
-        #
-        # object :widget, description: "A widget's description" do
-        #   boolean :is_red, description: "Whether the widget is red"
-        #   integer :rating, description: "A rating of the widget from 1 to 10", minimum: 1, maximum: 10
-        #   array :tags, description: "Associated tags" do
-        #     items type: "string"
-        #   end
-        # end
-        #
-        # array :products, description: "List of products" do
-        #   object do
-        #     integer :id, description: "Product identifier"
-        #     string :name, description: "Product name"
-        #     number :price, description: "Product price", minimum: 0
-        #   end
-        # end
-      end
-      <%- end -%>
-      def build_prompt
-        # Implement the LLM prompt for this task.
-        raise NotImplementedError, "Implement #build_prompt in #{self.class.name}"
-      end
-      # Optional: Override build_system_prompt if you need custom system instructions.
-      # The default implementation, which you'll get if you call super, will use Raif.config.task_system_prompt_intro
-      # and append the system_prompt_language_preference if the task's requested_language_key is set.
-      # def build_system_prompt
-      #   super + "\nAdditional system instructions..."
+      # object :widget, description: "A widget's description" do
+      #   boolean :is_red, description: "Whether the widget is red"
+      #   integer :rating, description: "A rating of the widget from 1 to 10", minimum: 1, maximum: 10
+      #   array :tags, description: "Associated tags" do
+      #     items type: "string"
+      #   end
+      # end
+      #
+      # array :products, description: "List of products" do
+      #   object do
+      #     integer :id, description: "Product identifier"
+      #     string :name, description: "Product name"
+      #     number :price, description: "Product price", minimum: 0
+      #   end
       # end
     end
+    <%- end -%>
+    def build_prompt
+      # Implement the LLM prompt for this task.
+      raise NotImplementedError, "Implement #build_prompt in #{self.class.name}"
+    end
+    # Optional: Override build_system_prompt if you need custom system instructions.
+    # The default implementation, which you'll get if you call super, will use Raif.config.task_system_prompt_intro
+    # and append the system_prompt_language_preference if the task's requested_language_key is set.
+    # def build_system_prompt
+    #   super + "\nAdditional system instructions..."
+    # end
   end
-end
+<% end -%>

data/lib/generators/raif/task/templates/task_eval_set.rb.tt ADDED

@@ -0,0 +1,54 @@
+<% raif_module_namespacing(["Evals", "Tasks"]) do -%>
+  class <%= class_name.demodulize %>EvalSet < Raif::Evals::EvalSet
+    # Run this eval set with:
+    # bundle exec raif evals ./<%= eval_set_file_path %>
+    # Setup method runs before each eval
+    setup do
+      # Common setup code
+    end
+    # Teardown runs after each eval
+    teardown do
+      # Cleanup code
+    end
+    eval "<%= class_name %> produces expected output" do
+      # task = Raif::Tasks::<%= class_name %>.run(
+      #   Add your task parameters here that produce the expected output
+      # )
+      # The return value of the block determines if the expectation passes or fails
+      # expect "task completes successfully" do
+      #   task.completed?
+      # end
+      # expect "contains the word 'hello' in the output" do
+      #   task.parsed_response.include?("hello")
+      # end
+      # Add more specific expectations based on your task's behavior
+    end
+    eval "properly handles refusals" do
+      # task = Raif::Tasks::<%= class_name %>.run(
+      #   Add your task parameters here to trigger a refusal
+      # )
+      # expect "returns exactly the text 'I'm sorry, I can't do that.'" do
+      #   task.parsed_response == "I'm sorry, I can't do that."
+      # end
+    end
+    eval "<%= class_name %> uses appropriate LLM tools" do
+      # Test that the task uses the expected tools if applicable
+      # task = Raif::Tasks::<%= class_name %>.run(
+      #   Add parameters that trigger the use of the expected tools
+      # )
+      # Example tool invocation expectations (if your task uses tools):
+      # expect_tool_invocation(task, "tool_name", with: { param: "value" })
+      # expect_no_tool_invocation(task, "unwanted_tool")
+    end
+  end
+<% end -%>

data/lib/raif/cli/base.rb ADDED

@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+module Raif
+  module CLI
+    class Base
+      attr_reader :args, :options
+      def initialize(args = [])
+        @args = args
+        @options = {}
+      end
+    protected
+      def find_rails_root
+        current = Dir.pwd
+        until File.exist?(File.join(current, "config", "environment.rb"))
+          parent = File.dirname(current)
+          if parent == current
+            puts "Error: Could not find Rails application root"
+            puts "Please run this command from within a Rails application directory"
+            exit 1
+          end
+          current = parent
+        end
+        current
+      end
+      def load_rails_application
+        rails_root = find_rails_root
+        Dir.chdir(rails_root)
+        require File.join(rails_root, "config", "environment")
+      end
+    end
+  end
+end

data/lib/raif/cli/evals.rb ADDED

@@ -0,0 +1,47 @@
+# frozen_string_literal: true
+require "optparse"
+require_relative "base"
+module Raif
+  module CLI
+    class Evals < Base
+      def run
+        # Set test environment by default for evals
+        ENV["RAILS_ENV"] ||= "test"
+        ENV["RAIF_RUNNING_EVALS"] = "true"
+        OptionParser.new do |opts|
+          opts.banner = "Usage: raif evals [options] [FILE_PATHS]"
+          opts.on("-e", "--environment ENV", "Rails environment (default: test)") do |env|
+            ENV["RAILS_ENV"] = env
+          end
+          opts.on("-h", "--help", "Show this help message") do
+            puts opts
+            exit
+          end
+        end.parse!(args)
+        # Parse file paths with optional line numbers
+        file_paths = args.map do |arg|
+          if arg.include?(":")
+            file_path, line_number = arg.split(":", 2)
+            { file_path: file_path, line_number: line_number.to_i }
+          else
+            { file_path: arg, line_number: nil }
+          end
+        end if args.any?
+        # Find and load Rails application
+        load_rails_application
+        require "raif/evals"
+        run = Raif::Evals::Run.new(file_paths: file_paths)
+        run.execute
+      end
+    end
+  end
+end

data/lib/raif/cli/evals_setup.rb ADDED

@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+require "optparse"
+require_relative "base"
+module Raif
+  module CLI
+    class EvalsSetup < Base
+      def run
+        OptionParser.new do |opts|
+          opts.banner = "Usage: raif evals:setup [options]"
+          opts.on("-h", "--help", "Show this help message") do
+            puts opts
+            exit
+          end
+        end.parse!(args)
+        # Load Rails application to use generators
+        load_rails_application
+        # Invoke the Rails generator
+        require "rails/generators"
+        Rails::Generators.invoke("raif:evals:setup", args)
+      end
+    end
+  end
+end

data/lib/raif/cli.rb ADDED

@@ -0,0 +1,67 @@
+# frozen_string_literal: true
+require_relative "cli/base"
+require_relative "cli/evals"
+require_relative "cli/evals_setup"
+module Raif
+  module CLI
+    COMMANDS = {
+      "evals" => "Run Raif evaluation sets",
+      "evals:setup" => "Setup Raif evals directory structure",
+      "version" => "Show Raif version",
+      "help" => "Show this help message"
+    }.freeze
+    class Runner
+      def initialize(args)
+        @args = args
+        @command = args.shift
+      end
+      def run
+        case @command
+        when "evals"
+          Evals.new(@args).run
+        when "evals:setup"
+          EvalsSetup.new(@args).run
+        when "version", "--version", "-v"
+          show_version
+        when "help", "--help", "-h", nil
+          show_help
+        else
+          puts "Unknown command: #{@command}"
+          puts ""
+          show_help
+          exit 1
+        end
+      end
+    private
+      def show_version
+        require_relative "../raif/version"
+        puts "Raif #{Raif::VERSION}"
+      end
+      def show_help
+        puts "Usage: raif COMMAND [options]"
+        puts ""
+        puts "Commands:"
+        COMMANDS.each do |command, description|
+          puts format("  %-12s %s", command, description)
+        end
+        puts ""
+        puts "For help on a specific command:"
+        puts "  raif COMMAND --help"
+        puts ""
+        puts "Examples:"
+        puts "  raif evals:setup                  # Setup eval directory structure"
+        puts "  raif evals                        # Run all eval sets in test environment"
+        puts "  raif evals CustomerSupportEvalSet # Run specific eval set"
+        puts "  raif evals -e development         # Run evals in development environment"
+        puts "  raif version                      # Show Raif version"
+      end
+    end
+  end
+end