RubyGems - raif - Versions diffs - 1.2.2 → 1.3.0 - Mend

raif 1.2.2 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (65)

checksums.yaml +4 -4
data/app/jobs/raif/conversation_entry_job.rb +1 -1
data/app/models/raif/agents/re_act_step.rb +1 -2
data/app/models/raif/concerns/has_llm.rb +1 -1
data/app/models/raif/concerns/task_run_args.rb +62 -0
data/app/models/raif/conversation.rb +5 -1
data/app/models/raif/conversation_entry.rb +6 -8
data/app/models/raif/llm.rb +1 -1
data/app/models/raif/llms/open_router.rb +3 -1
data/app/models/raif/task.rb +22 -9
data/app/views/raif/conversation_entries/_form.html.erb +1 -1
data/app/views/raif/conversations/_full_conversation.html.erb +3 -6
data/app/views/raif/conversations/_initial_chat_message.html.erb +5 -0
data/config/locales/en.yml +8 -0
data/db/migrate/20250804013843_add_task_run_args_to_raif_tasks.rb +13 -0
data/db/migrate/20250811171150_make_raif_task_creator_optional.rb +8 -0
data/exe/raif +7 -0
data/lib/generators/raif/agent/agent_generator.rb +22 -7
data/lib/generators/raif/agent/templates/agent.rb.tt +20 -24
data/lib/generators/raif/agent/templates/agent_eval_set.rb.tt +48 -0
data/lib/generators/raif/agent/templates/application_agent.rb.tt +0 -2
data/lib/generators/raif/base_generator.rb +19 -0
data/lib/generators/raif/conversation/conversation_generator.rb +21 -2
data/lib/generators/raif/conversation/templates/application_conversation.rb.tt +0 -2
data/lib/generators/raif/conversation/templates/conversation.rb.tt +29 -33
data/lib/generators/raif/conversation/templates/conversation_eval_set.rb.tt +70 -0
data/lib/generators/raif/eval_set/eval_set_generator.rb +28 -0
data/lib/generators/raif/eval_set/templates/eval_set.rb.tt +21 -0
data/lib/generators/raif/evals/setup/setup_generator.rb +47 -0
data/lib/generators/raif/install/install_generator.rb +15 -0
data/lib/generators/raif/install/templates/initializer.rb +11 -0
data/lib/generators/raif/model_tool/model_tool_generator.rb +5 -5
data/lib/generators/raif/model_tool/templates/model_tool.rb.tt +78 -78
data/lib/generators/raif/model_tool/templates/model_tool_invocation_partial.html.erb.tt +1 -1
data/lib/generators/raif/task/task_generator.rb +22 -3
data/lib/generators/raif/task/templates/application_task.rb.tt +0 -2
data/lib/generators/raif/task/templates/task.rb.tt +55 -59
data/lib/generators/raif/task/templates/task_eval_set.rb.tt +54 -0
data/lib/raif/cli/base.rb +39 -0
data/lib/raif/cli/evals.rb +47 -0
data/lib/raif/cli/evals_setup.rb +27 -0
data/lib/raif/cli.rb +67 -0
data/lib/raif/configuration.rb +20 -6
data/lib/raif/evals/eval.rb +30 -0
data/lib/raif/evals/eval_set.rb +111 -0
data/lib/raif/evals/eval_sets/expectations.rb +53 -0
data/lib/raif/evals/eval_sets/llm_judge_expectations.rb +255 -0
data/lib/raif/evals/expectation_result.rb +39 -0
data/lib/raif/evals/llm_judge.rb +32 -0
data/lib/raif/evals/llm_judges/binary.rb +94 -0
data/lib/raif/evals/llm_judges/comparative.rb +89 -0
data/lib/raif/evals/llm_judges/scored.rb +63 -0
data/lib/raif/evals/llm_judges/summarization.rb +166 -0
data/lib/raif/evals/run.rb +201 -0
data/lib/raif/evals/scoring_rubric.rb +174 -0
data/lib/raif/evals.rb +26 -0
data/lib/raif/llm_registry.rb +33 -0
data/lib/raif/migration_checker.rb +3 -3
data/lib/raif/utils/colors.rb +23 -0
data/lib/raif/utils.rb +1 -0
data/lib/raif/version.rb +1 -1
data/lib/raif.rb +4 -0
data/spec/support/current_temperature_test_tool.rb +34 -0
data/spec/support/test_conversation.rb +1 -1
metadata +35 -3

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 0e3f7403ecd4de813aef00da87918f1e7335df85f2e89593ac61f89fb3f6eb6b
-  data.tar.gz: 2f8247a1a4d249157bc85afa14df34127631a8821adf790ed88db9c4c58b38e6
+  metadata.gz: '074678c5fc61a6b08ddae200f82baaa948b0c26b0178a1c868c6d3e9d1ed6e0e'
+  data.tar.gz: bea8da28b1245ccaeb52a5f81fdc52f4932016d774b4ccdde20c3af61871c5c3
 SHA512:
-  metadata.gz: d7c573743a9aa6011994de4ddbf705cbed47f4c4d0fc269428315777a2c22e87fabf4f146cfaac5810a58dc4ab8a7efdfd8b5bb4ec69d1d617a2d5a5937c355d
-  data.tar.gz: d1a0f35875fd4dacf565f44b330a066e47fad000aa53d70804785a60cd2e3ace6c3c05d051094dfb3b951f2550be349246f68e299287cc2bb5ea07b700f38034
+  metadata.gz: c7420da7d6676db141ffae3ddc43787631db1190bf0b617f7c284312c85832101fffca6fe0600d35b893ccb8586474d7c22d31a549605a8c51e09dadc132aed2
+  data.tar.gz: 8cd9681ec60a6d20b53aebedef7512051bf434f625354a3e05ec340419321765a2e9d18c381e4fa947565b99c38fc6ac108e037c6622e6d96b2f06f1f7bc3f35

data/app/jobs/raif/conversation_entry_job.rb CHANGED Viewed

@@ -16,7 +16,7 @@ module Raif
       Turbo::StreamsChannel.broadcast_action_to(
         conversation,
         action: :raif_scroll_to_bottom,
-        target: dom_id(conversation, :entries)
+        target: ActionView::RecordIdentifier.dom_id(conversation, :entries)
       )
     rescue StandardError => e
       logger.error "Error processing conversation entry: #{e.message}"

data/app/models/raif/agents/re_act_step.rb CHANGED Viewed

@@ -25,9 +25,8 @@ module Raif
       def extract_tag_content(tag_name)
         match = model_response_text.match(%r{<#{tag_name}>(.*?)</#{tag_name}>}m)
-        match ? match[1].strip : nil
+        match && match[1] ? match[1].strip : nil
       end
     end
   end
 end

data/app/models/raif/concerns/has_llm.rb CHANGED Viewed

@@ -10,7 +10,7 @@ module Raif::Concerns::HasLlm
   end
   def default_llm_model_key
-    Rails.env.test? ? :raif_test_llm : Raif.config.default_llm_model_key
+    Raif.config.default_llm_model_key
   end
   def llm

data/app/models/raif/concerns/task_run_args.rb ADDED Viewed

@@ -0,0 +1,62 @@
+# frozen_string_literal: true
+module Raif::Concerns::TaskRunArgs
+  extend ActiveSupport::Concern
+  included do
+    class_attribute :_task_run_args, instance_writer: false, default: []
+  end
+  class_methods do
+    # DSL for declaring persistent task arguments that will be serialized to the database
+    # @param name [Symbol] The name of the argument
+    def task_run_arg(name)
+      # Ensure each class has its own array copy
+      self._task_run_args = _task_run_args.dup
+      _task_run_args << name.to_sym
+      # Define getter that pulls from task_run_args JSON
+      define_method(name) do
+        return instance_variable_get("@#{name}") if instance_variable_defined?("@#{name}")
+        value = task_run_args&.dig(name.to_s)
+        return unless value
+        # Deserialize GID if it's a string starting with gid://
+        deserialized = if value.is_a?(String) && value.start_with?("gid://")
+          begin
+            GlobalID::Locator.locate(value)
+          rescue ActiveRecord::RecordNotFound
+            nil
+          end
+        else
+          value
+        end
+        instance_variable_set("@#{name}", deserialized)
+      end
+      # Define setter that stores in memory (for use during run)
+      define_method("#{name}=") do |value|
+        instance_variable_set("@#{name}", value)
+      end
+    end
+    # Transform run args into a hash that can be stored in the task_run_args database column
+    def serialize_task_run_args(args)
+      serialized_args = {}
+      _task_run_args.each do |arg_name|
+        next unless args.key?(arg_name)
+        value = args[arg_name]
+        serialized_args[arg_name.to_s] = if value.respond_to?(:to_global_id)
+          value.to_global_id.to_s
+        else
+          value
+        end
+      end
+      serialized_args
+    end
+  end
+end

data/app/models/raif/conversation.rb CHANGED Viewed

@@ -34,6 +34,10 @@ class Raif::Conversation < Raif::ApplicationRecord
     I18n.t("#{self.class.name.underscore.gsub("/", ".")}.initial_chat_message")
   end
+  def initial_chat_message_partial_path
+    "raif/conversations/initial_chat_message"
+  end
   def prompt_model_for_entry_response(entry:, &block)
     update(system_prompt: build_system_prompt)
@@ -59,7 +63,7 @@ class Raif::Conversation < Raif::ApplicationRecord
       Airbrake.notify(notice)
     end
-    entry
+    nil
   end
   def process_model_response_message(message:, entry:)

data/app/models/raif/conversation_entry.rb CHANGED Viewed

@@ -63,7 +63,7 @@ class Raif::ConversationEntry < Raif::ApplicationRecord
       broadcast_replace_to raif_conversation
     end
-    if raif_model_completion.parsed_response.present? || raif_model_completion.response_tool_calls.present?
+    if raif_model_completion.present? && (raif_model_completion.parsed_response.present? || raif_model_completion.response_tool_calls.present?)
       extract_message_and_invoke_tools!
       create_entry_for_observation! if triggers_observation_to_model?
     else
@@ -83,7 +83,7 @@ class Raif::ConversationEntry < Raif::ApplicationRecord
   def create_entry_for_observation!
     follow_up_entry = raif_conversation.entries.create!(creator: creator)
     Raif::ConversationEntryJob.perform_later(conversation_entry: follow_up_entry)
-    follow_up_entry.broadcast_append_to raif_conversation, target: dom_id(raif_conversation, :entries)
+    follow_up_entry.broadcast_append_to raif_conversation, target: ActionView::RecordIdentifier.dom_id(raif_conversation, :entries)
   end
 private
@@ -94,13 +94,11 @@ private
       self.model_response_message = raif_conversation.process_model_response_message(message: raif_model_completion.parsed_response, entry: self)
       save!
-      if raif_model_completion.response_tool_calls.present?
-        raif_model_completion.response_tool_calls.each do |tool_call|
-          tool_klass = available_model_tools_map[tool_call["name"]]
-          next if tool_klass.nil?
+      raif_model_completion.response_tool_calls&.each do |tool_call|
+        tool_klass = available_model_tools_map[tool_call["name"]]
+        next if tool_klass.nil?
-          tool_klass.invoke_tool(tool_arguments: tool_call["arguments"], source: self)
-        end
+        tool_klass.invoke_tool(tool_arguments: tool_call["arguments"], source: self)
       end
       completed!

data/app/models/raif/llm.rb CHANGED Viewed

@@ -77,7 +77,7 @@ module Raif
       temperature ||= default_temperature
       max_completion_tokens ||= default_max_completion_tokens
-      model_completion = Raif::ModelCompletion.new(
+      model_completion = Raif::ModelCompletion.create!(
         messages: format_messages(messages),
         system_prompt: system_prompt,
         response_format: response_format,

data/app/models/raif/llms/open_router.rb CHANGED Viewed

@@ -93,7 +93,9 @@ private
       params[:stream_options] = { include_usage: true }
     end
-    if model_completion.response_format_json?
+    # OpenRouter will sometimes complain about combining response_format json and tool calling.
+    # If we're telling it to use the json_response tool, then the json_object response_format should be irrelevant.
+    if model_completion.response_format_json? && params[:tools].blank?
       params[:response_format] = { type: "json_object" }
       model_completion.response_format_parameter = "json_object"
     end

data/app/models/raif/task.rb CHANGED Viewed

@@ -9,10 +9,13 @@ module Raif
     include Raif::Concerns::LlmResponseParsing
     include Raif::Concerns::LlmTemperature
     include Raif::Concerns::JsonSchemaDefinition
+    include Raif::Concerns::TaskRunArgs
     llm_temperature 0.7
-    belongs_to :creator, polymorphic: true
+    belongs_to :creator, polymorphic: true, optional: true
+    validates :creator, presence: true, unless: -> { Raif.config.task_creator_optional }
     has_one :raif_model_completion, as: :source, dependent: :destroy, class_name: "Raif::ModelCompletion"
@@ -32,6 +35,7 @@ module Raif
     attr_accessor :files, :images
     after_initialize -> { self.available_model_tools ||= [] }
+    after_initialize -> { self.task_run_args ||= {} }
     def status
       if completed_at?
@@ -48,15 +52,24 @@ module Raif
     # The primary interface for running a task. It will hit the LLM with the task's prompt and system prompt and return a Raif::Task object.
     # It will also create a new Raif::ModelCompletion record.
     #
-    # @param creator [Object] The creator of the task (polymorphic association)
+    # @param creator [Object, nil] The creator of the task (polymorphic association), optional
     # @param available_model_tools [Array<Class>] Optional array of model tool classes that will be provided to the LLM for it to invoke.
     # @param llm_model_key [Symbol, String] Optional key for the LLM model to use. If blank, Raif.config.default_llm_model_key will be used.
     # @param images [Array] Optional array of Raif::ModelImageInput objects to include with the prompt.
     # @param files [Array] Optional array of Raif::ModelFileInput objects to include with the prompt.
     # @param args [Hash] Additional arguments to pass to the instance of the task that is created.
     # @return [Raif::Task, nil] The task instance that was created and run.
-    def self.run(creator:, available_model_tools: [], llm_model_key: nil, images: [], files: [], **args)
-      task = new(creator:, llm_model_key:, available_model_tools:, started_at: Time.current, images: images, files: files, **args)
+    def self.run(creator: nil, available_model_tools: [], llm_model_key: nil, images: [], files: [], **args)
+      task = new(
+        creator: creator,
+        llm_model_key: llm_model_key,
+        available_model_tools: available_model_tools,
+        started_at: Time.current,
+        images: images,
+        files: files,
+        task_run_args: serialize_task_run_args(args),
+        **args
+      )
       task.save!
       task.run
@@ -109,19 +122,19 @@ module Raif
     # Returns the LLM prompt for the task.
     #
-    # @param creator [Object] The creator of the task (polymorphic association)
+    # @param creator [Object, nil] The creator of the task (polymorphic association), optional
     # @param args [Hash] Additional arguments to pass to the instance of the task that is created.
     # @return [String] The LLM prompt for the task.
-    def self.prompt(creator:, **args)
+    def self.prompt(creator: nil, **args)
       new(creator:, **args).build_prompt
     end
     # Returns the LLM system prompt for the task.
     #
-    # @param creator [Object] The creator of the task (polymorphic association)
+    # @param creator [Object, nil] The creator of the task (polymorphic association), optional
     # @param args [Hash] Additional arguments to pass to the instance of the task that is created.
     # @return [String] The LLM system prompt for the task.
-    def self.system_prompt(creator:, **args)
+    def self.system_prompt(creator: nil, **args)
       new(creator:, **args).build_system_prompt
     end
@@ -170,7 +183,7 @@ module Raif
     end
     def populate_prompts
-      self.requested_language_key ||= creator.preferred_language_key if creator.respond_to?(:preferred_language_key)
+      self.requested_language_key ||= creator&.preferred_language_key if creator&.respond_to?(:preferred_language_key)
       self.prompt = build_prompt
       self.system_prompt = build_system_prompt
     end

data/app/views/raif/conversation_entries/_form.html.erb CHANGED Viewed

@@ -10,7 +10,7 @@
     <% end %>
   <% end %>
-  <div class="d-flex px-2">
+  <div class="d-flex px-2 align-items-center">
     <%= f.text_field :user_message,
           class: "form-control me-2",
           placeholder: conversation_entry.raif_user_tool_invocation&.message_input_placeholder.presence || t("raif.common.type_your_message"),

data/app/views/raif/conversations/_full_conversation.html.erb CHANGED Viewed

@@ -1,14 +1,11 @@
 <%= turbo_stream_from conversation %>
-<div id="<%= dom_id(conversation, :entries) %>" class="flex-grow-1 overflow-auto" data-controller="raif--conversations">
-  <%= render "raif/conversation_entries/message",
-        content: conversation.initial_chat_message,
-        message_type: :model_response %>
+<div id="<%= dom_id(conversation, :entries) %>" class="flex-grow-1 overflow-auto raif-conversation-entries-container" data-controller="raif--conversations">
+  <%= render conversation.initial_chat_message_partial_path, conversation: conversation %>
   <%= render conversation.entries.oldest_first %>
 </div>
-<div id="<%= dom_id(conversation, :entry_input) %>">
+<div id="<%= dom_id(conversation, :entry_input) %>" class="raif-conversation-entry-input-container">
   <%= render "raif/conversation_entries/form_with_available_tools",
         conversation: conversation,
         conversation_entry: Raif::ConversationEntry.new %>

data/app/views/raif/conversations/_initial_chat_message.html.erb ADDED Viewed

@@ -0,0 +1,5 @@
+<%# locals: (conversation:) %>
+<%= render "raif/conversation_entries/message",
+      content: conversation.initial_chat_message,
+      message_type: :model_response %>

data/config/locales/en.yml CHANGED Viewed

@@ -68,6 +68,9 @@ en:
       open_ai_gpt_4_1_nano: OpenAI GPT-4.1 Nano
       open_ai_gpt_4o: OpenAI GPT-4o
       open_ai_gpt_4o_mini: OpenAI GPT-4o Mini
+      open_ai_gpt_5: OpenAI GPT-5
+      open_ai_gpt_5_mini: OpenAI GPT-5 Mini
+      open_ai_gpt_5_nano: OpenAI GPT-5 Nano
       open_ai_o1: OpenAI o1
       open_ai_o1_mini: OpenAI o1 Mini
       open_ai_o3: OpenAI o3
@@ -79,6 +82,9 @@ en:
       open_ai_responses_gpt_4_1_nano: OpenAI GPT-4.1 Nano (Responses API)
       open_ai_responses_gpt_4o: OpenAI GPT-4o (Responses API)
       open_ai_responses_gpt_4o_mini: OpenAI GPT-4o Mini (Responses API)
+      open_ai_responses_gpt_5: OpenAI GPT-5 (Responses API)
+      open_ai_responses_gpt_5_mini: OpenAI GPT-5 Mini (Responses API)
+      open_ai_responses_gpt_5_nano: OpenAI GPT-5 Nano (Responses API)
       open_ai_responses_o1: OpenAI o1 (Responses API)
       open_ai_responses_o1_mini: OpenAI o1 Mini (Responses API)
       open_ai_responses_o1_pro: OpenAI o1 Pro (Responses API)
@@ -93,4 +99,6 @@ en:
       open_router_llama_3_3_70b_instruct: Meta Llama 3.3 70B Instruct (via OpenRouter)
       open_router_llama_4_maverick: Meta Llama 4 Maverick (via OpenRouter)
       open_router_llama_4_scout: Meta Llama 4 Scout (via OpenRouter)
+      open_router_open_ai_gpt_oss_120b: OpenAI GPT-OSS 120B (via OpenRouter)
+      open_router_open_ai_gpt_oss_20b: OpenAI GPT-OSS 20B (via OpenRouter)
       raif_test_llm: Raif Test LLM

data/db/migrate/20250804013843_add_task_run_args_to_raif_tasks.rb ADDED Viewed

@@ -0,0 +1,13 @@
+# frozen_string_literal: true
+class AddTaskRunArgsToRaifTasks < ActiveRecord::Migration[7.1]
+  def change
+    json_column_type = if connection.adapter_name.downcase.include?("postgresql")
+      :jsonb
+    else
+      :json
+    end
+    add_column :raif_tasks, :task_run_args, json_column_type
+  end
+end

data/db/migrate/20250811171150_make_raif_task_creator_optional.rb ADDED Viewed

@@ -0,0 +1,8 @@
+# frozen_string_literal: true
+class MakeRaifTaskCreatorOptional < ActiveRecord::Migration[7.1]
+  def change
+    change_column_null :raif_tasks, :creator_id, true
+    change_column_null :raif_tasks, :creator_type, true
+  end
+end

data/exe/raif ADDED Viewed

@@ -0,0 +1,7 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+require_relative "../lib/raif/cli"
+# Run the CLI
+Raif::CLI::Runner.new(ARGV).run

data/lib/generators/raif/agent/agent_generator.rb CHANGED Viewed

@@ -1,32 +1,47 @@
 # frozen_string_literal: true
+require_relative "../base_generator"
 module Raif
   module Generators
-    class AgentGenerator < Rails::Generators::NamedBase
+    class AgentGenerator < BaseGenerator
       source_root File.expand_path("templates", __dir__)
       desc "Creates a new Raif::Agent subclass in app/models/raif/agents"
+      class_option :skip_eval_set,
+        type: :boolean,
+        default: false,
+        desc: "Skip generating the corresponding eval set"
       def create_application_agent
         template "application_agent.rb.tt", "app/models/raif/application_agent.rb" unless File.exist?("app/models/raif/application_agent.rb")
       end
       def create_agent
-        template "agent.rb.tt", "app/models/raif/agents/#{file_name}.rb"
+        template "agent.rb.tt", File.join("app/models/raif/agents", class_path, "#{file_name}.rb")
       end
       def create_directory
         empty_directory "app/models/raif/agents" unless File.directory?("app/models/raif/agents")
       end
-    private
+      def create_eval_set
+        return if options[:skip_eval_set]
+        template "agent_eval_set.rb.tt", eval_set_file_path
+      end
-      def class_name
-        name.classify
+      def show_instructions
+        say "\nAgent created!"
+        say ""
       end
-      def file_name
-        name.underscore
+    private
+      def eval_set_file_path
+        File.join("raif_evals", "eval_sets", "agents", class_path, "#{file_name}_eval_set.rb")
       end
     end
   end
 end

data/lib/generators/raif/agent/templates/agent.rb.tt CHANGED Viewed

@@ -1,28 +1,24 @@
-# frozen_string_literal: true
+<% raif_module_namespacing(["Agents"]) do -%>
+  class <%= class_name.demodulize %> < Raif::ApplicationAgent
+    # If you want to always include a certain set of model tools with this agent type,
+    # uncomment this callback to populate the available_model_tools attribute with your desired model tools.
+    # def populate_default_model_tools
+    #   self.available_model_tools = [
+    #     Raif::ModelTools::WikipediaSearch,
+    #     Raif::ModelTools::FetchUrl
+    #   ]
+    # end
-module Raif
-  module Agents
-    class <%= class_name %> < Raif::ApplicationAgent
-      # If you want to always include a certain set of model tools with this agent type,
-      # uncomment this callback to populate the available_model_tools attribute with your desired model tools.
-      # def populate_default_model_tools
-      #   self.available_model_tools ||= [
-      #     Raif::ModelTools::WikipediaSearchTool,
-      #     Raif::ModelTools::FetchUrlTool
-      #   ]
-      # end
-      # Enter your agent's system prompt here. Alternatively, you can change your agent's superclass
-      # to an existing agent types (like Raif::Agents::ReActAgent) to utilize an existing system prompt.
-      def build_system_prompt
-        # TODO: Implement your system prompt here
-      end
+    # Enter your agent's system prompt here. Alternatively, you can change your agent's superclass
+    # to an existing agent types (like Raif::Agents::ReActAgent) to utilize an existing system prompt.
+    def build_system_prompt
+      # TODO: Implement your system prompt here
+    end
-      # Each iteration of the agent loop will generate a new Raif::ModelCompletion record and
-      # then call this method with it as an argument.
-      def process_iteration_model_completion(model_completion)
-        # TODO: Implement your iteration processing here
-      end
+    # Each iteration of the agent loop will generate a new Raif::ModelCompletion record and
+    # then call this method with it as an argument.
+    def process_iteration_model_completion(model_completion)
+      # TODO: Implement your iteration processing here
     end
   end
-end
+<% end -%>

data/lib/generators/raif/agent/templates/agent_eval_set.rb.tt ADDED Viewed

@@ -0,0 +1,48 @@
+<% raif_module_namespacing(["Evals", "Agents"]) do -%>
+  class <%= class_name.demodulize %>EvalSet < Raif::Evals::EvalSet
+    # Run this eval set with:
+    # bundle exec raif evals ./<%= eval_set_file_path %>
+    # Setup method runs before each eval
+    setup do
+      # Common setup code
+      # @user = User.create!(email: "test@example.com")
+    end
+    # Teardown runs after each eval
+    teardown do
+      # Cleanup code
+    end
+    eval "<%= class_name %> completes task successfully" do
+      # agent = Raif::Agents::<%= class_name %>.create!(
+      #   creator: @user,
+      #   task: "Your specific task here",
+      #   available_model_tools: []  # Add your tools here if needed
+      # )
+      # agent.run!
+      # expect "agent completes successfully" do
+      #   agent.completed?
+      # end
+      # expect "produces expected output" do
+      #   agent.final_answer.include?("expected content")
+      # end
+    end
+    eval "<%= class_name %> uses tools correctly" do
+      # agent = Raif::Agents::<%= class_name %>.create!(
+      #   creator: @user,
+      #   task: "A task that requires tool usage",
+      #   available_model_tools: ["expected_tool_name"]
+      # )
+      # agent.run!
+      # expect_tool_invocation(agent, "expected_tool_name")
+    end
+  end
+<% end -%>

data/lib/generators/raif/agent/templates/application_agent.rb.tt CHANGED Viewed

@@ -1,5 +1,3 @@
-# frozen_string_literal: true
 module Raif
   class ApplicationAgent < Raif::Agent
     # Add any shared agent behavior here

data/lib/generators/raif/base_generator.rb ADDED Viewed

@@ -0,0 +1,19 @@
+# frozen_string_literal: true
+module Raif
+  class BaseGenerator < Rails::Generators::NamedBase
+  private
+    def raif_module_namespacing(intermediate_modules = [], &block)
+      content = capture(&block).rstrip
+      modules_names = intermediate_modules + class_path.map(&:camelize)
+      modules_names.reverse.each do |module_name|
+        content = indent "module #{module_name}\n#{content}\nend", 2
+      end
+      concat("module Raif\n#{content}\nend\n")
+    end
+  end
+end

data/lib/generators/raif/conversation/conversation_generator.rb CHANGED Viewed

@@ -1,8 +1,10 @@
 # frozen_string_literal: true
+require_relative "../base_generator"
 module Raif
   module Generators
-    class ConversationGenerator < Rails::Generators::NamedBase
+    class ConversationGenerator < BaseGenerator
       source_root File.expand_path("templates", __dir__)
       desc "Creates a new conversation type in the app/models/raif/conversations directory"
@@ -12,19 +14,30 @@ module Raif
         default: "text",
         desc: "Response format for the task (text, html, or json)"
+      class_option :skip_eval_set,
+        type: :boolean,
+        default: false,
+        desc: "Skip generating the corresponding eval set"
       def create_application_conversation
         template "application_conversation.rb.tt",
           "app/models/raif/application_conversation.rb" unless File.exist?("app/models/raif/application_conversation.rb")
       end
       def create_conversation_file
-        template "conversation.rb.tt", File.join("app/models/raif/conversations", "#{file_name}.rb")
+        template "conversation.rb.tt", File.join("app/models/raif/conversations", class_path, "#{file_name}.rb")
       end
       def create_directory
         empty_directory "app/models/raif/conversations" unless File.directory?("app/models/raif/conversations")
       end
+      def create_eval_set
+        return if options[:skip_eval_set]
+        template "conversation_eval_set.rb.tt", eval_set_file_path
+      end
       def success_message
         say_status :success, "Conversation type created successfully", :green
         say "\nYou can now implement your conversation type in:"
@@ -32,6 +45,12 @@ module Raif
         say "\nDon't forget to add it to the config.conversation_types in your Raif configuration"
         say "For example: config.conversation_types += ['Raif::Conversations::#{class_name}']\n\n"
       end
+    private
+      def eval_set_file_path
+        File.join("raif_evals", "eval_sets", "conversations", class_path, "#{file_name}_eval_set.rb")
+      end
     end
   end
 end

data/lib/generators/raif/conversation/templates/application_conversation.rb.tt CHANGED Viewed

@@ -1,5 +1,3 @@
-# frozen_string_literal: true
 module Raif
   class ApplicationConversation < Raif::Conversation
     # Add any shared conversation behavior here