raif 1.2.2 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +6 -5
  3. data/app/assets/builds/raif.css +4 -1
  4. data/app/assets/builds/raif_admin.css +13 -1
  5. data/app/assets/javascript/raif/controllers/conversations_controller.js +1 -1
  6. data/app/assets/stylesheets/raif/admin/conversation.scss +16 -0
  7. data/app/assets/stylesheets/raif/conversations.scss +3 -0
  8. data/app/assets/stylesheets/raif.scss +2 -1
  9. data/app/controllers/raif/admin/application_controller.rb +16 -0
  10. data/app/controllers/raif/admin/configs_controller.rb +94 -0
  11. data/app/controllers/raif/admin/model_completions_controller.rb +18 -1
  12. data/app/controllers/raif/admin/model_tool_invocations_controller.rb +7 -1
  13. data/app/controllers/raif/admin/stats/model_tool_invocations_controller.rb +21 -0
  14. data/app/controllers/raif/admin/stats/tasks_controller.rb +15 -6
  15. data/app/controllers/raif/admin/stats_controller.rb +32 -3
  16. data/app/controllers/raif/conversation_entries_controller.rb +1 -0
  17. data/app/controllers/raif/conversations_controller.rb +10 -2
  18. data/app/jobs/raif/conversation_entry_job.rb +8 -6
  19. data/app/models/raif/admin/task_stat.rb +7 -0
  20. data/app/models/raif/agent.rb +63 -2
  21. data/app/models/raif/agents/native_tool_calling_agent.rb +101 -56
  22. data/app/models/raif/application_record.rb +18 -0
  23. data/app/models/raif/concerns/agent_inference_stats.rb +35 -0
  24. data/app/models/raif/concerns/has_llm.rb +1 -1
  25. data/app/models/raif/concerns/json_schema_definition.rb +40 -5
  26. data/app/models/raif/concerns/llms/anthropic/message_formatting.rb +28 -0
  27. data/app/models/raif/concerns/llms/anthropic/response_tool_calls.rb +24 -0
  28. data/app/models/raif/concerns/llms/anthropic/tool_formatting.rb +4 -0
  29. data/app/models/raif/concerns/llms/bedrock/message_formatting.rb +36 -0
  30. data/app/models/raif/concerns/llms/bedrock/response_tool_calls.rb +26 -0
  31. data/app/models/raif/concerns/llms/bedrock/tool_formatting.rb +4 -0
  32. data/app/models/raif/concerns/llms/google/message_formatting.rb +109 -0
  33. data/app/models/raif/concerns/llms/google/response_tool_calls.rb +32 -0
  34. data/app/models/raif/concerns/llms/google/tool_formatting.rb +72 -0
  35. data/app/models/raif/concerns/llms/message_formatting.rb +11 -5
  36. data/app/models/raif/concerns/llms/open_ai/json_schema_validation.rb +3 -3
  37. data/app/models/raif/concerns/llms/open_ai_completions/message_formatting.rb +22 -0
  38. data/app/models/raif/concerns/llms/open_ai_completions/response_tool_calls.rb +22 -0
  39. data/app/models/raif/concerns/llms/open_ai_completions/tool_formatting.rb +4 -0
  40. data/app/models/raif/concerns/llms/open_ai_responses/message_formatting.rb +17 -0
  41. data/app/models/raif/concerns/llms/open_ai_responses/response_tool_calls.rb +26 -0
  42. data/app/models/raif/concerns/llms/open_ai_responses/tool_formatting.rb +4 -0
  43. data/app/models/raif/concerns/run_with.rb +127 -0
  44. data/app/models/raif/conversation.rb +96 -9
  45. data/app/models/raif/conversation_entry.rb +37 -8
  46. data/app/models/raif/embedding_model.rb +2 -1
  47. data/app/models/raif/embedding_models/open_ai.rb +1 -1
  48. data/app/models/raif/llm.rb +28 -3
  49. data/app/models/raif/llms/anthropic.rb +7 -19
  50. data/app/models/raif/llms/bedrock.rb +6 -20
  51. data/app/models/raif/llms/google.rb +140 -0
  52. data/app/models/raif/llms/open_ai_base.rb +19 -5
  53. data/app/models/raif/llms/open_ai_completions.rb +6 -11
  54. data/app/models/raif/llms/open_ai_responses.rb +6 -16
  55. data/app/models/raif/llms/open_router.rb +10 -14
  56. data/app/models/raif/model_completion.rb +61 -0
  57. data/app/models/raif/model_tool.rb +10 -2
  58. data/app/models/raif/model_tool_invocation.rb +38 -6
  59. data/app/models/raif/model_tools/agent_final_answer.rb +2 -7
  60. data/app/models/raif/model_tools/provider_managed/code_execution.rb +4 -0
  61. data/app/models/raif/model_tools/provider_managed/image_generation.rb +4 -0
  62. data/app/models/raif/model_tools/provider_managed/web_search.rb +4 -0
  63. data/app/models/raif/streaming_responses/google.rb +71 -0
  64. data/app/models/raif/task.rb +74 -18
  65. data/app/models/raif/user_tool_invocation.rb +19 -0
  66. data/app/views/layouts/raif/admin.html.erb +12 -1
  67. data/app/views/raif/admin/agents/_agent.html.erb +8 -0
  68. data/app/views/raif/admin/agents/_conversation_message.html.erb +28 -6
  69. data/app/views/raif/admin/agents/index.html.erb +2 -0
  70. data/app/views/raif/admin/agents/show.html.erb +46 -1
  71. data/app/views/raif/admin/configs/show.html.erb +117 -0
  72. data/app/views/raif/admin/conversations/_conversation_entry.html.erb +29 -34
  73. data/app/views/raif/admin/conversations/show.html.erb +2 -0
  74. data/app/views/raif/admin/model_completions/_model_completion.html.erb +9 -0
  75. data/app/views/raif/admin/model_completions/index.html.erb +26 -0
  76. data/app/views/raif/admin/model_completions/show.html.erb +124 -61
  77. data/app/views/raif/admin/model_tool_invocations/index.html.erb +22 -1
  78. data/app/views/raif/admin/model_tools/_list.html.erb +16 -0
  79. data/app/views/raif/admin/model_tools/_model_tool.html.erb +36 -0
  80. data/app/views/raif/admin/stats/_stats_tile.html.erb +34 -0
  81. data/app/views/raif/admin/stats/index.html.erb +71 -88
  82. data/app/views/raif/admin/stats/model_tool_invocations/index.html.erb +43 -0
  83. data/app/views/raif/admin/stats/tasks/index.html.erb +20 -6
  84. data/app/views/raif/admin/tasks/index.html.erb +6 -1
  85. data/app/views/raif/admin/tasks/show.html.erb +36 -3
  86. data/app/views/raif/conversation_entries/_form.html.erb +4 -1
  87. data/app/views/raif/conversations/_conversation.html.erb +10 -0
  88. data/app/views/raif/conversations/_entry_processed.turbo_stream.erb +12 -0
  89. data/app/views/raif/conversations/_full_conversation.html.erb +3 -6
  90. data/app/views/raif/conversations/_initial_chat_message.html.erb +5 -0
  91. data/app/views/raif/conversations/index.html.erb +23 -0
  92. data/config/locales/admin.en.yml +33 -1
  93. data/config/locales/en.yml +41 -4
  94. data/config/routes.rb +2 -0
  95. data/db/migrate/20250804013843_add_task_run_args_to_raif_tasks.rb +13 -0
  96. data/db/migrate/20250811171150_make_raif_task_creator_optional.rb +8 -0
  97. data/db/migrate/20250904194456_add_generating_entry_response_to_raif_conversations.rb +7 -0
  98. data/db/migrate/20250911125234_add_source_to_raif_tasks.rb +7 -0
  99. data/db/migrate/20251020005853_add_source_to_raif_agents.rb +7 -0
  100. data/db/migrate/20251020011346_rename_task_run_args_to_run_with.rb +7 -0
  101. data/db/migrate/20251020011405_add_run_with_to_raif_agents.rb +13 -0
  102. data/db/migrate/20251024160119_add_llm_messages_max_length_to_raif_conversations.rb +14 -0
  103. data/db/migrate/20251124185033_add_provider_tool_call_id_to_raif_model_tool_invocations.rb +7 -0
  104. data/db/migrate/20251128202941_add_tool_choice_to_raif_model_completions.rb +7 -0
  105. data/db/migrate/20260118144846_add_source_to_raif_conversations.rb +7 -0
  106. data/db/migrate/20260119000000_add_failure_tracking_to_raif_model_completions.rb +10 -0
  107. data/db/migrate/20260119000001_add_completed_at_to_raif_model_completions.rb +8 -0
  108. data/db/migrate/20260119000002_add_started_at_to_raif_model_completions.rb +8 -0
  109. data/exe/raif +7 -0
  110. data/lib/generators/raif/agent/agent_generator.rb +22 -7
  111. data/lib/generators/raif/agent/templates/agent.rb.tt +20 -24
  112. data/lib/generators/raif/agent/templates/agent_eval_set.rb.tt +48 -0
  113. data/lib/generators/raif/agent/templates/application_agent.rb.tt +1 -3
  114. data/lib/generators/raif/base_generator.rb +19 -0
  115. data/lib/generators/raif/conversation/conversation_generator.rb +21 -2
  116. data/lib/generators/raif/conversation/templates/application_conversation.rb.tt +0 -2
  117. data/lib/generators/raif/conversation/templates/conversation.rb.tt +34 -32
  118. data/lib/generators/raif/conversation/templates/conversation_eval_set.rb.tt +70 -0
  119. data/lib/generators/raif/eval_set/eval_set_generator.rb +28 -0
  120. data/lib/generators/raif/eval_set/templates/eval_set.rb.tt +21 -0
  121. data/lib/generators/raif/evals/setup/setup_generator.rb +47 -0
  122. data/lib/generators/raif/install/install_generator.rb +15 -0
  123. data/lib/generators/raif/install/templates/initializer.rb +89 -10
  124. data/lib/generators/raif/model_tool/model_tool_generator.rb +5 -5
  125. data/lib/generators/raif/model_tool/templates/model_tool.rb.tt +78 -78
  126. data/lib/generators/raif/model_tool/templates/model_tool_invocation_partial.html.erb.tt +1 -1
  127. data/lib/generators/raif/task/task_generator.rb +22 -3
  128. data/lib/generators/raif/task/templates/application_task.rb.tt +0 -2
  129. data/lib/generators/raif/task/templates/task.rb.tt +55 -59
  130. data/lib/generators/raif/task/templates/task_eval_set.rb.tt +54 -0
  131. data/lib/raif/cli/base.rb +39 -0
  132. data/lib/raif/cli/evals.rb +47 -0
  133. data/lib/raif/cli/evals_setup.rb +27 -0
  134. data/lib/raif/cli.rb +67 -0
  135. data/lib/raif/configuration.rb +57 -8
  136. data/lib/raif/engine.rb +8 -0
  137. data/lib/raif/errors/instance_dependent_schema_error.rb +8 -0
  138. data/lib/raif/errors/streaming_error.rb +6 -3
  139. data/lib/raif/errors.rb +1 -0
  140. data/lib/raif/evals/eval.rb +30 -0
  141. data/lib/raif/evals/eval_set.rb +111 -0
  142. data/lib/raif/evals/eval_sets/expectations.rb +53 -0
  143. data/lib/raif/evals/eval_sets/llm_judge_expectations.rb +255 -0
  144. data/lib/raif/evals/expectation_result.rb +39 -0
  145. data/lib/raif/evals/llm_judge.rb +32 -0
  146. data/lib/raif/evals/llm_judges/binary.rb +94 -0
  147. data/lib/raif/evals/llm_judges/comparative.rb +89 -0
  148. data/lib/raif/evals/llm_judges/scored.rb +63 -0
  149. data/lib/raif/evals/llm_judges/summarization.rb +166 -0
  150. data/lib/raif/evals/run.rb +202 -0
  151. data/lib/raif/evals/scoring_rubric.rb +174 -0
  152. data/lib/raif/evals.rb +26 -0
  153. data/lib/raif/json_schema_builder.rb +14 -0
  154. data/lib/raif/llm_registry.rb +218 -15
  155. data/lib/raif/messages.rb +180 -0
  156. data/lib/raif/migration_checker.rb +3 -3
  157. data/lib/raif/utils/colors.rb +23 -0
  158. data/lib/raif/utils.rb +1 -0
  159. data/lib/raif/version.rb +1 -1
  160. data/lib/raif.rb +13 -0
  161. data/lib/tasks/annotate_rb.rake +10 -0
  162. data/spec/support/current_temperature_test_tool.rb +34 -0
  163. data/spec/support/rspec_helpers.rb +8 -8
  164. data/spec/support/test_conversation.rb +1 -1
  165. metadata +77 -10
  166. data/app/models/raif/agents/re_act_agent.rb +0 -127
  167. data/app/models/raif/agents/re_act_step.rb +0 -33
@@ -0,0 +1,54 @@
1
+ <% raif_module_namespacing(["Evals", "Tasks"]) do -%>
2
+ class <%= class_name.demodulize %>EvalSet < Raif::Evals::EvalSet
3
+ # Run this eval set with:
4
+ # bundle exec raif evals ./<%= eval_set_file_path %>
5
+
6
+ # Setup runs before each eval
7
+ setup do
8
+ # Common setup code
9
+ end
10
+
11
+ # Teardown runs after each eval
12
+ teardown do
13
+ # Cleanup code
14
+ end
15
+
16
+ eval "<%= class_name %> produces expected output" do
17
+ # task = Raif::Tasks::<%= class_name %>.run(
18
+ # Add your task parameters here that produce the expected output
19
+ # )
20
+
21
+ # The return value of the block determines if the expectation passes or fails
22
+ # expect "task completes successfully" do
23
+ # task.completed?
24
+ # end
25
+
26
+ # expect "contains the word 'hello' in the output" do
27
+ # task.parsed_response.include?("hello")
28
+ # end
29
+
30
+ # Add more specific expectations based on your task's behavior
31
+ end
32
+
33
+ eval "properly handles refusals" do
34
+ # task = Raif::Tasks::<%= class_name %>.run(
35
+ # Add your task parameters here to trigger a refusal
36
+ # )
37
+
38
+ # expect "returns exactly the text 'I'm sorry, I can't do that.'" do
39
+ # task.parsed_response == "I'm sorry, I can't do that."
40
+ # end
41
+ end
42
+
43
+ eval "<%= class_name %> uses appropriate LLM tools" do
44
+ # Test that the task uses the expected tools if applicable
45
+ # task = Raif::Tasks::<%= class_name %>.run(
46
+ # Add parameters that trigger the use of the expected tools
47
+ # )
48
+
49
+ # Example tool invocation expectations (if your task uses tools):
50
+ # expect_tool_invocation(task, "tool_name", with: { param: "value" })
51
+ # expect_no_tool_invocation(task, "unwanted_tool")
52
+ end
53
+ end
54
+ <% end -%>
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
module Raif
  module CLI
    # Common base for the `raif` command-line subcommands. Holds the raw
    # argument list and provides helpers for locating and booting the host
    # Rails application.
    class Base
      attr_reader :args, :options

      # @param args [Array<String>] command-line arguments for the subcommand
      def initialize(args = [])
        @options = {}
        @args = args
      end

    protected

      # Walks upward from the current working directory until a directory
      # containing config/environment.rb is found.
      #
      # Prints an error and exits with status 1 when the filesystem root is
      # reached without finding one.
      #
      # @return [String] path of the directory that contains config/environment.rb
      def find_rails_root
        dir = Dir.pwd

        loop do
          return dir if File.exist?(File.join(dir, "config", "environment.rb"))

          up_one = File.dirname(dir)

          # File.dirname returns its argument unchanged at the filesystem root.
          if up_one == dir
            puts "Error: Could not find Rails application root"
            puts "Please run this command from within a Rails application directory"
            exit 1
          end

          dir = up_one
        end
      end

      # Changes into the Rails root and requires its config/environment file.
      def load_rails_application
        root = find_rails_root
        Dir.chdir(root)

        environment_file = File.join(root, "config", "environment")
        require environment_file
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "optparse"
4
+ require_relative "base"
5
+
6
module Raif
  module CLI
    # Implements the `raif evals` subcommand: parses options and file
    # arguments, boots the Rails application and executes an eval run.
    class Evals < Base
      # Parses the command line, loads Rails, then builds and executes a
      # Raif::Evals::Run for the requested files.
      def run
        # Evals default to the test environment unless RAILS_ENV is already set.
        ENV["RAILS_ENV"] ||= "test"
        ENV["RAIF_RUNNING_EVALS"] = "true"

        # parse! removes the recognized options from args, leaving file paths.
        option_parser.parse!(args)
        file_paths = requested_file_paths

        load_rails_application

        require "raif/evals"

        Raif::Evals::Run.new(file_paths: file_paths).execute
      end

    private

      # Builds the option parser for this subcommand.
      def option_parser
        OptionParser.new do |opts|
          opts.banner = "Usage: raif evals [options] [FILE_PATHS]"

          opts.on("-e", "--environment ENV", "Rails environment (default: test)") do |env|
            ENV["RAILS_ENV"] = env
          end

          opts.on("-h", "--help", "Show this help message") do
            puts opts
            exit
          end
        end
      end

      # Converts the remaining arguments into file path hashes. An argument of
      # the form "path:line" is split at the first colon and the line part is
      # converted with to_i.
      #
      # @return [Array<Hash>, nil] nil when no arguments remain
      def requested_file_paths
        return unless args.any?

        args.map do |arg|
          path, separator, line = arg.partition(":")

          if separator.empty?
            { file_path: arg, line_number: nil }
          else
            { file_path: path, line_number: line.to_i }
          end
        end
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "optparse"
4
+ require_relative "base"
5
+
6
module Raif
  module CLI
    # Implements the `raif evals:setup` subcommand by delegating to the
    # "raif:evals:setup" Rails generator.
    class EvalsSetup < Base
      # Parses options, boots the Rails application and invokes the generator
      # with whatever arguments remain.
      def run
        parser = OptionParser.new do |opts|
          opts.banner = "Usage: raif evals:setup [options]"
          opts.on("-h", "--help", "Show this help message") do
            puts opts
            exit
          end
        end
        parser.parse!(args)

        # Rails must be loaded before its generators can be used.
        load_rails_application

        require "rails/generators"
        Rails::Generators.invoke("raif:evals:setup", args)
      end
    end
  end
end
data/lib/raif/cli.rb ADDED
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "cli/base"
4
+ require_relative "cli/evals"
5
+ require_relative "cli/evals_setup"
6
+
7
module Raif
  module CLI
    # Subcommand names mapped to the descriptions listed in the help output.
    COMMANDS = {
      "evals" => "Run Raif evaluation sets",
      "evals:setup" => "Setup Raif evals directory structure",
      "version" => "Show Raif version",
      "help" => "Show this help message"
    }.freeze

    # Entry point that dispatches the first command-line argument to the
    # matching subcommand.
    class Runner
      # @param args [Array<String>] full argument list; its first element is
      #   removed (the array is mutated) and used as the command name
      def initialize(args)
        @command = args.shift
        @args = args
      end

      # Runs the selected command. An unknown command prints the help text and
      # exits with status 1; a missing command shows the help text.
      def run
        return Evals.new(@args).run if @command == "evals"
        return EvalsSetup.new(@args).run if @command == "evals:setup"
        return show_version if ["version", "--version", "-v"].include?(@command)
        return show_help if ["help", "--help", "-h", nil].include?(@command)

        puts "Unknown command: #{@command}"
        puts ""
        show_help
        exit 1
      end

    private

      def show_version
        require_relative "../raif/version"
        puts "Raif #{Raif::VERSION}"
      end

      # Prints usage, the command list and a few example invocations.
      def show_help
        command_lines = COMMANDS.map do |command, description|
          format(" %-12s %s", command, description)
        end

        puts [
          "Usage: raif COMMAND [options]",
          "",
          "Commands:",
          *command_lines,
          "",
          "For help on a specific command:",
          " raif COMMAND --help",
          "",
          "Examples:",
          " raif evals:setup # Setup eval directory structure",
          " raif evals # Run all eval sets in test environment",
          " raif evals CustomerSupportEvalSet # Run specific eval set",
          " raif evals -e development # Run evals in development environment",
          " raif version # Show Raif version"
        ]
      end
    end
  end
end
@@ -12,24 +12,37 @@ module Raif
12
12
  :aws_bedrock_region,
13
13
  :bedrock_embedding_models_enabled,
14
14
  :conversation_entries_controller,
15
+ :conversation_llm_messages_max_length_default,
15
16
  :conversation_system_prompt_intro,
16
17
  :conversation_types,
17
18
  :conversations_controller,
18
19
  :current_user_method,
19
20
  :default_embedding_model_key,
20
21
  :default_llm_model_key,
22
+ :evals_default_llm_judge_model_key,
23
+ :evals_verbose_output,
24
+ :google_api_key,
25
+ :google_models_enabled,
21
26
  :llm_api_requests_enabled,
22
27
  :llm_request_max_retries,
23
28
  :llm_request_retriable_exceptions,
24
29
  :model_superclass,
25
30
  :open_ai_api_key,
31
+ :open_ai_api_version,
32
+ :open_ai_auth_header_style,
33
+ :open_ai_base_url,
34
+ :open_ai_embedding_base_url,
26
35
  :open_ai_embedding_models_enabled,
27
36
  :open_ai_models_enabled,
28
37
  :open_router_api_key,
29
38
  :open_router_models_enabled,
30
39
  :open_router_app_name,
31
40
  :open_router_site_url,
41
+ :request_open_timeout,
42
+ :request_read_timeout,
43
+ :request_write_timeout,
32
44
  :streaming_update_chunk_size_threshold,
45
+ :task_creator_optional,
33
46
  :task_system_prompt_intro,
34
47
  :user_tool_types
35
48
 
@@ -40,9 +53,8 @@ module Raif
40
53
  alias_method :aws_bedrock_titan_embedding_models_enabled=, :bedrock_embedding_models_enabled=
41
54
 
42
55
  def initialize
43
- # Set default config
44
- @agent_types = Set.new(["Raif::Agents::ReActAgent", "Raif::Agents::NativeToolCallingAgent"])
45
- @anthropic_api_key = ENV["ANTHROPIC_API_KEY"]
56
+ @agent_types = Set.new(["Raif::Agents::NativeToolCallingAgent"])
57
+ @anthropic_api_key = default_disable_llm_api_requests? ? "placeholder-anthropic-api-key" : ENV["ANTHROPIC_API_KEY"]
46
58
  @bedrock_models_enabled = false
47
59
  @anthropic_models_enabled = ENV["ANTHROPIC_API_KEY"].present?
48
60
  @authorize_admin_controller_action = ->{ false }
@@ -52,28 +64,45 @@ module Raif
52
64
  @bedrock_embedding_models_enabled = false
53
65
  @task_system_prompt_intro = "You are a helpful assistant."
54
66
  @conversation_entries_controller = "Raif::ConversationEntriesController"
67
+ @conversation_llm_messages_max_length_default = 50
55
68
  @conversation_system_prompt_intro = "You are a helpful assistant who is collaborating with a teammate."
56
69
  @conversation_types = Set.new(["Raif::Conversation"])
57
70
  @conversations_controller = "Raif::ConversationsController"
58
71
  @current_user_method = :current_user
59
72
  @default_embedding_model_key = "open_ai_text_embedding_3_small"
60
- @default_llm_model_key = "open_ai_gpt_4o"
61
- @llm_api_requests_enabled = true
73
+ @default_llm_model_key = default_disable_llm_api_requests? ? :raif_test_llm : (ENV["RAIF_DEFAULT_LLM_MODEL_KEY"].presence || "open_ai_gpt_4o")
74
+ @evals_default_llm_judge_model_key = ENV["RAIF_EVALS_DEFAULT_LLM_JUDGE_MODEL_KEY"].presence
75
+ @evals_verbose_output = false
76
+ google_api_key = ENV["GOOGLE_AI_API_KEY"].presence || ENV["GOOGLE_API_KEY"]
77
+ @google_api_key = default_disable_llm_api_requests? ? "placeholder-google-api-key" : google_api_key
78
+ @google_models_enabled = @google_api_key.present?
79
+ @llm_api_requests_enabled = !default_disable_llm_api_requests?
62
80
  @llm_request_max_retries = 2
63
81
  @llm_request_retriable_exceptions = [
64
82
  Faraday::ConnectionFailed,
65
83
  Faraday::TimeoutError,
66
84
  Faraday::ServerError,
85
+ Net::ReadTimeout,
86
+ Net::OpenTimeout,
67
87
  ]
68
88
  @model_superclass = "ApplicationRecord"
69
- @open_ai_api_key = ENV["OPENAI_API_KEY"]
89
+ @open_ai_api_key = default_disable_llm_api_requests? ? "placeholder-open-ai-api-key" : ENV["OPENAI_API_KEY"]
90
+ @open_ai_api_version = nil
91
+ @open_ai_auth_header_style = :bearer
92
+ @open_ai_base_url = "https://api.openai.com/v1"
93
+ @open_ai_embedding_base_url = "https://api.openai.com/v1"
70
94
  @open_ai_embedding_models_enabled = ENV["OPENAI_API_KEY"].present?
71
95
  @open_ai_models_enabled = ENV["OPENAI_API_KEY"].present?
72
- @open_router_api_key = ENV["OPEN_ROUTER_API_KEY"].presence || ENV["OPENROUTER_API_KEY"]
96
+ open_router_api_key = ENV["OPEN_ROUTER_API_KEY"].presence || ENV["OPENROUTER_API_KEY"]
97
+ @open_router_api_key = default_disable_llm_api_requests? ? "placeholder-open-router-api-key" : open_router_api_key
73
98
  @open_router_models_enabled = @open_router_api_key.present?
74
99
  @open_router_app_name = nil
75
100
  @open_router_site_url = nil
101
+ @request_open_timeout = nil
102
+ @request_read_timeout = nil
103
+ @request_write_timeout = nil
76
104
  @streaming_update_chunk_size_threshold = 25
105
+ @task_creator_optional = true
77
106
  @user_tool_types = []
78
107
  end
79
108
 
@@ -97,7 +126,9 @@ module Raif
97
126
  "Raif.config.default_llm_model_key was set to #{default_llm_model_key}, but must be one of: #{Raif.available_llm_keys.join(", ")}"
98
127
  end
99
128
 
100
- if Raif.embedding_model_registry.present? && !Raif.available_embedding_model_keys.include?(default_embedding_model_key.to_sym)
129
+ if default_embedding_model_key.present? &&
130
+ Raif.embedding_model_registry.present? &&
131
+ !Raif.available_embedding_model_keys.include?(default_embedding_model_key.to_sym)
101
132
  raise Raif::Errors::InvalidConfigError,
102
133
  "Raif.config.default_embedding_model_key was set to #{default_embedding_model_key}, but must be one of: #{Raif.available_embedding_model_keys.join(", ")}" # rubocop:disable Layout/LineLength
103
134
  end
@@ -121,6 +152,11 @@ module Raif
121
152
  "Raif.config.open_ai_api_key is required when Raif.config.open_ai_models_enabled is true. Set it via Raif.config.open_ai_api_key or ENV[\"OPENAI_API_KEY\"]" # rubocop:disable Layout/LineLength
122
153
  end
123
154
 
155
+ if open_ai_models_enabled && ![:bearer, :api_key].include?(open_ai_auth_header_style)
156
+ raise Raif::Errors::InvalidConfigError,
157
+ "Raif.config.open_ai_auth_header_style must be either :bearer or :api_key"
158
+ end
159
+
124
160
  if open_ai_embedding_models_enabled && open_ai_api_key.blank?
125
161
  raise Raif::Errors::InvalidConfigError,
126
162
  "Raif.config.open_ai_api_key is required when Raif.config.open_ai_embedding_models_enabled is true. Set it via Raif.config.open_ai_api_key or ENV[\"OPENAI_API_KEY\"]" # rubocop:disable Layout/LineLength
@@ -135,6 +171,19 @@ module Raif
135
171
  raise Raif::Errors::InvalidConfigError,
136
172
  "Raif.config.open_router_api_key is required when Raif.config.open_router_models_enabled is true. Set it via Raif.config.open_router_api_key or ENV['OPEN_ROUTER_API_KEY']" # rubocop:disable Layout/LineLength
137
173
  end
174
+
175
+ if google_models_enabled && google_api_key.blank?
176
+ raise Raif::Errors::InvalidConfigError,
177
+ "Raif.config.google_api_key is required when Raif.config.google_models_enabled is true. Set it via Raif.config.google_api_key or ENV['GOOGLE_API_KEY']" # rubocop:disable Layout/LineLength
178
+ end
179
+ end
180
+
181
+ private
182
+
183
# Evals run in the test environment by default but still need real API keys.
# Ordinary test runs get placeholders instead, which makes it hard to rack up
# an LLM API bill by accident.
#
# @return [Boolean] true in the test environment unless evals are running
def default_disable_llm_api_requests?
  in_test_environment = Rails.env.test?
  in_test_environment && !Raif.running_evals?
end
139
188
 
140
189
  end
data/lib/raif/engine.rb CHANGED
@@ -72,6 +72,14 @@ module Raif
72
72
  end
73
73
  end
74
74
 
75
# Once the application has initialized, register each default Google LLM
# configuration, but only when Google models are enabled.
config.after_initialize do
  if Raif.config.google_models_enabled
    google_llm_configs = Raif.default_llms[Raif::Llms::Google]
    google_llm_configs.each do |llm_config|
      Raif.register_llm(Raif::Llms::Google, **llm_config)
    end
  end
end
82
+
75
83
  config.after_initialize do
76
84
  next unless Raif.config.bedrock_embedding_models_enabled
77
85
 
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
module Raif
  module Errors
    # Marker error class with no behavior beyond StandardError.
    # NOTE(review): presumably raised when a schema depends on instance state;
    # the raise sites are not visible here, so confirm before relying on that.
    class InstanceDependentSchemaError < StandardError; end
  end
end
@@ -3,16 +3,19 @@
3
3
  module Raif
4
4
  module Errors
5
5
  class StreamingError < StandardError
6
- attr_reader :message, :type, :code, :event
6
+ attr_reader :type, :code, :event
7
7
 
8
8
  def initialize(message:, type:, event:, code: nil)
9
- super
9
+ super(message)
10
10
 
11
- @message = message
12
11
  @type = type
13
12
  @code = code
14
13
  @event = event
15
14
  end
15
+
16
+ def to_s
17
+ "[#{type}] #{super} (code=#{code}, event=#{event})"
18
+ end
16
19
  end
17
20
  end
18
21
  end
data/lib/raif/errors.rb CHANGED
@@ -9,3 +9,4 @@ require "raif/errors/invalid_model_image_input_error"
9
9
  require "raif/errors/invalid_model_file_input_error"
10
10
  require "raif/errors/unsupported_feature_error"
11
11
  require "raif/errors/streaming_error"
12
+ require "raif/errors/instance_dependent_schema_error"
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
module Raif
  module Evals
    # A single eval: its description plus the expectation results recorded
    # for it.
    class Eval
      attr_reader :description, :expectation_results

      # @param description [String] human-readable name of the eval
      def initialize(description:)
        @expectation_results = []
        @description = description
      end

      # Appends a result to the recorded expectation results.
      #
      # @param result [#passed?, #to_h] outcome of one expectation
      def add_expectation_result(result)
        @expectation_results.push(result)
      end

      # @return [Boolean] true when every recorded result passed, which is
      #   also the case when nothing has been recorded
      def passed?
        expectation_results.all? { |result| result.passed? }
      end

      # @return [Hash] description, overall pass flag and each result as a hash
      def to_h
        serialized_results = expectation_results.map { |result| result.to_h }

        {
          description: description,
          passed: passed?,
          expectation_results: serialized_results
        }
      end
    end
  end
end
@@ -0,0 +1,111 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "raif/evals/eval_sets/expectations"
4
+ require "raif/evals/eval_sets/llm_judge_expectations"
5
+
6
module Raif
  module Evals
    # Base class for eval sets. Subclasses declare evals (plus optional setup
    # and teardown blocks) with the class-level DSL; an instance runs them and
    # collects one Eval per declared eval.
    class EvalSet
      include Raif::Evals::EvalSets::Expectations
      include Raif::Evals::EvalSets::LlmJudgeExpectations

      attr_reader :current_eval, :output, :results

      # @param output [IO] stream that progress and error lines are written to
      def initialize(output: $stdout)
        @output = output
      end

      class << self
        attr_reader :setup_block, :teardown_block

        # Gives every subclass its own, initially empty, list of evals.
        def inherited(subclass)
          subclass.instance_variable_set(:@evals, [])
          super
        end

        # @return [Array<Hash>] eval definitions declared on this class
        def evals
          @evals ||= []
        end

        # Declares an eval, recording the line number of the call site.
        def eval(description, &block)
          declared_at = caller_locations(1, 1).first.lineno
          evals << { description: description, block: block, definition_line_number: declared_at }
        end

        # Stores the block that run_eval executes before each eval.
        def setup(&block)
          @setup_block = block
        end

        # Stores the block that run_eval executes after each eval.
        def teardown(&block)
          @teardown_block = block
        end

        # Instantiates the eval set and runs every declared eval.
        def run(output: $stdout)
          new(output: output).run
        end
      end

      # Runs every eval declared on the class, in declaration order.
      #
      # @return [Array<Raif::Evals::Eval>] one entry per eval
      def run
        @results = []
        self.class.evals.each_with_object(@results) do |definition, collected|
          collected << run_eval(definition)
        end
      end

      # Runs one eval inside a database transaction that is always rolled
      # back. An error raised by the eval block is reported to the output and
      # recorded as an :error expectation result; teardown runs either way.
      #
      # @param eval_definition [Hash] entry from the class-level evals list
      # @return [Raif::Evals::Eval]
      def run_eval(eval_definition)
        @current_eval = Eval.new(description: eval_definition[:description])

        output.puts "Running: #{eval_definition[:description]}"

        ActiveRecord::Base.transaction do
          instance_eval(&self.class.setup_block) if self.class.setup_block

          begin
            instance_eval(&eval_definition[:block])
          rescue => error
            output.puts Raif::Utils::Colors.red(" Error in eval block: #{error.message}")
            output.puts Raif::Utils::Colors.red(" #{error.backtrace.join("\n ")}")

            failure = ExpectationResult.new(
              description: "Eval block execution",
              status: :error,
              error: error
            )
            @current_eval.add_expectation_result(failure)
          ensure
            instance_eval(&self.class.teardown_block) if self.class.teardown_block
          end

          # Discard database changes made while the eval ran.
          raise ActiveRecord::Rollback
        end

        @current_eval
      end

      # Reads a fixture file from raif_evals/files under the Rails root.
      #
      # @param filename [String] path relative to raif_evals/files
      # @return [String] file contents
      # @raise [ArgumentError] if the name is empty, contains "..", starts
      #   with "/", resolves outside the directory, or the file is missing
      def file(filename)
        if filename.nil? || filename.empty?
          raise ArgumentError, "Invalid filename: cannot be empty"
        end

        if filename.include?("..") || filename.start_with?("/")
          raise ArgumentError, "Invalid filename: cannot contain '..' or absolute paths"
        end

        files_dir = Rails.root.join("raif_evals", "files")
        target = files_dir.join(filename)

        # Second line of defence: the joined path must stay under files_dir.
        unless target.to_s.start_with?(files_dir.to_s)
          raise ArgumentError, "Invalid filename: path traversal detected"
        end

        return target.read if target.exist?

        raise ArgumentError, "File #{filename} does not exist in raif_evals/files/"
      end

    end
  end
end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
module Raif
  module Evals
    module EvalSets
      # Expectation helpers mixed into eval sets. The including object must
      # provide `output` and `current_eval`.
      module Expectations

        # Evaluates the block and records a :passed, :failed or :error result
        # on the current eval, writing a colored status line to the output.
        #
        # @param description [String] text shown next to the status mark
        # @param result_metadata [Object, nil] extra data stored on the result
        #   and printed when Raif.config.evals_verbose_output is enabled
        # @return [ExpectationResult]
        def expect(description, result_metadata: nil, &block)
          outcome =
            begin
              if block.call
                output.puts Raif::Utils::Colors.green(" ✓ #{description}")
                if result_metadata && Raif.config.evals_verbose_output
                  output.puts Raif::Utils::Colors.green(" ⎿ #{result_metadata.inspect}")
                end
                ExpectationResult.new(description: description, status: :passed, metadata: result_metadata)
              else
                output.puts Raif::Utils::Colors.red(" ✗ #{description}")
                if result_metadata && Raif.config.evals_verbose_output
                  output.puts Raif::Utils::Colors.red(" ⎿ #{result_metadata.inspect}")
                end
                ExpectationResult.new(description: description, status: :failed, metadata: result_metadata)
              end
            rescue => error
              output.puts Raif::Utils::Colors.red(" ✗ #{description} (Error: #{error.message})")
              ExpectationResult.new(description: description, status: :error, error: error, metadata: result_metadata)
            end

          current_eval.add_expectation_result(outcome)
          outcome
        end

        # Expects at least one invocation whose tool_type equals `tool_type`
        # and, when `with` is given, whose arguments match every pair in it.
        #
        # @param tool_invoker [#raif_model_tool_invocations] object whose
        #   invocations are inspected
        # @param tool_type [String] value compared against each tool_type
        # @param with [Hash] expected arguments; keys are compared as strings
        def expect_tool_invocation(tool_invoker, tool_type, with: {})
          matching = tool_invoker.raif_model_tool_invocations.select do |invocation|
            invocation.tool_type == tool_type
          end
          invoked_tools = tool_invoker.raif_model_tool_invocations.map do |invocation|
            [invocation.tool_type, invocation.tool_arguments]
          end.to_h

          if with.any?
            matching = matching.select do |invocation|
              with.all? { |key, value| invocation.tool_arguments[key.to_s] == value }
            end
          end

          with_suffix = with.any? ? " with #{with.to_json}" : ""
          expect("invokes #{tool_type}#{with_suffix}", result_metadata: { invoked_tools: invoked_tools }) do
            matching.any?
          end
        end

        # Expects that no invocation has a tool_name equal to `tool_name`.
        def expect_no_tool_invocation(tool_invoker, tool_name)
          expect("does not invoke #{tool_name}") do
            tool_invoker.raif_model_tool_invocations.none? { |invocation| invocation.tool_name == tool_name }
          end
        end

      end
    end
  end
end
+ end