raif 1.3.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206)
  1. checksums.yaml +4 -4
  2. data/README.md +8 -7
  3. data/app/assets/builds/raif.css +4 -1
  4. data/app/assets/builds/raif_admin.css +52 -2
  5. data/app/assets/builds/raif_admin_sprockets.js +2709 -0
  6. data/app/assets/javascript/raif/admin/copy_to_clipboard_controller.js +132 -0
  7. data/app/assets/javascript/raif/admin/cost_estimate_controller.js +80 -0
  8. data/app/assets/javascript/raif/admin/judge_config_controller.js +23 -0
  9. data/app/assets/javascript/raif/admin/select_all_checkboxes_controller.js +33 -0
  10. data/app/assets/javascript/raif/admin/sortable_table_controller.js +51 -0
  11. data/app/assets/javascript/raif/admin/table_search_controller.js +15 -0
  12. data/app/assets/javascript/raif/admin/tom_select_controller.js +33 -0
  13. data/app/assets/javascript/raif/controllers/conversations_controller.js +1 -1
  14. data/app/assets/javascript/raif_admin.js +23 -0
  15. data/app/assets/javascript/raif_admin_sprockets.js +24 -0
  16. data/app/assets/stylesheets/raif/admin/conversation.scss +16 -0
  17. data/app/assets/stylesheets/raif/conversations.scss +3 -0
  18. data/app/assets/stylesheets/raif.scss +2 -1
  19. data/app/assets/stylesheets/raif_admin.scss +50 -1
  20. data/app/controllers/raif/admin/agents_controller.rb +27 -1
  21. data/app/controllers/raif/admin/application_controller.rb +16 -0
  22. data/app/controllers/raif/admin/configs_controller.rb +95 -0
  23. data/app/controllers/raif/admin/llms_controller.rb +27 -0
  24. data/app/controllers/raif/admin/model_completions_controller.rb +24 -1
  25. data/app/controllers/raif/admin/model_tool_invocations_controller.rb +7 -1
  26. data/app/controllers/raif/admin/prompt_studio/agents_controller.rb +25 -0
  27. data/app/controllers/raif/admin/prompt_studio/base_controller.rb +32 -0
  28. data/app/controllers/raif/admin/prompt_studio/batch_runs_controller.rb +102 -0
  29. data/app/controllers/raif/admin/prompt_studio/conversations_controller.rb +25 -0
  30. data/app/controllers/raif/admin/prompt_studio/tasks_controller.rb +64 -0
  31. data/app/controllers/raif/admin/stats/model_tool_invocations_controller.rb +21 -0
  32. data/app/controllers/raif/admin/stats/tasks_controller.rb +15 -6
  33. data/app/controllers/raif/admin/stats_controller.rb +32 -3
  34. data/app/controllers/raif/admin/tasks_controller.rb +5 -0
  35. data/app/controllers/raif/conversation_entries_controller.rb +1 -0
  36. data/app/controllers/raif/conversations_controller.rb +10 -2
  37. data/app/helpers/raif/application_helper.rb +40 -0
  38. data/app/jobs/raif/conversation_entry_job.rb +8 -6
  39. data/app/jobs/raif/prompt_studio_batch_run_item_job.rb +11 -0
  40. data/app/jobs/raif/prompt_studio_batch_run_job.rb +15 -0
  41. data/app/jobs/raif/prompt_studio_task_run_job.rb +36 -0
  42. data/app/models/raif/admin/task_stat.rb +7 -0
  43. data/app/models/raif/agent.rb +98 -6
  44. data/app/models/raif/agents/native_tool_calling_agent.rb +179 -52
  45. data/app/models/raif/application_record.rb +18 -0
  46. data/app/models/raif/concerns/agent_inference_stats.rb +35 -0
  47. data/app/models/raif/concerns/has_prompt_templates.rb +88 -0
  48. data/app/models/raif/concerns/has_runtime_duration.rb +41 -0
  49. data/app/models/raif/concerns/json_schema_definition.rb +54 -6
  50. data/app/models/raif/concerns/llm_prompt_caching.rb +20 -0
  51. data/app/models/raif/concerns/llms/anthropic/message_formatting.rb +34 -0
  52. data/app/models/raif/concerns/llms/anthropic/response_tool_calls.rb +24 -0
  53. data/app/models/raif/concerns/llms/anthropic/tool_formatting.rb +8 -0
  54. data/app/models/raif/concerns/llms/bedrock/message_formatting.rb +43 -0
  55. data/app/models/raif/concerns/llms/bedrock/response_tool_calls.rb +26 -0
  56. data/app/models/raif/concerns/llms/bedrock/tool_formatting.rb +8 -0
  57. data/app/models/raif/concerns/llms/google/message_formatting.rb +112 -0
  58. data/app/models/raif/concerns/llms/google/response_tool_calls.rb +32 -0
  59. data/app/models/raif/concerns/llms/google/tool_formatting.rb +76 -0
  60. data/app/models/raif/concerns/llms/message_formatting.rb +41 -5
  61. data/app/models/raif/concerns/llms/open_ai/json_schema_validation.rb +3 -3
  62. data/app/models/raif/concerns/llms/open_ai_completions/message_formatting.rb +22 -0
  63. data/app/models/raif/concerns/llms/open_ai_completions/response_tool_calls.rb +22 -0
  64. data/app/models/raif/concerns/llms/open_ai_completions/tool_formatting.rb +8 -0
  65. data/app/models/raif/concerns/llms/open_ai_responses/message_formatting.rb +17 -0
  66. data/app/models/raif/concerns/llms/open_ai_responses/response_tool_calls.rb +26 -0
  67. data/app/models/raif/concerns/llms/open_ai_responses/tool_formatting.rb +8 -0
  68. data/app/models/raif/concerns/provider_managed_tool_calls.rb +162 -0
  69. data/app/models/raif/concerns/run_with.rb +127 -0
  70. data/app/models/raif/conversation.rb +112 -8
  71. data/app/models/raif/conversation_entry.rb +38 -4
  72. data/app/models/raif/embedding_model.rb +2 -1
  73. data/app/models/raif/embedding_models/bedrock.rb +10 -1
  74. data/app/models/raif/embedding_models/google.rb +37 -0
  75. data/app/models/raif/embedding_models/open_ai.rb +1 -1
  76. data/app/models/raif/evals/llm_judge.rb +70 -0
  77. data/{lib → app/models}/raif/evals/llm_judges/binary.rb +41 -3
  78. data/{lib → app/models}/raif/evals/llm_judges/comparative.rb +41 -3
  79. data/{lib → app/models}/raif/evals/llm_judges/scored.rb +39 -1
  80. data/{lib → app/models}/raif/evals/llm_judges/summarization.rb +40 -2
  81. data/app/models/raif/llm.rb +104 -4
  82. data/app/models/raif/llms/anthropic.rb +32 -22
  83. data/app/models/raif/llms/bedrock.rb +64 -24
  84. data/app/models/raif/llms/google.rb +166 -0
  85. data/app/models/raif/llms/open_ai_base.rb +23 -5
  86. data/app/models/raif/llms/open_ai_completions.rb +14 -12
  87. data/app/models/raif/llms/open_ai_responses.rb +14 -17
  88. data/app/models/raif/llms/open_router.rb +16 -15
  89. data/app/models/raif/model_completion.rb +103 -1
  90. data/app/models/raif/model_tool.rb +55 -5
  91. data/app/models/raif/model_tool_invocation.rb +68 -6
  92. data/app/models/raif/model_tools/agent_final_answer.rb +2 -7
  93. data/app/models/raif/model_tools/provider_managed/code_execution.rb +4 -0
  94. data/app/models/raif/model_tools/provider_managed/image_generation.rb +4 -0
  95. data/app/models/raif/model_tools/provider_managed/web_search.rb +4 -0
  96. data/app/models/raif/prompt_studio_batch_run.rb +155 -0
  97. data/app/models/raif/prompt_studio_batch_run_item.rb +220 -0
  98. data/app/models/raif/streaming_responses/bedrock.rb +60 -1
  99. data/app/models/raif/streaming_responses/google.rb +71 -0
  100. data/app/models/raif/task.rb +85 -18
  101. data/app/models/raif/user_tool_invocation.rb +19 -0
  102. data/app/views/layouts/raif/admin.html.erb +43 -2
  103. data/app/views/raif/admin/agents/_agent.html.erb +9 -0
  104. data/app/views/raif/admin/agents/_conversation_message.html.erb +28 -6
  105. data/app/views/raif/admin/agents/index.html.erb +50 -0
  106. data/app/views/raif/admin/agents/show.html.erb +50 -1
  107. data/app/views/raif/admin/configs/show.html.erb +117 -0
  108. data/app/views/raif/admin/conversations/_conversation_entry.html.erb +29 -34
  109. data/app/views/raif/admin/conversations/show.html.erb +2 -0
  110. data/app/views/raif/admin/llms/index.html.erb +110 -0
  111. data/app/views/raif/admin/model_completions/_model_completion.html.erb +10 -5
  112. data/app/views/raif/admin/model_completions/index.html.erb +40 -1
  113. data/app/views/raif/admin/model_completions/show.html.erb +256 -84
  114. data/app/views/raif/admin/model_tool_invocations/index.html.erb +22 -1
  115. data/app/views/raif/admin/model_tool_invocations/show.html.erb +18 -0
  116. data/app/views/raif/admin/model_tools/_list.html.erb +16 -0
  117. data/app/views/raif/admin/model_tools/_model_tool.html.erb +36 -0
  118. data/app/views/raif/admin/prompt_studio/agents/index.html.erb +56 -0
  119. data/app/views/raif/admin/prompt_studio/agents/show.html.erb +57 -0
  120. data/app/views/raif/admin/prompt_studio/batch_runs/_batch_run_item.html.erb +54 -0
  121. data/app/views/raif/admin/prompt_studio/batch_runs/_judge_config_fields.html.erb +76 -0
  122. data/app/views/raif/admin/prompt_studio/batch_runs/_judge_detail_modal.html.erb +27 -0
  123. data/app/views/raif/admin/prompt_studio/batch_runs/_modal.html.erb +35 -0
  124. data/app/views/raif/admin/prompt_studio/batch_runs/_progress.html.erb +78 -0
  125. data/app/views/raif/admin/prompt_studio/batch_runs/show.html.erb +49 -0
  126. data/app/views/raif/admin/prompt_studio/conversations/index.html.erb +48 -0
  127. data/app/views/raif/admin/prompt_studio/conversations/show.html.erb +36 -0
  128. data/app/views/raif/admin/prompt_studio/shared/_nav_tabs.html.erb +17 -0
  129. data/app/views/raif/admin/prompt_studio/shared/_prompt_comparison.html.erb +87 -0
  130. data/app/views/raif/admin/prompt_studio/shared/_type_filter.html.erb +54 -0
  131. data/app/views/raif/admin/prompt_studio/tasks/_task_result.html.erb +145 -0
  132. data/app/views/raif/admin/prompt_studio/tasks/_task_row.html.erb +12 -0
  133. data/app/views/raif/admin/prompt_studio/tasks/_task_type_filter.html.erb +58 -0
  134. data/app/views/raif/admin/prompt_studio/tasks/_tasks_table.html.erb +22 -0
  135. data/app/views/raif/admin/prompt_studio/tasks/index.html.erb +35 -0
  136. data/app/views/raif/admin/prompt_studio/tasks/show.html.erb +19 -0
  137. data/app/views/raif/admin/stats/_stats_tile.html.erb +34 -0
  138. data/app/views/raif/admin/stats/index.html.erb +71 -88
  139. data/app/views/raif/admin/stats/model_tool_invocations/index.html.erb +43 -0
  140. data/app/views/raif/admin/stats/tasks/index.html.erb +20 -6
  141. data/app/views/raif/admin/tasks/_task.html.erb +1 -0
  142. data/app/views/raif/admin/tasks/index.html.erb +23 -6
  143. data/app/views/raif/admin/tasks/show.html.erb +56 -3
  144. data/app/views/raif/conversation_entries/_form.html.erb +3 -0
  145. data/app/views/raif/conversation_entries/_message.html.erb +10 -6
  146. data/app/views/raif/conversations/_conversation.html.erb +10 -0
  147. data/app/views/raif/conversations/_entry_processed.turbo_stream.erb +12 -0
  148. data/app/views/raif/conversations/index.html.erb +23 -0
  149. data/config/importmap.rb +8 -0
  150. data/config/locales/admin.en.yml +161 -1
  151. data/config/locales/en.yml +67 -4
  152. data/config/routes.rb +10 -0
  153. data/db/migrate/20250904194456_add_generating_entry_response_to_raif_conversations.rb +7 -0
  154. data/db/migrate/20250911125234_add_source_to_raif_tasks.rb +7 -0
  155. data/db/migrate/20251020005853_add_source_to_raif_agents.rb +7 -0
  156. data/db/migrate/20251020011346_rename_task_run_args_to_run_with.rb +7 -0
  157. data/db/migrate/20251020011405_add_run_with_to_raif_agents.rb +13 -0
  158. data/db/migrate/20251024160119_add_llm_messages_max_length_to_raif_conversations.rb +14 -0
  159. data/db/migrate/20251124185033_add_provider_tool_call_id_to_raif_model_tool_invocations.rb +7 -0
  160. data/db/migrate/20251128202941_add_tool_choice_to_raif_model_completions.rb +7 -0
  161. data/db/migrate/20260118144846_add_source_to_raif_conversations.rb +7 -0
  162. data/db/migrate/20260119000000_add_failure_tracking_to_raif_model_completions.rb +10 -0
  163. data/db/migrate/20260119000001_add_completed_at_to_raif_model_completions.rb +8 -0
  164. data/db/migrate/20260119000002_add_started_at_to_raif_model_completions.rb +8 -0
  165. data/db/migrate/20260307000000_add_prompt_studio_run_to_raif_tasks.rb +7 -0
  166. data/db/migrate/20260308000000_create_raif_prompt_studio_batch_runs.rb +27 -0
  167. data/db/migrate/20260308000001_create_raif_prompt_studio_batch_run_items.rb +24 -0
  168. data/db/migrate/20260407000000_add_cache_token_columns_to_raif_model_completions.rb +8 -0
  169. data/lib/generators/raif/agent/agent_generator.rb +18 -0
  170. data/lib/generators/raif/agent/templates/agent.rb.tt +7 -5
  171. data/lib/generators/raif/agent/templates/application_agent.rb.tt +1 -1
  172. data/lib/generators/raif/agent/templates/system_prompt.erb.tt +3 -0
  173. data/lib/generators/raif/conversation/conversation_generator.rb +19 -1
  174. data/lib/generators/raif/conversation/templates/conversation.rb.tt +6 -0
  175. data/lib/generators/raif/conversation/templates/system_prompt.erb.tt +4 -0
  176. data/lib/generators/raif/install/templates/initializer.rb +117 -8
  177. data/lib/generators/raif/task/task_generator.rb +18 -0
  178. data/lib/generators/raif/task/templates/prompt.erb.tt +4 -0
  179. data/lib/generators/raif/task/templates/task.rb.tt +10 -9
  180. data/lib/raif/configuration.rb +47 -2
  181. data/lib/raif/embedding_model_registry.rb +8 -0
  182. data/lib/raif/engine.rb +24 -1
  183. data/lib/raif/errors/blank_response_error.rb +8 -0
  184. data/lib/raif/errors/instance_dependent_schema_error.rb +8 -0
  185. data/lib/raif/errors/prompt_template_error.rb +15 -0
  186. data/lib/raif/errors/streaming_error.rb +6 -3
  187. data/lib/raif/errors.rb +3 -0
  188. data/lib/raif/evals/run.rb +1 -0
  189. data/lib/raif/evals.rb +0 -6
  190. data/lib/raif/json_schema_builder.rb +14 -0
  191. data/lib/raif/llm_registry.rb +433 -42
  192. data/lib/raif/messages.rb +180 -0
  193. data/lib/raif/prompt_studio_comparison_builder.rb +138 -0
  194. data/lib/raif/token_estimator.rb +28 -0
  195. data/lib/raif/version.rb +1 -1
  196. data/lib/raif.rb +11 -0
  197. data/lib/tasks/annotate_rb.rake +10 -0
  198. data/spec/support/rspec_helpers.rb +15 -9
  199. data/spec/support/test_task.rb +9 -0
  200. data/spec/support/test_template_task.rb +41 -0
  201. metadata +108 -15
  202. data/app/models/raif/agents/re_act_agent.rb +0 -127
  203. data/app/models/raif/agents/re_act_step.rb +0 -32
  204. data/app/models/raif/concerns/task_run_args.rb +0 -62
  205. data/lib/raif/evals/llm_judge.rb +0 -32
  206. data/{lib → app/models}/raif/evals/scoring_rubric.rb +0 -0
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ # == Schema Information
4
+ #
5
+ # Table name: raif_tasks
6
+ #
7
+ # id :bigint not null, primary key
8
+ # available_model_tools :jsonb not null
9
+ # completed_at :datetime
10
+ # creator_type :string
11
+ # failed_at :datetime
12
+ # llm_model_key :string not null
13
+ # prompt :text
14
+ # prompt_studio_run :boolean default(FALSE), not null
15
+ # raw_response :text
16
+ # requested_language_key :string
17
+ # response_format :integer default("text"), not null
18
+ # run_with :jsonb
19
+ # source_type :string
20
+ # started_at :datetime
21
+ # system_prompt :text
22
+ # type :string not null
23
+ # created_at :datetime not null
24
+ # updated_at :datetime not null
25
+ # creator_id :bigint
26
+ # source_id :bigint
27
+ #
28
+ # Indexes
29
+ #
30
+ # index_raif_tasks_on_completed_at (completed_at)
31
+ # index_raif_tasks_on_created_at (created_at)
32
+ # index_raif_tasks_on_creator (creator_type,creator_id)
33
+ # index_raif_tasks_on_failed_at (failed_at)
34
+ # index_raif_tasks_on_source (source_type,source_id)
35
+ # index_raif_tasks_on_started_at (started_at)
36
+ # index_raif_tasks_on_type (type)
37
+ # index_raif_tasks_on_type_and_completed_at (type,completed_at)
38
+ # index_raif_tasks_on_type_and_failed_at (type,failed_at)
39
+ # index_raif_tasks_on_type_and_started_at (type,started_at)
40
+ #
41
+ module Raif
42
+ module Evals
43
+ class LlmJudge < Raif::Task
44
+ # Set default temperature for consistent judging
45
+ llm_temperature 0.0
46
+
47
+ # Default to JSON response format for structured output
48
+ llm_response_format :json
49
+
50
+ run_with :content_to_judge # the content to judge
51
+ run_with :additional_context # additional context to be provided to the judge
52
+
53
+ def default_llm_model_key
54
+ Raif.config.evals_default_llm_judge_model_key || super
55
+ end
56
+
57
+ def judgment_reasoning
58
+ parsed_response["reasoning"] if completed?
59
+ end
60
+
61
+ def judgment_confidence
62
+ parsed_response["confidence"] if completed?
63
+ end
64
+
65
+ def low_confidence?
66
+ judgment_confidence && judgment_confidence < 0.5
67
+ end
68
+ end
69
+ end
70
+ end
@@ -1,12 +1,50 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ # == Schema Information
4
+ #
5
+ # Table name: raif_tasks
6
+ #
7
+ # id :bigint not null, primary key
8
+ # available_model_tools :jsonb not null
9
+ # completed_at :datetime
10
+ # creator_type :string
11
+ # failed_at :datetime
12
+ # llm_model_key :string not null
13
+ # prompt :text
14
+ # prompt_studio_run :boolean default(FALSE), not null
15
+ # raw_response :text
16
+ # requested_language_key :string
17
+ # response_format :integer default("text"), not null
18
+ # run_with :jsonb
19
+ # source_type :string
20
+ # started_at :datetime
21
+ # system_prompt :text
22
+ # type :string not null
23
+ # created_at :datetime not null
24
+ # updated_at :datetime not null
25
+ # creator_id :bigint
26
+ # source_id :bigint
27
+ #
28
+ # Indexes
29
+ #
30
+ # index_raif_tasks_on_completed_at (completed_at)
31
+ # index_raif_tasks_on_created_at (created_at)
32
+ # index_raif_tasks_on_creator (creator_type,creator_id)
33
+ # index_raif_tasks_on_failed_at (failed_at)
34
+ # index_raif_tasks_on_source (source_type,source_id)
35
+ # index_raif_tasks_on_started_at (started_at)
36
+ # index_raif_tasks_on_type (type)
37
+ # index_raif_tasks_on_type_and_completed_at (type,completed_at)
38
+ # index_raif_tasks_on_type_and_failed_at (type,failed_at)
39
+ # index_raif_tasks_on_type_and_started_at (type,started_at)
40
+ #
3
41
  module Raif
4
42
  module Evals
5
43
  module LlmJudges
6
44
  class Binary < Raif::Evals::LlmJudge
7
- task_run_arg :criteria
8
- task_run_arg :examples
9
- task_run_arg :strict_mode
45
+ run_with :criteria
46
+ run_with :examples
47
+ run_with :strict_mode
10
48
 
11
49
  json_response_schema do
12
50
  boolean :passes, description: "Whether the content passes the criteria"
@@ -1,12 +1,50 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ # == Schema Information
4
+ #
5
+ # Table name: raif_tasks
6
+ #
7
+ # id :bigint not null, primary key
8
+ # available_model_tools :jsonb not null
9
+ # completed_at :datetime
10
+ # creator_type :string
11
+ # failed_at :datetime
12
+ # llm_model_key :string not null
13
+ # prompt :text
14
+ # prompt_studio_run :boolean default(FALSE), not null
15
+ # raw_response :text
16
+ # requested_language_key :string
17
+ # response_format :integer default("text"), not null
18
+ # run_with :jsonb
19
+ # source_type :string
20
+ # started_at :datetime
21
+ # system_prompt :text
22
+ # type :string not null
23
+ # created_at :datetime not null
24
+ # updated_at :datetime not null
25
+ # creator_id :bigint
26
+ # source_id :bigint
27
+ #
28
+ # Indexes
29
+ #
30
+ # index_raif_tasks_on_completed_at (completed_at)
31
+ # index_raif_tasks_on_created_at (created_at)
32
+ # index_raif_tasks_on_creator (creator_type,creator_id)
33
+ # index_raif_tasks_on_failed_at (failed_at)
34
+ # index_raif_tasks_on_source (source_type,source_id)
35
+ # index_raif_tasks_on_started_at (started_at)
36
+ # index_raif_tasks_on_type (type)
37
+ # index_raif_tasks_on_type_and_completed_at (type,completed_at)
38
+ # index_raif_tasks_on_type_and_failed_at (type,failed_at)
39
+ # index_raif_tasks_on_type_and_started_at (type,started_at)
40
+ #
3
41
  module Raif
4
42
  module Evals
5
43
  module LlmJudges
6
44
  class Comparative < Raif::Evals::LlmJudge
7
- task_run_arg :over_content # the content to compare against
8
- task_run_arg :comparison_criteria # the criteria to use when comparing content_to_judge to over_content
9
- task_run_arg :allow_ties # whether to allow ties in the comparison
45
+ run_with :over_content # the content to compare against
46
+ run_with :comparison_criteria # the criteria to use when comparing content_to_judge to over_content
47
+ run_with :allow_ties # whether to allow ties in the comparison
10
48
 
11
49
  attr_accessor :content_a, :content_b, :expected_winner
12
50
 
@@ -1,10 +1,48 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ # == Schema Information
4
+ #
5
+ # Table name: raif_tasks
6
+ #
7
+ # id :bigint not null, primary key
8
+ # available_model_tools :jsonb not null
9
+ # completed_at :datetime
10
+ # creator_type :string
11
+ # failed_at :datetime
12
+ # llm_model_key :string not null
13
+ # prompt :text
14
+ # prompt_studio_run :boolean default(FALSE), not null
15
+ # raw_response :text
16
+ # requested_language_key :string
17
+ # response_format :integer default("text"), not null
18
+ # run_with :jsonb
19
+ # source_type :string
20
+ # started_at :datetime
21
+ # system_prompt :text
22
+ # type :string not null
23
+ # created_at :datetime not null
24
+ # updated_at :datetime not null
25
+ # creator_id :bigint
26
+ # source_id :bigint
27
+ #
28
+ # Indexes
29
+ #
30
+ # index_raif_tasks_on_completed_at (completed_at)
31
+ # index_raif_tasks_on_created_at (created_at)
32
+ # index_raif_tasks_on_creator (creator_type,creator_id)
33
+ # index_raif_tasks_on_failed_at (failed_at)
34
+ # index_raif_tasks_on_source (source_type,source_id)
35
+ # index_raif_tasks_on_started_at (started_at)
36
+ # index_raif_tasks_on_type (type)
37
+ # index_raif_tasks_on_type_and_completed_at (type,completed_at)
38
+ # index_raif_tasks_on_type_and_failed_at (type,failed_at)
39
+ # index_raif_tasks_on_type_and_started_at (type,started_at)
40
+ #
3
41
  module Raif
4
42
  module Evals
5
43
  module LlmJudges
6
44
  class Scored < Raif::Evals::LlmJudge
7
- task_run_arg :scoring_rubric # the scoring rubric to use when evaluating the content
45
+ run_with :scoring_rubric # the scoring rubric to use when evaluating the content
8
46
 
9
47
  json_response_schema do
10
48
  number :score, description: "Numerical score based on the rubric"
@@ -1,11 +1,49 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ # == Schema Information
4
+ #
5
+ # Table name: raif_tasks
6
+ #
7
+ # id :bigint not null, primary key
8
+ # available_model_tools :jsonb not null
9
+ # completed_at :datetime
10
+ # creator_type :string
11
+ # failed_at :datetime
12
+ # llm_model_key :string not null
13
+ # prompt :text
14
+ # prompt_studio_run :boolean default(FALSE), not null
15
+ # raw_response :text
16
+ # requested_language_key :string
17
+ # response_format :integer default("text"), not null
18
+ # run_with :jsonb
19
+ # source_type :string
20
+ # started_at :datetime
21
+ # system_prompt :text
22
+ # type :string not null
23
+ # created_at :datetime not null
24
+ # updated_at :datetime not null
25
+ # creator_id :bigint
26
+ # source_id :bigint
27
+ #
28
+ # Indexes
29
+ #
30
+ # index_raif_tasks_on_completed_at (completed_at)
31
+ # index_raif_tasks_on_created_at (created_at)
32
+ # index_raif_tasks_on_creator (creator_type,creator_id)
33
+ # index_raif_tasks_on_failed_at (failed_at)
34
+ # index_raif_tasks_on_source (source_type,source_id)
35
+ # index_raif_tasks_on_started_at (started_at)
36
+ # index_raif_tasks_on_type (type)
37
+ # index_raif_tasks_on_type_and_completed_at (type,completed_at)
38
+ # index_raif_tasks_on_type_and_failed_at (type,failed_at)
39
+ # index_raif_tasks_on_type_and_started_at (type,started_at)
40
+ #
3
41
  module Raif
4
42
  module Evals
5
43
  module LlmJudges
6
44
  class Summarization < Raif::Evals::LlmJudge
7
- task_run_arg :original_content # the original content to evaluate the summary against
8
- task_run_arg :summary # the summary to evaluate against the original content
45
+ run_with :original_content # the original content to evaluate the summary against
46
+ run_with :summary # the summary to evaluate against the original content
9
47
 
10
48
  json_response_schema do
11
49
  object :coverage do
@@ -7,6 +7,7 @@ module Raif
7
7
 
8
8
  attr_accessor :key,
9
9
  :api_name,
10
+ :display_name,
10
11
  :default_temperature,
11
12
  :default_max_completion_tokens,
12
13
  :supports_native_tool_use,
@@ -25,6 +26,7 @@ module Raif
25
26
  def initialize(
26
27
  key:,
27
28
  api_name:,
29
+ display_name: nil,
28
30
  model_provider_settings: {},
29
31
  supported_provider_managed_tools: [],
30
32
  supports_native_tool_use: true,
@@ -35,6 +37,7 @@ module Raif
35
37
  )
36
38
  @key = key
37
39
  @api_name = api_name
40
+ @display_name = display_name
38
41
  @provider_settings = model_provider_settings
39
42
  @supports_native_tool_use = supports_native_tool_use
40
43
  @default_temperature = temperature || 0.7
@@ -45,11 +48,11 @@ module Raif
45
48
  end
46
49
 
47
50
  def name
48
- I18n.t("raif.model_names.#{key}")
51
+ I18n.t("raif.model_names.#{key}", default: display_name || key.to_s.humanize)
49
52
  end
50
53
 
51
54
  def chat(message: nil, messages: nil, response_format: :text, available_model_tools: [], source: nil, system_prompt: nil, temperature: nil,
52
- max_completion_tokens: nil, &block)
55
+ max_completion_tokens: nil, tool_choice: nil, anthropic_prompt_caching_enabled: false, bedrock_prompt_caching_enabled: false, &block)
53
56
  unless response_format.is_a?(Symbol)
54
57
  raise ArgumentError,
55
58
  "Raif::Llm#chat - Invalid response format: #{response_format}. Must be a symbol (you passed #{response_format.class}) and be one of: #{VALID_RESPONSE_FORMATS.join(", ")}" # rubocop:disable Layout/LineLength
@@ -67,6 +70,19 @@ module Raif
67
70
  raise ArgumentError, "Raif::Llm#chat - You must provide either a message: or messages: argument, not both"
68
71
  end
69
72
 
73
+ # Normalize :required / "required" to the symbol form for validation
74
+ tool_choice = :required if tool_choice.to_s == "required"
75
+
76
+ if tool_choice == :required
77
+ if available_model_tools.blank?
78
+ raise ArgumentError,
79
+ "Raif::Llm#chat - tool_choice: :required requires at least one available model tool"
80
+ end
81
+ elsif tool_choice.present? && !available_model_tools.map(&:to_s).include?(tool_choice.to_s)
82
+ raise ArgumentError,
83
+ "Raif::Llm#chat - Invalid tool choice: #{tool_choice} is not included in the available model tools: #{available_model_tools.join(", ")}"
84
+ end
85
+
70
86
  unless Raif.config.llm_api_requests_enabled
71
87
  Raif.logger.warn("LLM API requests are disabled. Skipping request to #{api_name}.")
72
88
  return
@@ -87,20 +103,33 @@ module Raif
87
103
  temperature: temperature,
88
104
  max_completion_tokens: max_completion_tokens,
89
105
  available_model_tools: available_model_tools,
106
+ tool_choice: tool_choice&.to_s,
90
107
  stream_response: block_given?
91
108
  )
92
109
 
110
+ model_completion.anthropic_prompt_caching_enabled = anthropic_prompt_caching_enabled
111
+ model_completion.bedrock_prompt_caching_enabled = bedrock_prompt_caching_enabled
112
+
113
+ model_completion.started!
114
+
93
115
  retry_with_backoff(model_completion) do
94
116
  perform_model_completion!(model_completion, &block)
117
+ ensure_model_completion_present!(model_completion)
95
118
  end
96
119
 
120
+ model_completion.completed!
97
121
  model_completion
98
122
  rescue Raif::Errors::StreamingError => e
99
123
  Rails.logger.error("Raif streaming error -- code: #{e.code} -- type: #{e.type} -- message: #{e.message} -- event: #{e.event}")
124
+ model_completion&.record_failure!(e) unless model_completion&.failed?
100
125
  raise e
101
126
  rescue Faraday::Error => e
102
127
  Raif.logger.error("LLM API request failed (status: #{e.response_status}): #{e.message}")
103
128
  Raif.logger.error(e.response_body)
129
+ model_completion&.record_failure!(e) unless model_completion&.failed?
130
+ raise e
131
+ rescue StandardError => e
132
+ model_completion&.record_failure!(e) unless model_completion&.failed?
104
133
  raise e
105
134
  end
106
135
 
@@ -112,10 +141,52 @@ module Raif
112
141
  VALID_RESPONSE_FORMATS
113
142
  end
114
143
 
144
+ # Override in subclasses to indicate whether prompt_tokens reported by the
145
+ # provider already include cached tokens as a subset (OpenAI, Google,
146
+ # OpenRouter) or whether cached tokens are reported separately and are
147
+ # additive to prompt_tokens (Anthropic, Bedrock).
148
+ def self.prompt_tokens_include_cached_tokens?
149
+ true
150
+ end
151
+
152
+ # Multiplier applied to the base input_token_cost to derive the per-token
153
+ # cost for cache reads. Return nil when the provider has no cache pricing.
154
+ def self.cache_read_input_token_cost_multiplier
155
+ nil
156
+ end
157
+
158
+ # Multiplier applied to the base input_token_cost to derive the per-token
159
+ # cost for cache creation writes. Return nil when there is no write surcharge.
160
+ def self.cache_creation_input_token_cost_multiplier
161
+ nil
162
+ end
163
+
115
164
  def supports_provider_managed_tool?(tool_klass)
116
165
  supported_provider_managed_tools&.include?(tool_klass.to_s)
117
166
  end
118
167
 
168
+ # Build the tool_choice parameter to force a specific tool to be called.
169
+ # Each provider implements this to return the correct format.
170
+ # @param tool_name [String] The name of the tool to force
171
+ # @return [Hash] The tool_choice parameter for the provider's API
172
+ def build_forced_tool_choice(tool_name)
173
+ raise NotImplementedError, "#{self.class.name} must implement #build_forced_tool_choice"
174
+ end
175
+
176
+ # Build the tool_choice parameter to require the model to call any tool (but not a specific one).
177
+ # Each provider implements this to return the correct format.
178
+ # @return [Hash, String] The tool_choice parameter for the provider's API
179
+ def build_required_tool_choice
180
+ raise NotImplementedError, "#{self.class.name} must implement #build_required_tool_choice"
181
+ end
182
+
183
+ # Whether the provider can faithfully enforce tool_choice: :required for
184
+ # the given tool set. Override in subclasses when a provider can only
185
+ # enforce required tool use for some tool types.
186
+ def supports_faithful_required_tool_choice?(available_model_tools)
187
+ available_model_tools.present?
188
+ end
189
+
119
190
  def validate_provider_managed_tool_support!(tool)
120
191
  unless supports_provider_managed_tool?(tool)
121
192
  raise Raif::Errors::UnsupportedFeatureError,
@@ -125,6 +196,10 @@ module Raif
125
196
 
126
197
  private
127
198
 
199
+ def retriable_exceptions
200
+ Raif.config.llm_request_retriable_exceptions
201
+ end
202
+
128
203
  def retry_with_backoff(model_completion)
129
204
  retries = 0
130
205
  max_retries = Raif.config.llm_request_max_retries
@@ -133,11 +208,11 @@ module Raif
133
208
 
134
209
  begin
135
210
  yield
136
- rescue *Raif.config.llm_request_retriable_exceptions => e
211
+ rescue *retriable_exceptions => e
137
212
  retries += 1
138
213
  if retries <= max_retries
139
214
  delay = [base_delay * (2**(retries - 1)), max_delay].min
140
- Raif.logger.warn("Retrying LLM API request after error: #{e.message}. Attempt #{retries}/#{max_retries}. Waiting #{delay} seconds...")
215
+ log_retry(e, model_completion, retries, max_retries, delay)
141
216
  model_completion.increment!(:retry_count)
142
217
  sleep delay
143
218
  retry
@@ -148,10 +223,35 @@ module Raif
148
223
  end
149
224
  end
150
225
 
226
+ def log_retry(error, model_completion, attempt, max_retries, delay)
227
+ if error.is_a?(Raif::Errors::BlankResponseError)
228
+ has_reasoning = model_completion.response_array&.any? do |block|
229
+ block.is_a?(Hash) ? block.key?("reasoning_content") : block.respond_to?(:reasoning_content)
230
+ end
231
+ Raif.logger.warn(
232
+ "Blank response retry #{attempt}/#{max_retries} for #{api_name} " \
233
+ "(ModelCompletion##{model_completion.id}, source: #{model_completion.source_type}##{model_completion.source_id}, " \
234
+ "completion_tokens: #{model_completion.completion_tokens}, reasoning_content_present: #{has_reasoning}). " \
235
+ "Waiting #{delay} seconds..."
236
+ )
237
+ else
238
+ Raif.logger.warn("Retrying LLM API request after error: #{error.message}. Attempt #{attempt}/#{max_retries}. Waiting #{delay} seconds...")
239
+ end
240
+ end
241
+
151
242
  def streaming_response_type
152
243
  raise NotImplementedError, "#{self.class.name} must implement #streaming_response_type"
153
244
  end
154
245
 
246
+ def ensure_model_completion_present!(model_completion)
247
+ # response_array/raw provider data may still be present for debugging even when
248
+ # the normalized response has no text or tool calls.
249
+ return if model_completion.raw_response.present? || model_completion.response_tool_calls.present?
250
+
251
+ raise Raif::Errors::BlankResponseError,
252
+ "Model completion #{model_completion.id} returned no text response and no tool calls"
253
+ end
254
+
155
255
  def streaming_chunk_handler(model_completion, &block)
156
256
  return unless model_completion.stream_response?
157
257
 
@@ -3,6 +3,19 @@
3
3
  class Raif::Llms::Anthropic < Raif::Llm
4
4
  include Raif::Concerns::Llms::Anthropic::MessageFormatting
5
5
  include Raif::Concerns::Llms::Anthropic::ToolFormatting
6
+ include Raif::Concerns::Llms::Anthropic::ResponseToolCalls
7
+
8
+ def self.prompt_tokens_include_cached_tokens?
9
+ false
10
+ end
11
+
12
+ def self.cache_read_input_token_cost_multiplier
13
+ 0.1
14
+ end
15
+
16
+ def self.cache_creation_input_token_cost_multiplier
17
+ 1.25
18
+ end
6
19
 
7
20
  def perform_model_completion!(model_completion, &block)
8
21
  params = build_request_parameters(model_completion)
@@ -21,7 +34,7 @@ class Raif::Llms::Anthropic < Raif::Llm
21
34
  private
22
35
 
23
36
  def connection
24
- @connection ||= Faraday.new(url: "https://api.anthropic.com/v1") do |f|
37
+ @connection ||= Faraday.new(url: "https://api.anthropic.com/v1", request: Raif.default_request_options) do |f|
25
38
  f.headers["x-api-key"] = Raif.config.anthropic_api_key
26
39
  f.headers["anthropic-version"] = "2023-06-01"
27
40
  f.request :json
@@ -48,22 +61,33 @@ private
48
61
  model_completion.completion_tokens = response_json&.dig("usage", "output_tokens")
49
62
  model_completion.prompt_tokens = response_json&.dig("usage", "input_tokens")
50
63
  model_completion.total_tokens = model_completion.completion_tokens.to_i + model_completion.prompt_tokens.to_i
64
+ model_completion.cache_read_input_tokens = response_json&.dig("usage", "cache_read_input_tokens")
65
+ model_completion.cache_creation_input_tokens = response_json&.dig("usage", "cache_creation_input_tokens")
51
66
  model_completion.save!
52
67
  end
53
68
 
54
69
  def build_request_parameters(model_completion)
55
70
  params = {
56
71
  model: model_completion.model_api_name,
57
- messages: model_completion.messages,
58
- temperature: (model_completion.temperature || default_temperature).to_f,
59
- max_tokens: model_completion.max_completion_tokens || default_max_completion_tokens
72
+ messages: model_completion.messages
60
73
  }
61
74
 
75
+ params[:temperature] = (model_completion.temperature || default_temperature).to_f if supports_temperature?
76
+ params[:max_tokens] = model_completion.max_completion_tokens || default_max_completion_tokens
77
+
62
78
  params[:system] = model_completion.system_prompt if model_completion.system_prompt.present?
79
+ params[:cache_control] = { type: "ephemeral" } if model_completion.anthropic_prompt_caching_enabled
63
80
 
64
81
  if supports_native_tool_use?
65
82
  tools = build_tools_parameter(model_completion)
66
83
  params[:tools] = tools unless tools.blank?
84
+
85
+ if model_completion.tool_choice == "required"
86
+ params[:tool_choice] = build_required_tool_choice
87
+ elsif model_completion.tool_choice.present?
88
+ tool_klass = model_completion.tool_choice.constantize
89
+ params[:tool_choice] = build_forced_tool_choice(tool_klass.tool_name)
90
+ end
67
91
  end
68
92
 
69
93
  params[:stream] = true if model_completion.stream_response?
@@ -71,6 +95,10 @@ private
71
95
  params
72
96
  end
73
97
 
98
+ def supports_temperature?
99
+ provider_settings.key?(:supports_temperature) ? provider_settings[:supports_temperature] : true
100
+ end
101
+
74
102
  def extract_text_response(resp)
75
103
  return if resp&.dig("content").blank?
76
104
 
@@ -92,24 +120,6 @@ private
92
120
  end
93
121
  end
94
122
 
95
- def extract_response_tool_calls(resp)
96
- return if resp&.dig("content").nil?
97
-
98
- # Find any tool_use content blocks
99
- tool_uses = resp&.dig("content")&.select do |content|
100
- content["type"] == "tool_use"
101
- end
102
-
103
- return if tool_uses.blank?
104
-
105
- tool_uses.map do |tool_use|
106
- {
107
- "name" => tool_use["name"],
108
- "arguments" => tool_use["input"]
109
- }
110
- end
111
- end
112
-
113
123
  def extract_citations(resp)
114
124
  return [] if resp&.dig("content").nil?
115
125