raif 1.3.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206)
  1. checksums.yaml +4 -4
  2. data/README.md +8 -7
  3. data/app/assets/builds/raif.css +4 -1
  4. data/app/assets/builds/raif_admin.css +52 -2
  5. data/app/assets/builds/raif_admin_sprockets.js +2709 -0
  6. data/app/assets/javascript/raif/admin/copy_to_clipboard_controller.js +132 -0
  7. data/app/assets/javascript/raif/admin/cost_estimate_controller.js +80 -0
  8. data/app/assets/javascript/raif/admin/judge_config_controller.js +23 -0
  9. data/app/assets/javascript/raif/admin/select_all_checkboxes_controller.js +33 -0
  10. data/app/assets/javascript/raif/admin/sortable_table_controller.js +51 -0
  11. data/app/assets/javascript/raif/admin/table_search_controller.js +15 -0
  12. data/app/assets/javascript/raif/admin/tom_select_controller.js +33 -0
  13. data/app/assets/javascript/raif/controllers/conversations_controller.js +1 -1
  14. data/app/assets/javascript/raif_admin.js +23 -0
  15. data/app/assets/javascript/raif_admin_sprockets.js +24 -0
  16. data/app/assets/stylesheets/raif/admin/conversation.scss +16 -0
  17. data/app/assets/stylesheets/raif/conversations.scss +3 -0
  18. data/app/assets/stylesheets/raif.scss +2 -1
  19. data/app/assets/stylesheets/raif_admin.scss +50 -1
  20. data/app/controllers/raif/admin/agents_controller.rb +27 -1
  21. data/app/controllers/raif/admin/application_controller.rb +16 -0
  22. data/app/controllers/raif/admin/configs_controller.rb +95 -0
  23. data/app/controllers/raif/admin/llms_controller.rb +27 -0
  24. data/app/controllers/raif/admin/model_completions_controller.rb +24 -1
  25. data/app/controllers/raif/admin/model_tool_invocations_controller.rb +7 -1
  26. data/app/controllers/raif/admin/prompt_studio/agents_controller.rb +25 -0
  27. data/app/controllers/raif/admin/prompt_studio/base_controller.rb +32 -0
  28. data/app/controllers/raif/admin/prompt_studio/batch_runs_controller.rb +102 -0
  29. data/app/controllers/raif/admin/prompt_studio/conversations_controller.rb +25 -0
  30. data/app/controllers/raif/admin/prompt_studio/tasks_controller.rb +64 -0
  31. data/app/controllers/raif/admin/stats/model_tool_invocations_controller.rb +21 -0
  32. data/app/controllers/raif/admin/stats/tasks_controller.rb +15 -6
  33. data/app/controllers/raif/admin/stats_controller.rb +32 -3
  34. data/app/controllers/raif/admin/tasks_controller.rb +5 -0
  35. data/app/controllers/raif/conversation_entries_controller.rb +1 -0
  36. data/app/controllers/raif/conversations_controller.rb +10 -2
  37. data/app/helpers/raif/application_helper.rb +40 -0
  38. data/app/jobs/raif/conversation_entry_job.rb +8 -6
  39. data/app/jobs/raif/prompt_studio_batch_run_item_job.rb +11 -0
  40. data/app/jobs/raif/prompt_studio_batch_run_job.rb +15 -0
  41. data/app/jobs/raif/prompt_studio_task_run_job.rb +36 -0
  42. data/app/models/raif/admin/task_stat.rb +7 -0
  43. data/app/models/raif/agent.rb +98 -6
  44. data/app/models/raif/agents/native_tool_calling_agent.rb +179 -52
  45. data/app/models/raif/application_record.rb +18 -0
  46. data/app/models/raif/concerns/agent_inference_stats.rb +35 -0
  47. data/app/models/raif/concerns/has_prompt_templates.rb +88 -0
  48. data/app/models/raif/concerns/has_runtime_duration.rb +41 -0
  49. data/app/models/raif/concerns/json_schema_definition.rb +54 -6
  50. data/app/models/raif/concerns/llm_prompt_caching.rb +20 -0
  51. data/app/models/raif/concerns/llms/anthropic/message_formatting.rb +34 -0
  52. data/app/models/raif/concerns/llms/anthropic/response_tool_calls.rb +24 -0
  53. data/app/models/raif/concerns/llms/anthropic/tool_formatting.rb +8 -0
  54. data/app/models/raif/concerns/llms/bedrock/message_formatting.rb +43 -0
  55. data/app/models/raif/concerns/llms/bedrock/response_tool_calls.rb +26 -0
  56. data/app/models/raif/concerns/llms/bedrock/tool_formatting.rb +8 -0
  57. data/app/models/raif/concerns/llms/google/message_formatting.rb +112 -0
  58. data/app/models/raif/concerns/llms/google/response_tool_calls.rb +32 -0
  59. data/app/models/raif/concerns/llms/google/tool_formatting.rb +76 -0
  60. data/app/models/raif/concerns/llms/message_formatting.rb +41 -5
  61. data/app/models/raif/concerns/llms/open_ai/json_schema_validation.rb +3 -3
  62. data/app/models/raif/concerns/llms/open_ai_completions/message_formatting.rb +22 -0
  63. data/app/models/raif/concerns/llms/open_ai_completions/response_tool_calls.rb +22 -0
  64. data/app/models/raif/concerns/llms/open_ai_completions/tool_formatting.rb +8 -0
  65. data/app/models/raif/concerns/llms/open_ai_responses/message_formatting.rb +17 -0
  66. data/app/models/raif/concerns/llms/open_ai_responses/response_tool_calls.rb +26 -0
  67. data/app/models/raif/concerns/llms/open_ai_responses/tool_formatting.rb +8 -0
  68. data/app/models/raif/concerns/provider_managed_tool_calls.rb +162 -0
  69. data/app/models/raif/concerns/run_with.rb +127 -0
  70. data/app/models/raif/conversation.rb +112 -8
  71. data/app/models/raif/conversation_entry.rb +38 -4
  72. data/app/models/raif/embedding_model.rb +2 -1
  73. data/app/models/raif/embedding_models/bedrock.rb +10 -1
  74. data/app/models/raif/embedding_models/google.rb +37 -0
  75. data/app/models/raif/embedding_models/open_ai.rb +1 -1
  76. data/app/models/raif/evals/llm_judge.rb +70 -0
  77. data/{lib → app/models}/raif/evals/llm_judges/binary.rb +41 -3
  78. data/{lib → app/models}/raif/evals/llm_judges/comparative.rb +41 -3
  79. data/{lib → app/models}/raif/evals/llm_judges/scored.rb +39 -1
  80. data/{lib → app/models}/raif/evals/llm_judges/summarization.rb +40 -2
  81. data/app/models/raif/llm.rb +104 -4
  82. data/app/models/raif/llms/anthropic.rb +32 -22
  83. data/app/models/raif/llms/bedrock.rb +64 -24
  84. data/app/models/raif/llms/google.rb +166 -0
  85. data/app/models/raif/llms/open_ai_base.rb +23 -5
  86. data/app/models/raif/llms/open_ai_completions.rb +14 -12
  87. data/app/models/raif/llms/open_ai_responses.rb +14 -17
  88. data/app/models/raif/llms/open_router.rb +16 -15
  89. data/app/models/raif/model_completion.rb +103 -1
  90. data/app/models/raif/model_tool.rb +55 -5
  91. data/app/models/raif/model_tool_invocation.rb +68 -6
  92. data/app/models/raif/model_tools/agent_final_answer.rb +2 -7
  93. data/app/models/raif/model_tools/provider_managed/code_execution.rb +4 -0
  94. data/app/models/raif/model_tools/provider_managed/image_generation.rb +4 -0
  95. data/app/models/raif/model_tools/provider_managed/web_search.rb +4 -0
  96. data/app/models/raif/prompt_studio_batch_run.rb +155 -0
  97. data/app/models/raif/prompt_studio_batch_run_item.rb +220 -0
  98. data/app/models/raif/streaming_responses/bedrock.rb +60 -1
  99. data/app/models/raif/streaming_responses/google.rb +71 -0
  100. data/app/models/raif/task.rb +85 -18
  101. data/app/models/raif/user_tool_invocation.rb +19 -0
  102. data/app/views/layouts/raif/admin.html.erb +43 -2
  103. data/app/views/raif/admin/agents/_agent.html.erb +9 -0
  104. data/app/views/raif/admin/agents/_conversation_message.html.erb +28 -6
  105. data/app/views/raif/admin/agents/index.html.erb +50 -0
  106. data/app/views/raif/admin/agents/show.html.erb +50 -1
  107. data/app/views/raif/admin/configs/show.html.erb +117 -0
  108. data/app/views/raif/admin/conversations/_conversation_entry.html.erb +29 -34
  109. data/app/views/raif/admin/conversations/show.html.erb +2 -0
  110. data/app/views/raif/admin/llms/index.html.erb +110 -0
  111. data/app/views/raif/admin/model_completions/_model_completion.html.erb +10 -5
  112. data/app/views/raif/admin/model_completions/index.html.erb +40 -1
  113. data/app/views/raif/admin/model_completions/show.html.erb +256 -84
  114. data/app/views/raif/admin/model_tool_invocations/index.html.erb +22 -1
  115. data/app/views/raif/admin/model_tool_invocations/show.html.erb +18 -0
  116. data/app/views/raif/admin/model_tools/_list.html.erb +16 -0
  117. data/app/views/raif/admin/model_tools/_model_tool.html.erb +36 -0
  118. data/app/views/raif/admin/prompt_studio/agents/index.html.erb +56 -0
  119. data/app/views/raif/admin/prompt_studio/agents/show.html.erb +57 -0
  120. data/app/views/raif/admin/prompt_studio/batch_runs/_batch_run_item.html.erb +54 -0
  121. data/app/views/raif/admin/prompt_studio/batch_runs/_judge_config_fields.html.erb +76 -0
  122. data/app/views/raif/admin/prompt_studio/batch_runs/_judge_detail_modal.html.erb +27 -0
  123. data/app/views/raif/admin/prompt_studio/batch_runs/_modal.html.erb +35 -0
  124. data/app/views/raif/admin/prompt_studio/batch_runs/_progress.html.erb +78 -0
  125. data/app/views/raif/admin/prompt_studio/batch_runs/show.html.erb +49 -0
  126. data/app/views/raif/admin/prompt_studio/conversations/index.html.erb +48 -0
  127. data/app/views/raif/admin/prompt_studio/conversations/show.html.erb +36 -0
  128. data/app/views/raif/admin/prompt_studio/shared/_nav_tabs.html.erb +17 -0
  129. data/app/views/raif/admin/prompt_studio/shared/_prompt_comparison.html.erb +87 -0
  130. data/app/views/raif/admin/prompt_studio/shared/_type_filter.html.erb +54 -0
  131. data/app/views/raif/admin/prompt_studio/tasks/_task_result.html.erb +145 -0
  132. data/app/views/raif/admin/prompt_studio/tasks/_task_row.html.erb +12 -0
  133. data/app/views/raif/admin/prompt_studio/tasks/_task_type_filter.html.erb +58 -0
  134. data/app/views/raif/admin/prompt_studio/tasks/_tasks_table.html.erb +22 -0
  135. data/app/views/raif/admin/prompt_studio/tasks/index.html.erb +35 -0
  136. data/app/views/raif/admin/prompt_studio/tasks/show.html.erb +19 -0
  137. data/app/views/raif/admin/stats/_stats_tile.html.erb +34 -0
  138. data/app/views/raif/admin/stats/index.html.erb +71 -88
  139. data/app/views/raif/admin/stats/model_tool_invocations/index.html.erb +43 -0
  140. data/app/views/raif/admin/stats/tasks/index.html.erb +20 -6
  141. data/app/views/raif/admin/tasks/_task.html.erb +1 -0
  142. data/app/views/raif/admin/tasks/index.html.erb +23 -6
  143. data/app/views/raif/admin/tasks/show.html.erb +56 -3
  144. data/app/views/raif/conversation_entries/_form.html.erb +3 -0
  145. data/app/views/raif/conversation_entries/_message.html.erb +10 -6
  146. data/app/views/raif/conversations/_conversation.html.erb +10 -0
  147. data/app/views/raif/conversations/_entry_processed.turbo_stream.erb +12 -0
  148. data/app/views/raif/conversations/index.html.erb +23 -0
  149. data/config/importmap.rb +8 -0
  150. data/config/locales/admin.en.yml +161 -1
  151. data/config/locales/en.yml +67 -4
  152. data/config/routes.rb +10 -0
  153. data/db/migrate/20250904194456_add_generating_entry_response_to_raif_conversations.rb +7 -0
  154. data/db/migrate/20250911125234_add_source_to_raif_tasks.rb +7 -0
  155. data/db/migrate/20251020005853_add_source_to_raif_agents.rb +7 -0
  156. data/db/migrate/20251020011346_rename_task_run_args_to_run_with.rb +7 -0
  157. data/db/migrate/20251020011405_add_run_with_to_raif_agents.rb +13 -0
  158. data/db/migrate/20251024160119_add_llm_messages_max_length_to_raif_conversations.rb +14 -0
  159. data/db/migrate/20251124185033_add_provider_tool_call_id_to_raif_model_tool_invocations.rb +7 -0
  160. data/db/migrate/20251128202941_add_tool_choice_to_raif_model_completions.rb +7 -0
  161. data/db/migrate/20260118144846_add_source_to_raif_conversations.rb +7 -0
  162. data/db/migrate/20260119000000_add_failure_tracking_to_raif_model_completions.rb +10 -0
  163. data/db/migrate/20260119000001_add_completed_at_to_raif_model_completions.rb +8 -0
  164. data/db/migrate/20260119000002_add_started_at_to_raif_model_completions.rb +8 -0
  165. data/db/migrate/20260307000000_add_prompt_studio_run_to_raif_tasks.rb +7 -0
  166. data/db/migrate/20260308000000_create_raif_prompt_studio_batch_runs.rb +27 -0
  167. data/db/migrate/20260308000001_create_raif_prompt_studio_batch_run_items.rb +24 -0
  168. data/db/migrate/20260407000000_add_cache_token_columns_to_raif_model_completions.rb +8 -0
  169. data/lib/generators/raif/agent/agent_generator.rb +18 -0
  170. data/lib/generators/raif/agent/templates/agent.rb.tt +7 -5
  171. data/lib/generators/raif/agent/templates/application_agent.rb.tt +1 -1
  172. data/lib/generators/raif/agent/templates/system_prompt.erb.tt +3 -0
  173. data/lib/generators/raif/conversation/conversation_generator.rb +19 -1
  174. data/lib/generators/raif/conversation/templates/conversation.rb.tt +6 -0
  175. data/lib/generators/raif/conversation/templates/system_prompt.erb.tt +4 -0
  176. data/lib/generators/raif/install/templates/initializer.rb +117 -8
  177. data/lib/generators/raif/task/task_generator.rb +18 -0
  178. data/lib/generators/raif/task/templates/prompt.erb.tt +4 -0
  179. data/lib/generators/raif/task/templates/task.rb.tt +10 -9
  180. data/lib/raif/configuration.rb +47 -2
  181. data/lib/raif/embedding_model_registry.rb +8 -0
  182. data/lib/raif/engine.rb +24 -1
  183. data/lib/raif/errors/blank_response_error.rb +8 -0
  184. data/lib/raif/errors/instance_dependent_schema_error.rb +8 -0
  185. data/lib/raif/errors/prompt_template_error.rb +15 -0
  186. data/lib/raif/errors/streaming_error.rb +6 -3
  187. data/lib/raif/errors.rb +3 -0
  188. data/lib/raif/evals/run.rb +1 -0
  189. data/lib/raif/evals.rb +0 -6
  190. data/lib/raif/json_schema_builder.rb +14 -0
  191. data/lib/raif/llm_registry.rb +433 -42
  192. data/lib/raif/messages.rb +180 -0
  193. data/lib/raif/prompt_studio_comparison_builder.rb +138 -0
  194. data/lib/raif/token_estimator.rb +28 -0
  195. data/lib/raif/version.rb +1 -1
  196. data/lib/raif.rb +11 -0
  197. data/lib/tasks/annotate_rb.rake +10 -0
  198. data/spec/support/rspec_helpers.rb +15 -9
  199. data/spec/support/test_task.rb +9 -0
  200. data/spec/support/test_template_task.rb +41 -0
  201. metadata +108 -15
  202. data/app/models/raif/agents/re_act_agent.rb +0 -127
  203. data/app/models/raif/agents/re_act_step.rb +0 -32
  204. data/app/models/raif/concerns/task_run_args.rb +0 -62
  205. data/lib/raif/evals/llm_judge.rb +0 -32
  206. /data/{lib → app/models}/raif/evals/scoring_rubric.rb +0 -0
@@ -3,11 +3,22 @@
3
3
  class Raif::Llms::Bedrock < Raif::Llm
4
4
  include Raif::Concerns::Llms::Bedrock::MessageFormatting
5
5
  include Raif::Concerns::Llms::Bedrock::ToolFormatting
6
+ include Raif::Concerns::Llms::Bedrock::ResponseToolCalls
7
+
8
+ def self.prompt_tokens_include_cached_tokens?
9
+ false
10
+ end
11
+
12
+ def self.cache_read_input_token_cost_multiplier
13
+ 0.1
14
+ end
15
+
16
+ def self.cache_creation_input_token_cost_multiplier
17
+ 1.25
18
+ end
6
19
 
7
20
  def perform_model_completion!(model_completion, &block)
8
- if Raif.config.aws_bedrock_model_name_prefix.present?
9
- model_completion.model_api_name = "#{Raif.config.aws_bedrock_model_name_prefix}.#{model_completion.model_api_name}"
10
- end
21
+ model_completion.model_api_name = resolve_model_api_name(model_completion.model_api_name)
11
22
 
12
23
  params = build_request_parameters(model_completion)
13
24
 
@@ -38,10 +49,29 @@ class Raif::Llms::Bedrock < Raif::Llm
38
49
  private
39
50
 
40
51
  def bedrock_client
41
- @bedrock_client ||= Aws::BedrockRuntime::Client.new(region: Raif.config.aws_bedrock_region)
52
+ @bedrock_client ||= begin
53
+ client_options = {
54
+ region: Raif.config.aws_bedrock_region,
55
+ max_attempts: 1
56
+ }
57
+
58
+ client_options[:http_read_timeout] = Raif.config.request_read_timeout if Raif.config.request_read_timeout
59
+ client_options[:http_open_timeout] = Raif.config.request_open_timeout if Raif.config.request_open_timeout
60
+
61
+ Aws::BedrockRuntime::Client.new(client_options)
62
+ end
63
+ end
64
+
65
+ def retriable_exceptions
66
+ super + [
67
+ Aws::BedrockRuntime::Errors::ServiceError,
68
+ Seahorse::Client::NetworkingError
69
+ ]
42
70
  end
43
71
 
44
72
  def update_model_completion(model_completion, resp)
73
+ return if resp.nil?
74
+
45
75
  model_completion.raw_response = if model_completion.response_format_json?
46
76
  extract_json_response(resp)
47
77
  else
@@ -53,6 +83,8 @@ private
53
83
  model_completion.completion_tokens = resp.usage.output_tokens
54
84
  model_completion.prompt_tokens = resp.usage.input_tokens
55
85
  model_completion.total_tokens = resp.usage.total_tokens
86
+ model_completion.cache_read_input_tokens = resp.usage.try(:cache_read_input_tokens)
87
+ model_completion.cache_creation_input_tokens = resp.usage.try(:cache_write_input_tokens)
56
88
  model_completion.save!
57
89
  end
58
90
 
@@ -72,6 +104,19 @@ private
72
104
  if supports_native_tool_use?
73
105
  tools = build_tools_parameter(model_completion)
74
106
  params[:tool_config] = tools unless tools.blank?
107
+
108
+ if model_completion.tool_choice == "required"
109
+ params[:tool_config][:tool_choice] = build_required_tool_choice
110
+ elsif model_completion.tool_choice.present?
111
+ tool_klass = model_completion.tool_choice.constantize
112
+ params[:tool_config][:tool_choice] = build_forced_tool_choice(tool_klass.tool_name)
113
+ end
114
+ end
115
+
116
+ if model_completion.bedrock_prompt_caching_enabled
117
+ cache_point = { cache_point: { type: "default" } }
118
+ params[:system] << cache_point if params[:system].present?
119
+ messages_param.last[:content] << cache_point.dup if messages_param.last.present?
75
120
  end
76
121
 
77
122
  params
@@ -121,26 +166,6 @@ private
121
166
  end
122
167
  end
123
168
 
124
- def extract_response_tool_calls(resp)
125
- # Get the message from the response object
126
- message = resp.output.message
127
- return if message.content.nil?
128
-
129
- # Find any tool_use blocks in the content array
130
- tool_uses = message.content.select do |content|
131
- content.respond_to?(:tool_use) && content.tool_use.present?
132
- end
133
-
134
- return if tool_uses.blank?
135
-
136
- tool_uses.map do |content|
137
- {
138
- "name" => content.tool_use.name,
139
- "arguments" => content.tool_use.input
140
- }
141
- end
142
- end
143
-
144
169
  def streaming_chunk_handler(model_completion, &block)
145
170
  return unless model_completion.stream_response?
146
171
 
@@ -162,4 +187,19 @@ private
162
187
  end
163
188
  end
164
189
 
190
+ def resolve_model_api_name(model_api_name)
191
+ api_name = model_api_name.to_s
192
+ prefix = Raif.config.aws_bedrock_model_name_prefix.to_s.presence
193
+
194
+ return api_name if prefix.blank?
195
+ return api_name if api_name.start_with?("#{prefix}.")
196
+
197
+ # Some Bedrock model IDs are provider IDs (not inference profile IDs),
198
+ # so they should not be prefixed.
199
+ return api_name if api_name.start_with?("openai.gpt-oss-")
200
+ return api_name if api_name.start_with?("deepseek.")
201
+
202
+ "#{prefix}.#{api_name}"
203
+ end
204
+
165
205
  end
@@ -0,0 +1,166 @@
1
+ # frozen_string_literal: true
2
+
3
+ class Raif::Llms::Google < Raif::Llm
4
+ include Raif::Concerns::Llms::Google::MessageFormatting
5
+ include Raif::Concerns::Llms::Google::ToolFormatting
6
+ include Raif::Concerns::Llms::Google::ResponseToolCalls
7
+
8
+ def self.cache_read_input_token_cost_multiplier
9
+ 0.25
10
+ end
11
+
12
+ def perform_model_completion!(model_completion, &block)
13
+ params = build_request_parameters(model_completion)
14
+ endpoint = build_endpoint(model_completion)
15
+
16
+ response = connection.post(endpoint) do |req|
17
+ req.body = params
18
+ req.options.on_data = streaming_chunk_handler(model_completion, &block) if model_completion.stream_response?
19
+ end
20
+
21
+ unless model_completion.stream_response?
22
+ update_model_completion(model_completion, response.body)
23
+ end
24
+
25
+ model_completion
26
+ end
27
+
28
+ def supports_faithful_required_tool_choice?(available_model_tools)
29
+ super && Array(available_model_tools).none? do |tool|
30
+ tool_class = tool.is_a?(String) ? tool.constantize : tool
31
+ tool_class.provider_managed?
32
+ end
33
+ end
34
+
35
+ private
36
+
37
+ def connection
38
+ @connection ||= Faraday.new(url: "https://generativelanguage.googleapis.com/v1beta", request: Raif.default_request_options) do |f|
39
+ f.headers["x-goog-api-key"] = Raif.config.google_api_key
40
+ f.request :json
41
+ f.response :json
42
+ f.response :raise_error
43
+ end
44
+ end
45
+
46
+ def build_endpoint(model_completion)
47
+ if model_completion.stream_response?
48
+ "models/#{model_completion.model_api_name}:streamGenerateContent?alt=sse"
49
+ else
50
+ "models/#{model_completion.model_api_name}:generateContent"
51
+ end
52
+ end
53
+
54
+ def streaming_response_type
55
+ Raif::StreamingResponses::Google
56
+ end
57
+
58
+ def update_model_completion(model_completion, response_json)
59
+ model_completion.raw_response = if model_completion.response_format_json?
60
+ extract_json_response(response_json)
61
+ else
62
+ extract_text_response(response_json)
63
+ end
64
+
65
+ model_completion.response_array = response_json&.dig("candidates", 0, "content", "parts")
66
+ model_completion.response_tool_calls = extract_response_tool_calls(response_json)
67
+ model_completion.citations = extract_citations(response_json)
68
+ model_completion.completion_tokens = response_json&.dig("usageMetadata", "candidatesTokenCount")
69
+ model_completion.prompt_tokens = response_json&.dig("usageMetadata", "promptTokenCount")
70
+ model_completion.total_tokens = response_json&.dig("usageMetadata", "totalTokenCount") ||
71
+ (model_completion.completion_tokens.to_i + model_completion.prompt_tokens.to_i)
72
+ model_completion.cache_read_input_tokens = response_json&.dig("usageMetadata", "cachedContentTokenCount")
73
+ model_completion.save!
74
+ end
75
+
76
+ def build_request_parameters(model_completion)
77
+ params = {
78
+ contents: model_completion.messages
79
+ }
80
+
81
+ if model_completion.system_prompt.present?
82
+ params[:system_instruction] = { parts: [{ text: model_completion.system_prompt }] }
83
+ end
84
+
85
+ params[:generationConfig] = build_generation_config(model_completion)
86
+
87
+ if supports_native_tool_use?
88
+ tools = build_tools_parameter(model_completion)
89
+ params[:tools] = tools unless tools.blank?
90
+
91
+ if model_completion.tool_choice == "required"
92
+ if supports_faithful_required_tool_choice?(model_completion.available_model_tools)
93
+ params[:toolConfig] = { functionCallingConfig: build_required_tool_choice }
94
+ else
95
+ log_required_tool_choice_fallback(model_completion)
96
+ end
97
+ elsif model_completion.tool_choice.present?
98
+ tool_klass = model_completion.tool_choice.constantize
99
+ params[:toolConfig] = { functionCallingConfig: build_forced_tool_choice(tool_klass.tool_name) }
100
+ end
101
+ end
102
+
103
+ params
104
+ end
105
+
106
+ def log_required_tool_choice_fallback(model_completion)
107
+ Raif.logger.warn(
108
+ "Google AI cannot faithfully enforce tool_choice: :required when provider-managed tools are present. " \
109
+ "Falling back to runtime validation for #{model_completion.model_api_name} " \
110
+ "(tools: #{model_completion.available_model_tools_map.keys.join(", ")})"
111
+ )
112
+ end
113
+
114
+ def build_generation_config(model_completion)
115
+ config = {}
116
+
117
+ temperature = model_completion.temperature || default_temperature
118
+ config[:temperature] = temperature.to_f if temperature.present?
119
+
120
+ max_tokens = model_completion.max_completion_tokens || default_max_completion_tokens
121
+ config[:maxOutputTokens] = max_tokens if max_tokens.present?
122
+
123
+ # Use native JSON schema support for structured output
124
+ if model_completion.response_format_json? && model_completion.json_response_schema.present?
125
+ config[:responseMimeType] = "application/json"
126
+ config[:responseSchema] = sanitize_schema_for_google(model_completion.json_response_schema)
127
+ end
128
+
129
+ config
130
+ end
131
+
132
+ def extract_text_response(resp)
133
+ parts = resp&.dig("candidates", 0, "content", "parts")
134
+ return if parts.blank?
135
+
136
+ parts.select { |p| p.key?("text") }.map { |p| p["text"] }.join
137
+ end
138
+
139
+ def extract_json_response(resp)
140
+ # Google AI supports native JSON schema output, so the response should be in the text field
141
+ extract_text_response(resp)
142
+ end
143
+
144
+ def extract_citations(resp)
145
+ # Google AI returns grounding metadata for search results
146
+ grounding_metadata = resp&.dig("candidates", 0, "groundingMetadata")
147
+ return [] if grounding_metadata.blank?
148
+
149
+ citations = []
150
+
151
+ # Extract from grounding chunks
152
+ grounding_chunks = grounding_metadata["groundingChunks"] || []
153
+ grounding_chunks.each do |chunk|
154
+ web = chunk["web"]
155
+ next unless web.present?
156
+
157
+ citations << {
158
+ "url" => Raif::Utils::HtmlFragmentProcessor.strip_tracking_parameters(web["uri"]),
159
+ "title" => web["title"]
160
+ }
161
+ end
162
+
163
+ citations.uniq { |citation| citation["url"] }
164
+ end
165
+
166
+ end
@@ -3,6 +3,10 @@
3
3
  class Raif::Llms::OpenAiBase < Raif::Llm
4
4
  include Raif::Concerns::Llms::OpenAi::JsonSchemaValidation
5
5
 
6
+ def self.cache_read_input_token_cost_multiplier
7
+ 0.5
8
+ end
9
+
6
10
  def perform_model_completion!(model_completion, &block)
7
11
  if supports_temperature?
8
12
  model_completion.temperature ||= default_temperature
@@ -28,11 +32,25 @@ class Raif::Llms::OpenAiBase < Raif::Llm
28
32
  private
29
33
 
30
34
  def connection
31
- @connection ||= Faraday.new(url: "https://api.openai.com/v1") do |f|
32
- f.headers["Authorization"] = "Bearer #{Raif.config.open_ai_api_key}"
33
- f.request :json
34
- f.response :json
35
- f.response :raise_error
35
+ @connection ||= begin
36
+ conn = Faraday.new(url: Raif.config.open_ai_base_url, request: Raif.default_request_options) do |f|
37
+ case Raif.config.open_ai_auth_header_style
38
+ when :bearer
39
+ f.headers["Authorization"] = "Bearer #{Raif.config.open_ai_api_key}"
40
+ when :api_key
41
+ f.headers["api-key"] = Raif.config.open_ai_api_key
42
+ else
43
+ raise Raif::Errors::InvalidConfigError,
44
+ "Raif.config.open_ai_auth_header_style must be either :bearer or :api_key"
45
+ end
46
+
47
+ f.request :json
48
+ f.response :json
49
+ f.response :raise_error
50
+ end
51
+
52
+ conn.params["api-version"] = Raif.config.open_ai_api_version if Raif.config.open_ai_api_version.present?
53
+ conn
36
54
  end
37
55
  end
38
56
 
@@ -3,6 +3,7 @@
3
3
  class Raif::Llms::OpenAiCompletions < Raif::Llms::OpenAiBase
4
4
  include Raif::Concerns::Llms::OpenAiCompletions::MessageFormatting
5
5
  include Raif::Concerns::Llms::OpenAiCompletions::ToolFormatting
6
+ include Raif::Concerns::Llms::OpenAiCompletions::ResponseToolCalls
6
7
 
7
8
  private
8
9
 
@@ -15,6 +16,8 @@ private
15
16
  end
16
17
 
17
18
  def update_model_completion(model_completion, response_json)
19
+ return if response_json.nil?
20
+
18
21
  model_completion.update!(
19
22
  response_id: response_json["id"],
20
23
  response_tool_calls: extract_response_tool_calls(response_json),
@@ -22,21 +25,11 @@ private
22
25
  response_array: response_json["choices"],
23
26
  completion_tokens: response_json.dig("usage", "completion_tokens"),
24
27
  prompt_tokens: response_json.dig("usage", "prompt_tokens"),
25
- total_tokens: response_json.dig("usage", "total_tokens")
28
+ total_tokens: response_json.dig("usage", "total_tokens"),
29
+ cache_read_input_tokens: response_json.dig("usage", "prompt_tokens_details", "cached_tokens")
26
30
  )
27
31
  end
28
32
 
29
- def extract_response_tool_calls(resp)
30
- return if resp.dig("choices", 0, "message", "tool_calls").blank?
31
-
32
- resp.dig("choices", 0, "message", "tool_calls").map do |tool_call|
33
- {
34
- "name" => tool_call["function"]["name"],
35
- "arguments" => JSON.parse(tool_call["function"]["arguments"])
36
- }
37
- end
38
- end
39
-
40
33
  def build_request_parameters(model_completion)
41
34
  formatted_system_prompt = format_system_prompt(model_completion)
42
35
 
@@ -60,6 +53,15 @@ private
60
53
  if supports_native_tool_use?
61
54
  tools = build_tools_parameter(model_completion)
62
55
  parameters[:tools] = tools unless tools.blank?
56
+
57
+ if model_completion.tool_choice == "required"
58
+ parameters[:tool_choice] = build_required_tool_choice
59
+ parameters[:parallel_tool_calls] = false unless tools.blank?
60
+ elsif model_completion.tool_choice.present?
61
+ tool_klass = model_completion.tool_choice.constantize
62
+ parameters[:tool_choice] = build_forced_tool_choice(tool_klass.tool_name)
63
+ parameters[:parallel_tool_calls] = false unless tools.blank?
64
+ end
63
65
  end
64
66
 
65
67
  if model_completion.stream_response?
@@ -3,6 +3,7 @@
3
3
  class Raif::Llms::OpenAiResponses < Raif::Llms::OpenAiBase
4
4
  include Raif::Concerns::Llms::OpenAiResponses::MessageFormatting
5
5
  include Raif::Concerns::Llms::OpenAiResponses::ToolFormatting
6
+ include Raif::Concerns::Llms::OpenAiResponses::ResponseToolCalls
6
7
 
7
8
  private
8
9
 
@@ -15,6 +16,8 @@ private
15
16
  end
16
17
 
17
18
  def update_model_completion(model_completion, response_json)
19
+ return if response_json.nil?
20
+
18
21
  model_completion.update!(
19
22
  response_id: response_json["id"],
20
23
  response_tool_calls: extract_response_tool_calls(response_json),
@@ -23,26 +26,11 @@ private
23
26
  citations: extract_citations(response_json),
24
27
  completion_tokens: response_json.dig("usage", "output_tokens"),
25
28
  prompt_tokens: response_json.dig("usage", "input_tokens"),
26
- total_tokens: response_json.dig("usage", "total_tokens")
29
+ total_tokens: response_json.dig("usage", "total_tokens"),
30
+ cache_read_input_tokens: response_json.dig("usage", "input_tokens_details", "cached_tokens")
27
31
  )
28
32
  end
29
33
 
30
- def extract_response_tool_calls(resp)
31
- return if resp["output"].blank?
32
-
33
- tool_calls = []
34
- resp["output"].each do |output_item|
35
- next unless output_item["type"] == "function_call"
36
-
37
- tool_calls << {
38
- "name" => output_item["name"],
39
- "arguments" => JSON.parse(output_item["arguments"])
40
- }
41
- end
42
-
43
- tool_calls.any? ? tool_calls : nil
44
- end
45
-
46
34
  def extract_raw_response(resp)
47
35
  text_outputs = []
48
36
 
@@ -110,6 +98,15 @@ private
110
98
  if supports_native_tool_use?
111
99
  tools = build_tools_parameter(model_completion)
112
100
  parameters[:tools] = tools unless tools.blank?
101
+
102
+ if model_completion.tool_choice == "required"
103
+ parameters[:tool_choice] = build_required_tool_choice
104
+ parameters[:parallel_tool_calls] = false unless tools.blank?
105
+ elsif model_completion.tool_choice.present?
106
+ tool_klass = model_completion.tool_choice.constantize
107
+ parameters[:tool_choice] = build_forced_tool_choice(tool_klass.tool_name)
108
+ parameters[:parallel_tool_calls] = false unless tools.blank?
109
+ end
113
110
  end
114
111
 
115
112
  # Add response format if needed. Default will be { "type": "text" }
@@ -3,6 +3,7 @@
3
3
  class Raif::Llms::OpenRouter < Raif::Llm
4
4
  include Raif::Concerns::Llms::OpenAiCompletions::MessageFormatting
5
5
  include Raif::Concerns::Llms::OpenAiCompletions::ToolFormatting
6
+ include Raif::Concerns::Llms::OpenAiCompletions::ResponseToolCalls
6
7
  include Raif::Concerns::Llms::OpenAi::JsonSchemaValidation
7
8
 
8
9
  def perform_model_completion!(model_completion, &block)
@@ -23,7 +24,7 @@ class Raif::Llms::OpenRouter < Raif::Llm
23
24
  private
24
25
 
25
26
  def connection
26
- @connection ||= Faraday.new(url: "https://openrouter.ai/api/v1") do |f|
27
+ @connection ||= Faraday.new(url: "https://openrouter.ai/api/v1", request: Raif.default_request_options) do |f|
27
28
  f.headers["Authorization"] = "Bearer #{Raif.config.open_router_api_key}"
28
29
  f.headers["HTTP-Referer"] = Raif.config.open_router_site_url if Raif.config.open_router_site_url.present?
29
30
  f.headers["X-Title"] = Raif.config.open_router_app_name if Raif.config.open_router_app_name.present?
@@ -38,6 +39,8 @@ private
38
39
  end
39
40
 
40
41
  def update_model_completion(model_completion, response_json)
42
+ return if response_json.nil?
43
+
41
44
  raw_response = if model_completion.response_format_json?
42
45
  extract_json_response(response_json)
43
46
  else
@@ -51,7 +54,8 @@ private
51
54
  response_array: response_json["choices"],
52
55
  completion_tokens: response_json.dig("usage", "completion_tokens"),
53
56
  prompt_tokens: response_json.dig("usage", "prompt_tokens"),
54
- total_tokens: response_json.dig("usage", "total_tokens")
57
+ total_tokens: response_json.dig("usage", "total_tokens"),
58
+ cache_read_input_tokens: response_json.dig("usage", "prompt_tokens_details", "cached_tokens")
55
59
  )
56
60
  end
57
61
 
@@ -85,6 +89,15 @@ private
85
89
  end
86
90
 
87
91
  params[:tools] = tools unless tools.blank?
92
+
93
+ if model_completion.tool_choice == "required"
94
+ params[:tool_choice] = build_required_tool_choice
95
+ params[:parallel_tool_calls] = false unless tools.blank?
96
+ elsif model_completion.tool_choice.present?
97
+ tool_klass = model_completion.tool_choice.constantize
98
+ params[:tool_choice] = build_forced_tool_choice(tool_klass.tool_name)
99
+ params[:parallel_tool_calls] = false unless tools.blank?
100
+ end
88
101
  end
89
102
 
90
103
  if model_completion.stream_response?
@@ -108,7 +121,7 @@ private
108
121
  end
109
122
 
110
123
  def extract_json_response(resp)
111
- tool_calls = resp.dig("choices", 0, "message", "tool_calls")
124
+ tool_calls = resp&.dig("choices", 0, "message", "tool_calls")
112
125
  return extract_text_response(resp) if tool_calls.blank?
113
126
 
114
127
  tool_response = tool_calls.find do |tool_call|
@@ -121,16 +134,4 @@ private
121
134
  extract_text_response(resp)
122
135
  end
123
136
  end
124
-
125
- def extract_response_tool_calls(resp)
126
- tool_calls = resp.dig("choices", 0, "message", "tool_calls")
127
- return if tool_calls.blank?
128
-
129
- tool_calls.map do |tool_call|
130
- {
131
- "name" => tool_call["function"]["name"],
132
- "arguments" => JSON.parse(tool_call["function"]["arguments"])
133
- }
134
- end
135
- end
136
137
  end
@@ -1,14 +1,74 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ # == Schema Information
4
+ #
5
+ # Table name: raif_model_completions
6
+ #
7
+ # id :bigint not null, primary key
8
+ # available_model_tools :jsonb not null
9
+ # cache_creation_input_tokens :integer
10
+ # cache_read_input_tokens :integer
11
+ # citations :jsonb
12
+ # completed_at :datetime
13
+ # completion_tokens :integer
14
+ # failed_at :datetime
15
+ # failure_error :string
16
+ # failure_reason :text
17
+ # llm_model_key :string not null
18
+ # max_completion_tokens :integer
19
+ # messages :jsonb not null
20
+ # model_api_name :string not null
21
+ # output_token_cost :decimal(10, 6)
22
+ # prompt_token_cost :decimal(10, 6)
23
+ # prompt_tokens :integer
24
+ # raw_response :text
25
+ # response_array :jsonb
26
+ # response_format :integer default("text"), not null
27
+ # response_format_parameter :string
28
+ # response_tool_calls :jsonb
29
+ # retry_count :integer default(0), not null
30
+ # source_type :string
31
+ # started_at :datetime
32
+ # stream_response :boolean default(FALSE), not null
33
+ # system_prompt :text
34
+ # temperature :decimal(5, 3)
35
+ # tool_choice :string
36
+ # total_cost :decimal(10, 6)
37
+ # total_tokens :integer
38
+ # created_at :datetime not null
39
+ # updated_at :datetime not null
40
+ # response_id :string
41
+ # source_id :bigint
42
+ #
43
+ # Indexes
44
+ #
45
+ # index_raif_model_completions_on_completed_at (completed_at)
46
+ # index_raif_model_completions_on_created_at (created_at)
47
+ # index_raif_model_completions_on_failed_at (failed_at)
48
+ # index_raif_model_completions_on_source (source_type,source_id)
49
+ # index_raif_model_completions_on_started_at (started_at)
50
+ #
3
51
  class Raif::ModelCompletion < Raif::ApplicationRecord
4
52
  include Raif::Concerns::LlmResponseParsing
5
53
  include Raif::Concerns::HasAvailableModelTools
54
+ include Raif::Concerns::HasRuntimeDuration
55
+ include Raif::Concerns::ProviderManagedToolCalls
56
+ include Raif::Concerns::BooleanTimestamp
57
+
58
+ attr_accessor :anthropic_prompt_caching_enabled, :bedrock_prompt_caching_enabled
59
+
60
+ boolean_timestamp :started_at
61
+ boolean_timestamp :completed_at
62
+ boolean_timestamp :failed_at
6
63
 
7
64
  belongs_to :source, polymorphic: true, optional: true
8
65
 
9
66
  validates :llm_model_key, presence: true, inclusion: { in: ->{ Raif.available_llm_keys.map(&:to_s) } }
10
67
  validates :model_api_name, presence: true
11
68
 
69
+ # Scope to find completions that have response tool calls
70
+ scope :with_response_tool_calls, -> { where_json_not_blank(:response_tool_calls) }
71
+
12
72
  delegate :json_response_schema, to: :source, allow_nil: true
13
73
 
14
74
  before_save :set_total_tokens
@@ -28,8 +88,12 @@ class Raif::ModelCompletion < Raif::ApplicationRecord
28
88
  end
29
89
 
30
90
  def calculate_costs
91
+ # Each retry resends the same prompt, so the provider charges input tokens
92
+ # for every attempt. Factor in retry_count to reflect actual billing.
93
+ total_attempts = (retry_count || 0) + 1
94
+
31
95
  if prompt_tokens.present? && llm_config[:input_token_cost].present?
32
- self.prompt_token_cost = llm_config[:input_token_cost] * prompt_tokens
96
+ self.prompt_token_cost = calculate_prompt_token_cost(total_attempts)
33
97
  end
34
98
 
35
99
  if completion_tokens.present? && llm_config[:output_token_cost].present?
@@ -41,8 +105,46 @@ class Raif::ModelCompletion < Raif::ApplicationRecord
41
105
  end
42
106
  end
43
107
 
108
+ def record_failure!(exception)
109
+ self.failed_at = Time.current
110
+ self.failure_error = exception.class.name
111
+ self.failure_reason = exception.message.truncate(255)
112
+ save!
113
+ end
114
+
44
115
  private
45
116
 
117
+ def calculate_prompt_token_cost(total_attempts)
118
+ input_cost = llm_config[:input_token_cost]
119
+ llm_class = llm_config[:llm_class]
120
+ cache_read_multiplier = llm_class&.cache_read_input_token_cost_multiplier
121
+ cache_creation_multiplier = llm_class&.cache_creation_input_token_cost_multiplier
122
+ cached_reads = cache_read_input_tokens.to_i
123
+ cached_writes = cache_creation_input_tokens.to_i
124
+
125
+ if cached_reads > 0 && cache_read_multiplier.present?
126
+ cache_read_cost = input_cost * cache_read_multiplier
127
+
128
+ if llm_class.prompt_tokens_include_cached_tokens?
129
+ # OpenAI / Google / OpenRouter: cached tokens are a subset of prompt_tokens
130
+ non_cached = prompt_tokens - cached_reads
131
+ cost = (non_cached * input_cost) + (cached_reads * cache_read_cost)
132
+ else
133
+ # Anthropic / Bedrock: cached tokens are separate from prompt_tokens
134
+ cost = (prompt_tokens * input_cost) + (cached_reads * cache_read_cost)
135
+ end
136
+ else
137
+ cost = prompt_tokens * input_cost
138
+ end
139
+
140
+ # Cache creation surcharge (Anthropic / Bedrock)
141
+ if cached_writes > 0 && cache_creation_multiplier.present?
142
+ cost += cached_writes * input_cost * cache_creation_multiplier
143
+ end
144
+
145
+ cost * total_attempts
146
+ end
147
+
46
148
  def llm_config
47
149
  @llm_config ||= Raif.llm_config(llm_model_key.to_sym)
48
150
  end