raif 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2 -2
  3. data/app/assets/builds/raif_admin.css +40 -2
  4. data/app/assets/builds/raif_admin_sprockets.js +2709 -0
  5. data/app/assets/javascript/raif/admin/copy_to_clipboard_controller.js +132 -0
  6. data/app/assets/javascript/raif/admin/cost_estimate_controller.js +80 -0
  7. data/app/assets/javascript/raif/admin/judge_config_controller.js +23 -0
  8. data/app/assets/javascript/raif/admin/select_all_checkboxes_controller.js +33 -0
  9. data/app/assets/javascript/raif/admin/sortable_table_controller.js +51 -0
  10. data/app/assets/javascript/raif/admin/table_search_controller.js +15 -0
  11. data/app/assets/javascript/raif/admin/tom_select_controller.js +33 -0
  12. data/app/assets/javascript/raif_admin.js +23 -0
  13. data/app/assets/javascript/raif_admin_sprockets.js +24 -0
  14. data/app/assets/stylesheets/raif_admin.scss +50 -1
  15. data/app/controllers/raif/admin/agents_controller.rb +27 -1
  16. data/app/controllers/raif/admin/configs_controller.rb +1 -0
  17. data/app/controllers/raif/admin/llms_controller.rb +27 -0
  18. data/app/controllers/raif/admin/model_completions_controller.rb +6 -0
  19. data/app/controllers/raif/admin/prompt_studio/agents_controller.rb +25 -0
  20. data/app/controllers/raif/admin/prompt_studio/base_controller.rb +32 -0
  21. data/app/controllers/raif/admin/prompt_studio/batch_runs_controller.rb +102 -0
  22. data/app/controllers/raif/admin/prompt_studio/conversations_controller.rb +25 -0
  23. data/app/controllers/raif/admin/prompt_studio/tasks_controller.rb +64 -0
  24. data/app/controllers/raif/admin/tasks_controller.rb +5 -0
  25. data/app/helpers/raif/application_helper.rb +40 -0
  26. data/app/jobs/raif/prompt_studio_batch_run_item_job.rb +11 -0
  27. data/app/jobs/raif/prompt_studio_batch_run_job.rb +15 -0
  28. data/app/jobs/raif/prompt_studio_task_run_job.rb +36 -0
  29. data/app/models/raif/agent.rb +36 -5
  30. data/app/models/raif/agents/native_tool_calling_agent.rb +101 -19
  31. data/app/models/raif/concerns/has_prompt_templates.rb +88 -0
  32. data/app/models/raif/concerns/has_runtime_duration.rb +41 -0
  33. data/app/models/raif/concerns/json_schema_definition.rb +16 -3
  34. data/app/models/raif/concerns/llm_prompt_caching.rb +20 -0
  35. data/app/models/raif/concerns/llms/anthropic/message_formatting.rb +6 -0
  36. data/app/models/raif/concerns/llms/anthropic/tool_formatting.rb +5 -1
  37. data/app/models/raif/concerns/llms/bedrock/message_formatting.rb +7 -0
  38. data/app/models/raif/concerns/llms/bedrock/tool_formatting.rb +4 -0
  39. data/app/models/raif/concerns/llms/google/message_formatting.rb +5 -2
  40. data/app/models/raif/concerns/llms/google/tool_formatting.rb +4 -0
  41. data/app/models/raif/concerns/llms/message_formatting.rb +30 -0
  42. data/app/models/raif/concerns/llms/open_ai_completions/response_tool_calls.rb +1 -1
  43. data/app/models/raif/concerns/llms/open_ai_completions/tool_formatting.rb +4 -0
  44. data/app/models/raif/concerns/llms/open_ai_responses/tool_formatting.rb +4 -0
  45. data/app/models/raif/concerns/provider_managed_tool_calls.rb +162 -0
  46. data/app/models/raif/conversation.rb +24 -3
  47. data/app/models/raif/conversation_entry.rb +6 -3
  48. data/app/models/raif/embedding_models/bedrock.rb +10 -1
  49. data/app/models/raif/embedding_models/google.rb +37 -0
  50. data/app/models/raif/evals/llm_judge.rb +70 -0
  51. data/{lib → app/models}/raif/evals/llm_judges/binary.rb +38 -0
  52. data/{lib → app/models}/raif/evals/llm_judges/comparative.rb +38 -0
  53. data/{lib → app/models}/raif/evals/llm_judges/scored.rb +38 -0
  54. data/{lib → app/models}/raif/evals/llm_judges/summarization.rb +38 -0
  55. data/app/models/raif/llm.rb +82 -7
  56. data/app/models/raif/llms/anthropic.rb +26 -4
  57. data/app/models/raif/llms/bedrock.rb +59 -5
  58. data/app/models/raif/llms/google.rb +28 -2
  59. data/app/models/raif/llms/open_ai_base.rb +4 -0
  60. data/app/models/raif/llms/open_ai_completions.rb +9 -2
  61. data/app/models/raif/llms/open_ai_responses.rb +9 -2
  62. data/app/models/raif/llms/open_router.rb +10 -3
  63. data/app/models/raif/model_completion.rb +75 -34
  64. data/app/models/raif/model_tool.rb +45 -3
  65. data/app/models/raif/model_tool_invocation.rb +31 -1
  66. data/app/models/raif/prompt_studio_batch_run.rb +155 -0
  67. data/app/models/raif/prompt_studio_batch_run_item.rb +220 -0
  68. data/app/models/raif/streaming_responses/bedrock.rb +60 -1
  69. data/app/models/raif/task.rb +30 -6
  70. data/app/views/layouts/raif/admin.html.erb +31 -1
  71. data/app/views/raif/admin/agents/_agent.html.erb +1 -0
  72. data/app/views/raif/admin/agents/index.html.erb +48 -0
  73. data/app/views/raif/admin/agents/show.html.erb +4 -0
  74. data/app/views/raif/admin/llms/index.html.erb +110 -0
  75. data/app/views/raif/admin/model_completions/_model_completion.html.erb +3 -7
  76. data/app/views/raif/admin/model_completions/index.html.erb +14 -1
  77. data/app/views/raif/admin/model_completions/show.html.erb +164 -55
  78. data/app/views/raif/admin/model_tool_invocations/index.html.erb +1 -1
  79. data/app/views/raif/admin/model_tool_invocations/show.html.erb +18 -0
  80. data/app/views/raif/admin/prompt_studio/agents/index.html.erb +56 -0
  81. data/app/views/raif/admin/prompt_studio/agents/show.html.erb +57 -0
  82. data/app/views/raif/admin/prompt_studio/batch_runs/_batch_run_item.html.erb +54 -0
  83. data/app/views/raif/admin/prompt_studio/batch_runs/_judge_config_fields.html.erb +76 -0
  84. data/app/views/raif/admin/prompt_studio/batch_runs/_judge_detail_modal.html.erb +27 -0
  85. data/app/views/raif/admin/prompt_studio/batch_runs/_modal.html.erb +35 -0
  86. data/app/views/raif/admin/prompt_studio/batch_runs/_progress.html.erb +78 -0
  87. data/app/views/raif/admin/prompt_studio/batch_runs/show.html.erb +49 -0
  88. data/app/views/raif/admin/prompt_studio/conversations/index.html.erb +48 -0
  89. data/app/views/raif/admin/prompt_studio/conversations/show.html.erb +36 -0
  90. data/app/views/raif/admin/prompt_studio/shared/_nav_tabs.html.erb +17 -0
  91. data/app/views/raif/admin/prompt_studio/shared/_prompt_comparison.html.erb +87 -0
  92. data/app/views/raif/admin/prompt_studio/shared/_type_filter.html.erb +54 -0
  93. data/app/views/raif/admin/prompt_studio/tasks/_task_result.html.erb +145 -0
  94. data/app/views/raif/admin/prompt_studio/tasks/_task_row.html.erb +12 -0
  95. data/app/views/raif/admin/prompt_studio/tasks/_task_type_filter.html.erb +58 -0
  96. data/app/views/raif/admin/prompt_studio/tasks/_tasks_table.html.erb +22 -0
  97. data/app/views/raif/admin/prompt_studio/tasks/index.html.erb +35 -0
  98. data/app/views/raif/admin/prompt_studio/tasks/show.html.erb +19 -0
  99. data/app/views/raif/admin/tasks/_task.html.erb +1 -0
  100. data/app/views/raif/admin/tasks/index.html.erb +17 -5
  101. data/app/views/raif/admin/tasks/show.html.erb +20 -0
  102. data/app/views/raif/conversation_entries/_message.html.erb +10 -6
  103. data/config/importmap.rb +8 -0
  104. data/config/locales/admin.en.yml +128 -0
  105. data/config/locales/en.yml +36 -2
  106. data/config/routes.rb +8 -0
  107. data/db/migrate/20260307000000_add_prompt_studio_run_to_raif_tasks.rb +7 -0
  108. data/db/migrate/20260308000000_create_raif_prompt_studio_batch_runs.rb +27 -0
  109. data/db/migrate/20260308000001_create_raif_prompt_studio_batch_run_items.rb +24 -0
  110. data/db/migrate/20260407000000_add_cache_token_columns_to_raif_model_completions.rb +8 -0
  111. data/lib/generators/raif/agent/agent_generator.rb +18 -0
  112. data/lib/generators/raif/agent/templates/agent.rb.tt +7 -5
  113. data/lib/generators/raif/agent/templates/system_prompt.erb.tt +3 -0
  114. data/lib/generators/raif/conversation/conversation_generator.rb +19 -1
  115. data/lib/generators/raif/conversation/templates/system_prompt.erb.tt +4 -0
  116. data/lib/generators/raif/install/templates/initializer.rb +68 -27
  117. data/lib/generators/raif/task/task_generator.rb +18 -0
  118. data/lib/generators/raif/task/templates/prompt.erb.tt +4 -0
  119. data/lib/generators/raif/task/templates/task.rb.tt +9 -8
  120. data/lib/raif/configuration.rb +10 -0
  121. data/lib/raif/embedding_model_registry.rb +8 -0
  122. data/lib/raif/engine.rb +16 -1
  123. data/lib/raif/errors/blank_response_error.rb +8 -0
  124. data/lib/raif/errors/prompt_template_error.rb +15 -0
  125. data/lib/raif/errors.rb +2 -0
  126. data/lib/raif/evals.rb +0 -6
  127. data/lib/raif/llm_registry.rb +230 -9
  128. data/lib/raif/prompt_studio_comparison_builder.rb +138 -0
  129. data/lib/raif/token_estimator.rb +28 -0
  130. data/lib/raif/version.rb +1 -1
  131. data/lib/raif.rb +2 -0
  132. data/spec/support/rspec_helpers.rb +7 -1
  133. data/spec/support/test_task.rb +9 -0
  134. data/spec/support/test_template_task.rb +41 -0
  135. metadata +65 -7
  136. data/lib/raif/evals/llm_judge.rb +0 -32
  137. /data/{lib → app/models}/raif/evals/scoring_rubric.rb +0 -0
@@ -1,5 +1,43 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ # == Schema Information
4
+ #
5
+ # Table name: raif_tasks
6
+ #
7
+ # id :bigint not null, primary key
8
+ # available_model_tools :jsonb not null
9
+ # completed_at :datetime
10
+ # creator_type :string
11
+ # failed_at :datetime
12
+ # llm_model_key :string not null
13
+ # prompt :text
14
+ # prompt_studio_run :boolean default(FALSE), not null
15
+ # raw_response :text
16
+ # requested_language_key :string
17
+ # response_format :integer default("text"), not null
18
+ # run_with :jsonb
19
+ # source_type :string
20
+ # started_at :datetime
21
+ # system_prompt :text
22
+ # type :string not null
23
+ # created_at :datetime not null
24
+ # updated_at :datetime not null
25
+ # creator_id :bigint
26
+ # source_id :bigint
27
+ #
28
+ # Indexes
29
+ #
30
+ # index_raif_tasks_on_completed_at (completed_at)
31
+ # index_raif_tasks_on_created_at (created_at)
32
+ # index_raif_tasks_on_creator (creator_type,creator_id)
33
+ # index_raif_tasks_on_failed_at (failed_at)
34
+ # index_raif_tasks_on_source (source_type,source_id)
35
+ # index_raif_tasks_on_started_at (started_at)
36
+ # index_raif_tasks_on_type (type)
37
+ # index_raif_tasks_on_type_and_completed_at (type,completed_at)
38
+ # index_raif_tasks_on_type_and_failed_at (type,failed_at)
39
+ # index_raif_tasks_on_type_and_started_at (type,started_at)
40
+ #
3
41
  module Raif
4
42
  module Evals
5
43
  module LlmJudges
@@ -1,5 +1,43 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ # == Schema Information
4
+ #
5
+ # Table name: raif_tasks
6
+ #
7
+ # id :bigint not null, primary key
8
+ # available_model_tools :jsonb not null
9
+ # completed_at :datetime
10
+ # creator_type :string
11
+ # failed_at :datetime
12
+ # llm_model_key :string not null
13
+ # prompt :text
14
+ # prompt_studio_run :boolean default(FALSE), not null
15
+ # raw_response :text
16
+ # requested_language_key :string
17
+ # response_format :integer default("text"), not null
18
+ # run_with :jsonb
19
+ # source_type :string
20
+ # started_at :datetime
21
+ # system_prompt :text
22
+ # type :string not null
23
+ # created_at :datetime not null
24
+ # updated_at :datetime not null
25
+ # creator_id :bigint
26
+ # source_id :bigint
27
+ #
28
+ # Indexes
29
+ #
30
+ # index_raif_tasks_on_completed_at (completed_at)
31
+ # index_raif_tasks_on_created_at (created_at)
32
+ # index_raif_tasks_on_creator (creator_type,creator_id)
33
+ # index_raif_tasks_on_failed_at (failed_at)
34
+ # index_raif_tasks_on_source (source_type,source_id)
35
+ # index_raif_tasks_on_started_at (started_at)
36
+ # index_raif_tasks_on_type (type)
37
+ # index_raif_tasks_on_type_and_completed_at (type,completed_at)
38
+ # index_raif_tasks_on_type_and_failed_at (type,failed_at)
39
+ # index_raif_tasks_on_type_and_started_at (type,started_at)
40
+ #
3
41
  module Raif
4
42
  module Evals
5
43
  module LlmJudges
@@ -1,5 +1,43 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ # == Schema Information
4
+ #
5
+ # Table name: raif_tasks
6
+ #
7
+ # id :bigint not null, primary key
8
+ # available_model_tools :jsonb not null
9
+ # completed_at :datetime
10
+ # creator_type :string
11
+ # failed_at :datetime
12
+ # llm_model_key :string not null
13
+ # prompt :text
14
+ # prompt_studio_run :boolean default(FALSE), not null
15
+ # raw_response :text
16
+ # requested_language_key :string
17
+ # response_format :integer default("text"), not null
18
+ # run_with :jsonb
19
+ # source_type :string
20
+ # started_at :datetime
21
+ # system_prompt :text
22
+ # type :string not null
23
+ # created_at :datetime not null
24
+ # updated_at :datetime not null
25
+ # creator_id :bigint
26
+ # source_id :bigint
27
+ #
28
+ # Indexes
29
+ #
30
+ # index_raif_tasks_on_completed_at (completed_at)
31
+ # index_raif_tasks_on_created_at (created_at)
32
+ # index_raif_tasks_on_creator (creator_type,creator_id)
33
+ # index_raif_tasks_on_failed_at (failed_at)
34
+ # index_raif_tasks_on_source (source_type,source_id)
35
+ # index_raif_tasks_on_started_at (started_at)
36
+ # index_raif_tasks_on_type (type)
37
+ # index_raif_tasks_on_type_and_completed_at (type,completed_at)
38
+ # index_raif_tasks_on_type_and_failed_at (type,failed_at)
39
+ # index_raif_tasks_on_type_and_started_at (type,started_at)
40
+ #
3
41
  module Raif
4
42
  module Evals
5
43
  module LlmJudges
@@ -1,5 +1,43 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ # == Schema Information
4
+ #
5
+ # Table name: raif_tasks
6
+ #
7
+ # id :bigint not null, primary key
8
+ # available_model_tools :jsonb not null
9
+ # completed_at :datetime
10
+ # creator_type :string
11
+ # failed_at :datetime
12
+ # llm_model_key :string not null
13
+ # prompt :text
14
+ # prompt_studio_run :boolean default(FALSE), not null
15
+ # raw_response :text
16
+ # requested_language_key :string
17
+ # response_format :integer default("text"), not null
18
+ # run_with :jsonb
19
+ # source_type :string
20
+ # started_at :datetime
21
+ # system_prompt :text
22
+ # type :string not null
23
+ # created_at :datetime not null
24
+ # updated_at :datetime not null
25
+ # creator_id :bigint
26
+ # source_id :bigint
27
+ #
28
+ # Indexes
29
+ #
30
+ # index_raif_tasks_on_completed_at (completed_at)
31
+ # index_raif_tasks_on_created_at (created_at)
32
+ # index_raif_tasks_on_creator (creator_type,creator_id)
33
+ # index_raif_tasks_on_failed_at (failed_at)
34
+ # index_raif_tasks_on_source (source_type,source_id)
35
+ # index_raif_tasks_on_started_at (started_at)
36
+ # index_raif_tasks_on_type (type)
37
+ # index_raif_tasks_on_type_and_completed_at (type,completed_at)
38
+ # index_raif_tasks_on_type_and_failed_at (type,failed_at)
39
+ # index_raif_tasks_on_type_and_started_at (type,started_at)
40
+ #
3
41
  module Raif
4
42
  module Evals
5
43
  module LlmJudges
@@ -52,7 +52,7 @@ module Raif
52
52
  end
53
53
 
54
54
  def chat(message: nil, messages: nil, response_format: :text, available_model_tools: [], source: nil, system_prompt: nil, temperature: nil,
55
- max_completion_tokens: nil, tool_choice: nil, &block)
55
+ max_completion_tokens: nil, tool_choice: nil, anthropic_prompt_caching_enabled: false, bedrock_prompt_caching_enabled: false, &block)
56
56
  unless response_format.is_a?(Symbol)
57
57
  raise ArgumentError,
58
58
  "Raif::Llm#chat - Invalid response format: #{response_format}. Must be a symbol (you passed #{response_format.class}) and be one of: #{VALID_RESPONSE_FORMATS.join(", ")}" # rubocop:disable Layout/LineLength
@@ -70,7 +70,15 @@ module Raif
70
70
  raise ArgumentError, "Raif::Llm#chat - You must provide either a message: or messages: argument, not both"
71
71
  end
72
72
 
73
- if tool_choice.present? && !available_model_tools.map(&:to_s).include?(tool_choice.to_s)
73
+ # Normalize :required / "required" to the symbol form for validation
74
+ tool_choice = :required if tool_choice.to_s == "required"
75
+
76
+ if tool_choice == :required
77
+ if available_model_tools.blank?
78
+ raise ArgumentError,
79
+ "Raif::Llm#chat - tool_choice: :required requires at least one available model tool"
80
+ end
81
+ elsif tool_choice.present? && !available_model_tools.map(&:to_s).include?(tool_choice.to_s)
74
82
  raise ArgumentError,
75
83
  "Raif::Llm#chat - Invalid tool choice: #{tool_choice} is not included in the available model tools: #{available_model_tools.join(", ")}"
76
84
  end
@@ -99,25 +107,29 @@ module Raif
99
107
  stream_response: block_given?
100
108
  )
101
109
 
110
+ model_completion.anthropic_prompt_caching_enabled = anthropic_prompt_caching_enabled
111
+ model_completion.bedrock_prompt_caching_enabled = bedrock_prompt_caching_enabled
112
+
102
113
  model_completion.started!
103
114
 
104
115
  retry_with_backoff(model_completion) do
105
116
  perform_model_completion!(model_completion, &block)
117
+ ensure_model_completion_present!(model_completion)
106
118
  end
107
119
 
108
120
  model_completion.completed!
109
121
  model_completion
110
122
  rescue Raif::Errors::StreamingError => e
111
123
  Rails.logger.error("Raif streaming error -- code: #{e.code} -- type: #{e.type} -- message: #{e.message} -- event: #{e.event}")
112
- model_completion&.record_failure!(e)
124
+ model_completion&.record_failure!(e) unless model_completion&.failed?
113
125
  raise e
114
126
  rescue Faraday::Error => e
115
127
  Raif.logger.error("LLM API request failed (status: #{e.response_status}): #{e.message}")
116
128
  Raif.logger.error(e.response_body)
117
- model_completion&.record_failure!(e)
129
+ model_completion&.record_failure!(e) unless model_completion&.failed?
118
130
  raise e
119
131
  rescue StandardError => e
120
- model_completion&.record_failure!(e)
132
+ model_completion&.record_failure!(e) unless model_completion&.failed?
121
133
  raise e
122
134
  end
123
135
 
@@ -129,6 +141,26 @@ module Raif
129
141
  VALID_RESPONSE_FORMATS
130
142
  end
131
143
 
144
+ # Override in subclasses to indicate whether prompt_tokens reported by the
145
+ # provider already include cached tokens as a subset (OpenAI, Google,
146
+ # OpenRouter) or whether cached tokens are reported separately and are
147
+ # additive to prompt_tokens (Anthropic, Bedrock).
148
+ def self.prompt_tokens_include_cached_tokens?
149
+ true
150
+ end
151
+
152
+ # Multiplier applied to the base input_token_cost to derive the per-token
153
+ # cost for cache reads. Return nil when the provider has no cache pricing.
154
+ def self.cache_read_input_token_cost_multiplier
155
+ nil
156
+ end
157
+
158
+ # Multiplier applied to the base input_token_cost to derive the per-token
159
+ # cost for cache creation writes. Return nil when there is no write surcharge.
160
+ def self.cache_creation_input_token_cost_multiplier
161
+ nil
162
+ end
163
+
132
164
  def supports_provider_managed_tool?(tool_klass)
133
165
  supported_provider_managed_tools&.include?(tool_klass.to_s)
134
166
  end
@@ -141,6 +173,20 @@ module Raif
141
173
  raise NotImplementedError, "#{self.class.name} must implement #build_forced_tool_choice"
142
174
  end
143
175
 
176
+ # Build the tool_choice parameter to require the model to call any tool (but not a specific one).
177
+ # Each provider implements this to return the correct format.
178
+ # @return [Hash, String] The tool_choice parameter for the provider's API
179
+ def build_required_tool_choice
180
+ raise NotImplementedError, "#{self.class.name} must implement #build_required_tool_choice"
181
+ end
182
+
183
+ # Whether the provider can faithfully enforce tool_choice: :required for
184
+ # the given tool set. Override in subclasses when a provider can only
185
+ # enforce required tool use for some tool types.
186
+ def supports_faithful_required_tool_choice?(available_model_tools)
187
+ available_model_tools.present?
188
+ end
189
+
144
190
  def validate_provider_managed_tool_support!(tool)
145
191
  unless supports_provider_managed_tool?(tool)
146
192
  raise Raif::Errors::UnsupportedFeatureError,
@@ -150,6 +196,10 @@ module Raif
150
196
 
151
197
  private
152
198
 
199
+ def retriable_exceptions
200
+ Raif.config.llm_request_retriable_exceptions
201
+ end
202
+
153
203
  def retry_with_backoff(model_completion)
154
204
  retries = 0
155
205
  max_retries = Raif.config.llm_request_max_retries
@@ -158,11 +208,11 @@ module Raif
158
208
 
159
209
  begin
160
210
  yield
161
- rescue *Raif.config.llm_request_retriable_exceptions => e
211
+ rescue *retriable_exceptions => e
162
212
  retries += 1
163
213
  if retries <= max_retries
164
214
  delay = [base_delay * (2**(retries - 1)), max_delay].min
165
- Raif.logger.warn("Retrying LLM API request after error: #{e.message}. Attempt #{retries}/#{max_retries}. Waiting #{delay} seconds...")
215
+ log_retry(e, model_completion, retries, max_retries, delay)
166
216
  model_completion.increment!(:retry_count)
167
217
  sleep delay
168
218
  retry
@@ -173,10 +223,35 @@ module Raif
173
223
  end
174
224
  end
175
225
 
226
+ def log_retry(error, model_completion, attempt, max_retries, delay)
227
+ if error.is_a?(Raif::Errors::BlankResponseError)
228
+ has_reasoning = model_completion.response_array&.any? do |block|
229
+ block.is_a?(Hash) ? block.key?("reasoning_content") : block.respond_to?(:reasoning_content)
230
+ end
231
+ Raif.logger.warn(
232
+ "Blank response retry #{attempt}/#{max_retries} for #{api_name} " \
233
+ "(ModelCompletion##{model_completion.id}, source: #{model_completion.source_type}##{model_completion.source_id}, " \
234
+ "completion_tokens: #{model_completion.completion_tokens}, reasoning_content_present: #{has_reasoning}). " \
235
+ "Waiting #{delay} seconds..."
236
+ )
237
+ else
238
+ Raif.logger.warn("Retrying LLM API request after error: #{error.message}. Attempt #{attempt}/#{max_retries}. Waiting #{delay} seconds...")
239
+ end
240
+ end
241
+
176
242
  def streaming_response_type
177
243
  raise NotImplementedError, "#{self.class.name} must implement #streaming_response_type"
178
244
  end
179
245
 
246
+ def ensure_model_completion_present!(model_completion)
247
+ # response_array/raw provider data may still be present for debugging even when
248
+ # the normalized response has no text or tool calls.
249
+ return if model_completion.raw_response.present? || model_completion.response_tool_calls.present?
250
+
251
+ raise Raif::Errors::BlankResponseError,
252
+ "Model completion #{model_completion.id} returned no text response and no tool calls"
253
+ end
254
+
180
255
  def streaming_chunk_handler(model_completion, &block)
181
256
  return unless model_completion.stream_response?
182
257
 
@@ -5,6 +5,18 @@ class Raif::Llms::Anthropic < Raif::Llm
5
5
  include Raif::Concerns::Llms::Anthropic::ToolFormatting
6
6
  include Raif::Concerns::Llms::Anthropic::ResponseToolCalls
7
7
 
8
+ def self.prompt_tokens_include_cached_tokens?
9
+ false
10
+ end
11
+
12
+ def self.cache_read_input_token_cost_multiplier
13
+ 0.1
14
+ end
15
+
16
+ def self.cache_creation_input_token_cost_multiplier
17
+ 1.25
18
+ end
19
+
8
20
  def perform_model_completion!(model_completion, &block)
9
21
  params = build_request_parameters(model_completion)
10
22
  response = connection.post("messages") do |req|
@@ -49,24 +61,30 @@ private
49
61
  model_completion.completion_tokens = response_json&.dig("usage", "output_tokens")
50
62
  model_completion.prompt_tokens = response_json&.dig("usage", "input_tokens")
51
63
  model_completion.total_tokens = model_completion.completion_tokens.to_i + model_completion.prompt_tokens.to_i
64
+ model_completion.cache_read_input_tokens = response_json&.dig("usage", "cache_read_input_tokens")
65
+ model_completion.cache_creation_input_tokens = response_json&.dig("usage", "cache_creation_input_tokens")
52
66
  model_completion.save!
53
67
  end
54
68
 
55
69
  def build_request_parameters(model_completion)
56
70
  params = {
57
71
  model: model_completion.model_api_name,
58
- messages: model_completion.messages,
59
- temperature: (model_completion.temperature || default_temperature).to_f,
60
- max_tokens: model_completion.max_completion_tokens || default_max_completion_tokens
72
+ messages: model_completion.messages
61
73
  }
62
74
 
75
+ params[:temperature] = (model_completion.temperature || default_temperature).to_f if supports_temperature?
76
+ params[:max_tokens] = model_completion.max_completion_tokens || default_max_completion_tokens
77
+
63
78
  params[:system] = model_completion.system_prompt if model_completion.system_prompt.present?
79
+ params[:cache_control] = { type: "ephemeral" } if model_completion.anthropic_prompt_caching_enabled
64
80
 
65
81
  if supports_native_tool_use?
66
82
  tools = build_tools_parameter(model_completion)
67
83
  params[:tools] = tools unless tools.blank?
68
84
 
69
- if model_completion.tool_choice.present?
85
+ if model_completion.tool_choice == "required"
86
+ params[:tool_choice] = build_required_tool_choice
87
+ elsif model_completion.tool_choice.present?
70
88
  tool_klass = model_completion.tool_choice.constantize
71
89
  params[:tool_choice] = build_forced_tool_choice(tool_klass.tool_name)
72
90
  end
@@ -77,6 +95,10 @@ private
77
95
  params
78
96
  end
79
97
 
98
+ def supports_temperature?
99
+ provider_settings.key?(:supports_temperature) ? provider_settings[:supports_temperature] : true
100
+ end
101
+
80
102
  def extract_text_response(resp)
81
103
  return if resp&.dig("content").blank?
82
104
 
@@ -5,10 +5,20 @@ class Raif::Llms::Bedrock < Raif::Llm
5
5
  include Raif::Concerns::Llms::Bedrock::ToolFormatting
6
6
  include Raif::Concerns::Llms::Bedrock::ResponseToolCalls
7
7
 
8
+ def self.prompt_tokens_include_cached_tokens?
9
+ false
10
+ end
11
+
12
+ def self.cache_read_input_token_cost_multiplier
13
+ 0.1
14
+ end
15
+
16
+ def self.cache_creation_input_token_cost_multiplier
17
+ 1.25
18
+ end
19
+
8
20
  def perform_model_completion!(model_completion, &block)
9
- if Raif.config.aws_bedrock_model_name_prefix.present?
10
- model_completion.model_api_name = "#{Raif.config.aws_bedrock_model_name_prefix}.#{model_completion.model_api_name}"
11
- end
21
+ model_completion.model_api_name = resolve_model_api_name(model_completion.model_api_name)
12
22
 
13
23
  params = build_request_parameters(model_completion)
14
24
 
@@ -39,10 +49,29 @@ class Raif::Llms::Bedrock < Raif::Llm
39
49
  private
40
50
 
41
51
  def bedrock_client
42
- @bedrock_client ||= Aws::BedrockRuntime::Client.new(region: Raif.config.aws_bedrock_region)
52
+ @bedrock_client ||= begin
53
+ client_options = {
54
+ region: Raif.config.aws_bedrock_region,
55
+ max_attempts: 1
56
+ }
57
+
58
+ client_options[:http_read_timeout] = Raif.config.request_read_timeout if Raif.config.request_read_timeout
59
+ client_options[:http_open_timeout] = Raif.config.request_open_timeout if Raif.config.request_open_timeout
60
+
61
+ Aws::BedrockRuntime::Client.new(client_options)
62
+ end
63
+ end
64
+
65
+ def retriable_exceptions
66
+ super + [
67
+ Aws::BedrockRuntime::Errors::ServiceError,
68
+ Seahorse::Client::NetworkingError
69
+ ]
43
70
  end
44
71
 
45
72
  def update_model_completion(model_completion, resp)
73
+ return if resp.nil?
74
+
46
75
  model_completion.raw_response = if model_completion.response_format_json?
47
76
  extract_json_response(resp)
48
77
  else
@@ -54,6 +83,8 @@ private
54
83
  model_completion.completion_tokens = resp.usage.output_tokens
55
84
  model_completion.prompt_tokens = resp.usage.input_tokens
56
85
  model_completion.total_tokens = resp.usage.total_tokens
86
+ model_completion.cache_read_input_tokens = resp.usage.try(:cache_read_input_tokens)
87
+ model_completion.cache_creation_input_tokens = resp.usage.try(:cache_write_input_tokens)
57
88
  model_completion.save!
58
89
  end
59
90
 
@@ -74,12 +105,20 @@ private
74
105
  tools = build_tools_parameter(model_completion)
75
106
  params[:tool_config] = tools unless tools.blank?
76
107
 
77
- if model_completion.tool_choice.present?
108
+ if model_completion.tool_choice == "required"
109
+ params[:tool_config][:tool_choice] = build_required_tool_choice
110
+ elsif model_completion.tool_choice.present?
78
111
  tool_klass = model_completion.tool_choice.constantize
79
112
  params[:tool_config][:tool_choice] = build_forced_tool_choice(tool_klass.tool_name)
80
113
  end
81
114
  end
82
115
 
116
+ if model_completion.bedrock_prompt_caching_enabled
117
+ cache_point = { cache_point: { type: "default" } }
118
+ params[:system] << cache_point if params[:system].present?
119
+ messages_param.last[:content] << cache_point.dup if messages_param.last.present?
120
+ end
121
+
83
122
  params
84
123
  end
85
124
 
@@ -148,4 +187,19 @@ private
148
187
  end
149
188
  end
150
189
 
190
+ def resolve_model_api_name(model_api_name)
191
+ api_name = model_api_name.to_s
192
+ prefix = Raif.config.aws_bedrock_model_name_prefix.to_s.presence
193
+
194
+ return api_name if prefix.blank?
195
+ return api_name if api_name.start_with?("#{prefix}.")
196
+
197
+ # Some Bedrock model IDs are provider IDs (not inference profile IDs),
198
+ # so they should not be prefixed.
199
+ return api_name if api_name.start_with?("openai.gpt-oss-")
200
+ return api_name if api_name.start_with?("deepseek.")
201
+
202
+ "#{prefix}.#{api_name}"
203
+ end
204
+
151
205
  end
@@ -5,6 +5,10 @@ class Raif::Llms::Google < Raif::Llm
5
5
  include Raif::Concerns::Llms::Google::ToolFormatting
6
6
  include Raif::Concerns::Llms::Google::ResponseToolCalls
7
7
 
8
+ def self.cache_read_input_token_cost_multiplier
9
+ 0.25
10
+ end
11
+
8
12
  def perform_model_completion!(model_completion, &block)
9
13
  params = build_request_parameters(model_completion)
10
14
  endpoint = build_endpoint(model_completion)
@@ -21,10 +25,17 @@ class Raif::Llms::Google < Raif::Llm
21
25
  model_completion
22
26
  end
23
27
 
28
+ def supports_faithful_required_tool_choice?(available_model_tools)
29
+ super && Array(available_model_tools).none? do |tool|
30
+ tool_class = tool.is_a?(String) ? tool.constantize : tool
31
+ tool_class.provider_managed?
32
+ end
33
+ end
34
+
24
35
  private
25
36
 
26
37
  def connection
27
- @connection ||= Faraday.new(url: "https://generativelanguage.googleapis.com/v1beta") do |f|
38
+ @connection ||= Faraday.new(url: "https://generativelanguage.googleapis.com/v1beta", request: Raif.default_request_options) do |f|
28
39
  f.headers["x-goog-api-key"] = Raif.config.google_api_key
29
40
  f.request :json
30
41
  f.response :json
@@ -58,6 +69,7 @@ private
58
69
  model_completion.prompt_tokens = response_json&.dig("usageMetadata", "promptTokenCount")
59
70
  model_completion.total_tokens = response_json&.dig("usageMetadata", "totalTokenCount") ||
60
71
  (model_completion.completion_tokens.to_i + model_completion.prompt_tokens.to_i)
72
+ model_completion.cache_read_input_tokens = response_json&.dig("usageMetadata", "cachedContentTokenCount")
61
73
  model_completion.save!
62
74
  end
63
75
 
@@ -76,7 +88,13 @@ private
76
88
  tools = build_tools_parameter(model_completion)
77
89
  params[:tools] = tools unless tools.blank?
78
90
 
79
- if model_completion.tool_choice.present?
91
+ if model_completion.tool_choice == "required"
92
+ if supports_faithful_required_tool_choice?(model_completion.available_model_tools)
93
+ params[:toolConfig] = { functionCallingConfig: build_required_tool_choice }
94
+ else
95
+ log_required_tool_choice_fallback(model_completion)
96
+ end
97
+ elsif model_completion.tool_choice.present?
80
98
  tool_klass = model_completion.tool_choice.constantize
81
99
  params[:toolConfig] = { functionCallingConfig: build_forced_tool_choice(tool_klass.tool_name) }
82
100
  end
@@ -85,6 +103,14 @@ private
85
103
  params
86
104
  end
87
105
 
106
+ def log_required_tool_choice_fallback(model_completion)
107
+ Raif.logger.warn(
108
+ "Google AI cannot faithfully enforce tool_choice: :required when provider-managed tools are present. " \
109
+ "Falling back to runtime validation for #{model_completion.model_api_name} " \
110
+ "(tools: #{model_completion.available_model_tools_map.keys.join(", ")})"
111
+ )
112
+ end
113
+
88
114
  def build_generation_config(model_completion)
89
115
  config = {}
90
116
 
@@ -3,6 +3,10 @@
3
3
  class Raif::Llms::OpenAiBase < Raif::Llm
4
4
  include Raif::Concerns::Llms::OpenAi::JsonSchemaValidation
5
5
 
6
+ def self.cache_read_input_token_cost_multiplier
7
+ 0.5
8
+ end
9
+
6
10
  def perform_model_completion!(model_completion, &block)
7
11
  if supports_temperature?
8
12
  model_completion.temperature ||= default_temperature
@@ -16,6 +16,8 @@ private
16
16
  end
17
17
 
18
18
  def update_model_completion(model_completion, response_json)
19
+ return if response_json.nil?
20
+
19
21
  model_completion.update!(
20
22
  response_id: response_json["id"],
21
23
  response_tool_calls: extract_response_tool_calls(response_json),
@@ -23,7 +25,8 @@ private
23
25
  response_array: response_json["choices"],
24
26
  completion_tokens: response_json.dig("usage", "completion_tokens"),
25
27
  prompt_tokens: response_json.dig("usage", "prompt_tokens"),
26
- total_tokens: response_json.dig("usage", "total_tokens")
28
+ total_tokens: response_json.dig("usage", "total_tokens"),
29
+ cache_read_input_tokens: response_json.dig("usage", "prompt_tokens_details", "cached_tokens")
27
30
  )
28
31
  end
29
32
 
@@ -51,9 +54,13 @@ private
51
54
  tools = build_tools_parameter(model_completion)
52
55
  parameters[:tools] = tools unless tools.blank?
53
56
 
54
- if model_completion.tool_choice.present?
57
+ if model_completion.tool_choice == "required"
58
+ parameters[:tool_choice] = build_required_tool_choice
59
+ parameters[:parallel_tool_calls] = false unless tools.blank?
60
+ elsif model_completion.tool_choice.present?
55
61
  tool_klass = model_completion.tool_choice.constantize
56
62
  parameters[:tool_choice] = build_forced_tool_choice(tool_klass.tool_name)
63
+ parameters[:parallel_tool_calls] = false unless tools.blank?
57
64
  end
58
65
  end
59
66
 
@@ -16,6 +16,8 @@ private
16
16
  end
17
17
 
18
18
  def update_model_completion(model_completion, response_json)
19
+ return if response_json.nil?
20
+
19
21
  model_completion.update!(
20
22
  response_id: response_json["id"],
21
23
  response_tool_calls: extract_response_tool_calls(response_json),
@@ -24,7 +26,8 @@ private
24
26
  citations: extract_citations(response_json),
25
27
  completion_tokens: response_json.dig("usage", "output_tokens"),
26
28
  prompt_tokens: response_json.dig("usage", "input_tokens"),
27
- total_tokens: response_json.dig("usage", "total_tokens")
29
+ total_tokens: response_json.dig("usage", "total_tokens"),
30
+ cache_read_input_tokens: response_json.dig("usage", "input_tokens_details", "cached_tokens")
28
31
  )
29
32
  end
30
33
 
@@ -96,9 +99,13 @@ private
96
99
  tools = build_tools_parameter(model_completion)
97
100
  parameters[:tools] = tools unless tools.blank?
98
101
 
99
- if model_completion.tool_choice.present?
102
+ if model_completion.tool_choice == "required"
103
+ parameters[:tool_choice] = build_required_tool_choice
104
+ parameters[:parallel_tool_calls] = false unless tools.blank?
105
+ elsif model_completion.tool_choice.present?
100
106
  tool_klass = model_completion.tool_choice.constantize
101
107
  parameters[:tool_choice] = build_forced_tool_choice(tool_klass.tool_name)
108
+ parameters[:parallel_tool_calls] = false unless tools.blank?
102
109
  end
103
110
  end
104
111