raif 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/app/assets/builds/raif_admin.css +40 -2
- data/app/assets/builds/raif_admin_sprockets.js +2709 -0
- data/app/assets/javascript/raif/admin/copy_to_clipboard_controller.js +132 -0
- data/app/assets/javascript/raif/admin/cost_estimate_controller.js +80 -0
- data/app/assets/javascript/raif/admin/judge_config_controller.js +23 -0
- data/app/assets/javascript/raif/admin/select_all_checkboxes_controller.js +33 -0
- data/app/assets/javascript/raif/admin/sortable_table_controller.js +51 -0
- data/app/assets/javascript/raif/admin/table_search_controller.js +15 -0
- data/app/assets/javascript/raif/admin/tom_select_controller.js +33 -0
- data/app/assets/javascript/raif_admin.js +23 -0
- data/app/assets/javascript/raif_admin_sprockets.js +24 -0
- data/app/assets/stylesheets/raif_admin.scss +50 -1
- data/app/controllers/raif/admin/agents_controller.rb +27 -1
- data/app/controllers/raif/admin/configs_controller.rb +1 -0
- data/app/controllers/raif/admin/llms_controller.rb +27 -0
- data/app/controllers/raif/admin/model_completions_controller.rb +6 -0
- data/app/controllers/raif/admin/prompt_studio/agents_controller.rb +25 -0
- data/app/controllers/raif/admin/prompt_studio/base_controller.rb +32 -0
- data/app/controllers/raif/admin/prompt_studio/batch_runs_controller.rb +102 -0
- data/app/controllers/raif/admin/prompt_studio/conversations_controller.rb +25 -0
- data/app/controllers/raif/admin/prompt_studio/tasks_controller.rb +64 -0
- data/app/controllers/raif/admin/tasks_controller.rb +5 -0
- data/app/helpers/raif/application_helper.rb +40 -0
- data/app/jobs/raif/prompt_studio_batch_run_item_job.rb +11 -0
- data/app/jobs/raif/prompt_studio_batch_run_job.rb +15 -0
- data/app/jobs/raif/prompt_studio_task_run_job.rb +36 -0
- data/app/models/raif/agent.rb +36 -5
- data/app/models/raif/agents/native_tool_calling_agent.rb +101 -19
- data/app/models/raif/concerns/has_prompt_templates.rb +88 -0
- data/app/models/raif/concerns/has_runtime_duration.rb +41 -0
- data/app/models/raif/concerns/json_schema_definition.rb +16 -3
- data/app/models/raif/concerns/llm_prompt_caching.rb +20 -0
- data/app/models/raif/concerns/llms/anthropic/message_formatting.rb +6 -0
- data/app/models/raif/concerns/llms/anthropic/tool_formatting.rb +5 -1
- data/app/models/raif/concerns/llms/bedrock/message_formatting.rb +7 -0
- data/app/models/raif/concerns/llms/bedrock/tool_formatting.rb +4 -0
- data/app/models/raif/concerns/llms/google/message_formatting.rb +5 -2
- data/app/models/raif/concerns/llms/google/tool_formatting.rb +4 -0
- data/app/models/raif/concerns/llms/message_formatting.rb +30 -0
- data/app/models/raif/concerns/llms/open_ai_completions/response_tool_calls.rb +1 -1
- data/app/models/raif/concerns/llms/open_ai_completions/tool_formatting.rb +4 -0
- data/app/models/raif/concerns/llms/open_ai_responses/tool_formatting.rb +4 -0
- data/app/models/raif/concerns/provider_managed_tool_calls.rb +162 -0
- data/app/models/raif/conversation.rb +24 -3
- data/app/models/raif/conversation_entry.rb +6 -3
- data/app/models/raif/embedding_models/bedrock.rb +10 -1
- data/app/models/raif/embedding_models/google.rb +37 -0
- data/app/models/raif/evals/llm_judge.rb +70 -0
- data/{lib → app/models}/raif/evals/llm_judges/binary.rb +38 -0
- data/{lib → app/models}/raif/evals/llm_judges/comparative.rb +38 -0
- data/{lib → app/models}/raif/evals/llm_judges/scored.rb +38 -0
- data/{lib → app/models}/raif/evals/llm_judges/summarization.rb +38 -0
- data/app/models/raif/llm.rb +82 -7
- data/app/models/raif/llms/anthropic.rb +26 -4
- data/app/models/raif/llms/bedrock.rb +59 -5
- data/app/models/raif/llms/google.rb +28 -2
- data/app/models/raif/llms/open_ai_base.rb +4 -0
- data/app/models/raif/llms/open_ai_completions.rb +9 -2
- data/app/models/raif/llms/open_ai_responses.rb +9 -2
- data/app/models/raif/llms/open_router.rb +10 -3
- data/app/models/raif/model_completion.rb +75 -34
- data/app/models/raif/model_tool.rb +45 -3
- data/app/models/raif/model_tool_invocation.rb +31 -1
- data/app/models/raif/prompt_studio_batch_run.rb +155 -0
- data/app/models/raif/prompt_studio_batch_run_item.rb +220 -0
- data/app/models/raif/streaming_responses/bedrock.rb +60 -1
- data/app/models/raif/task.rb +30 -6
- data/app/views/layouts/raif/admin.html.erb +31 -1
- data/app/views/raif/admin/agents/_agent.html.erb +1 -0
- data/app/views/raif/admin/agents/index.html.erb +48 -0
- data/app/views/raif/admin/agents/show.html.erb +4 -0
- data/app/views/raif/admin/llms/index.html.erb +110 -0
- data/app/views/raif/admin/model_completions/_model_completion.html.erb +3 -7
- data/app/views/raif/admin/model_completions/index.html.erb +14 -1
- data/app/views/raif/admin/model_completions/show.html.erb +164 -55
- data/app/views/raif/admin/model_tool_invocations/index.html.erb +1 -1
- data/app/views/raif/admin/model_tool_invocations/show.html.erb +18 -0
- data/app/views/raif/admin/prompt_studio/agents/index.html.erb +56 -0
- data/app/views/raif/admin/prompt_studio/agents/show.html.erb +57 -0
- data/app/views/raif/admin/prompt_studio/batch_runs/_batch_run_item.html.erb +54 -0
- data/app/views/raif/admin/prompt_studio/batch_runs/_judge_config_fields.html.erb +76 -0
- data/app/views/raif/admin/prompt_studio/batch_runs/_judge_detail_modal.html.erb +27 -0
- data/app/views/raif/admin/prompt_studio/batch_runs/_modal.html.erb +35 -0
- data/app/views/raif/admin/prompt_studio/batch_runs/_progress.html.erb +78 -0
- data/app/views/raif/admin/prompt_studio/batch_runs/show.html.erb +49 -0
- data/app/views/raif/admin/prompt_studio/conversations/index.html.erb +48 -0
- data/app/views/raif/admin/prompt_studio/conversations/show.html.erb +36 -0
- data/app/views/raif/admin/prompt_studio/shared/_nav_tabs.html.erb +17 -0
- data/app/views/raif/admin/prompt_studio/shared/_prompt_comparison.html.erb +87 -0
- data/app/views/raif/admin/prompt_studio/shared/_type_filter.html.erb +54 -0
- data/app/views/raif/admin/prompt_studio/tasks/_task_result.html.erb +145 -0
- data/app/views/raif/admin/prompt_studio/tasks/_task_row.html.erb +12 -0
- data/app/views/raif/admin/prompt_studio/tasks/_task_type_filter.html.erb +58 -0
- data/app/views/raif/admin/prompt_studio/tasks/_tasks_table.html.erb +22 -0
- data/app/views/raif/admin/prompt_studio/tasks/index.html.erb +35 -0
- data/app/views/raif/admin/prompt_studio/tasks/show.html.erb +19 -0
- data/app/views/raif/admin/tasks/_task.html.erb +1 -0
- data/app/views/raif/admin/tasks/index.html.erb +17 -5
- data/app/views/raif/admin/tasks/show.html.erb +20 -0
- data/app/views/raif/conversation_entries/_message.html.erb +10 -6
- data/config/importmap.rb +8 -0
- data/config/locales/admin.en.yml +128 -0
- data/config/locales/en.yml +36 -2
- data/config/routes.rb +8 -0
- data/db/migrate/20260307000000_add_prompt_studio_run_to_raif_tasks.rb +7 -0
- data/db/migrate/20260308000000_create_raif_prompt_studio_batch_runs.rb +27 -0
- data/db/migrate/20260308000001_create_raif_prompt_studio_batch_run_items.rb +24 -0
- data/db/migrate/20260407000000_add_cache_token_columns_to_raif_model_completions.rb +8 -0
- data/lib/generators/raif/agent/agent_generator.rb +18 -0
- data/lib/generators/raif/agent/templates/agent.rb.tt +7 -5
- data/lib/generators/raif/agent/templates/system_prompt.erb.tt +3 -0
- data/lib/generators/raif/conversation/conversation_generator.rb +19 -1
- data/lib/generators/raif/conversation/templates/system_prompt.erb.tt +4 -0
- data/lib/generators/raif/install/templates/initializer.rb +68 -27
- data/lib/generators/raif/task/task_generator.rb +18 -0
- data/lib/generators/raif/task/templates/prompt.erb.tt +4 -0
- data/lib/generators/raif/task/templates/task.rb.tt +9 -8
- data/lib/raif/configuration.rb +10 -0
- data/lib/raif/embedding_model_registry.rb +8 -0
- data/lib/raif/engine.rb +16 -1
- data/lib/raif/errors/blank_response_error.rb +8 -0
- data/lib/raif/errors/prompt_template_error.rb +15 -0
- data/lib/raif/errors.rb +2 -0
- data/lib/raif/evals.rb +0 -6
- data/lib/raif/llm_registry.rb +230 -9
- data/lib/raif/prompt_studio_comparison_builder.rb +138 -0
- data/lib/raif/token_estimator.rb +28 -0
- data/lib/raif/version.rb +1 -1
- data/lib/raif.rb +2 -0
- data/spec/support/rspec_helpers.rb +7 -1
- data/spec/support/test_task.rb +9 -0
- data/spec/support/test_template_task.rb +41 -0
- metadata +65 -7
- data/lib/raif/evals/llm_judge.rb +0 -32
- /data/{lib → app/models}/raif/evals/scoring_rubric.rb +0 -0
|
@@ -1,5 +1,43 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
# == Schema Information
|
|
4
|
+
#
|
|
5
|
+
# Table name: raif_tasks
|
|
6
|
+
#
|
|
7
|
+
# id :bigint not null, primary key
|
|
8
|
+
# available_model_tools :jsonb not null
|
|
9
|
+
# completed_at :datetime
|
|
10
|
+
# creator_type :string
|
|
11
|
+
# failed_at :datetime
|
|
12
|
+
# llm_model_key :string not null
|
|
13
|
+
# prompt :text
|
|
14
|
+
# prompt_studio_run :boolean default(FALSE), not null
|
|
15
|
+
# raw_response :text
|
|
16
|
+
# requested_language_key :string
|
|
17
|
+
# response_format :integer default("text"), not null
|
|
18
|
+
# run_with :jsonb
|
|
19
|
+
# source_type :string
|
|
20
|
+
# started_at :datetime
|
|
21
|
+
# system_prompt :text
|
|
22
|
+
# type :string not null
|
|
23
|
+
# created_at :datetime not null
|
|
24
|
+
# updated_at :datetime not null
|
|
25
|
+
# creator_id :bigint
|
|
26
|
+
# source_id :bigint
|
|
27
|
+
#
|
|
28
|
+
# Indexes
|
|
29
|
+
#
|
|
30
|
+
# index_raif_tasks_on_completed_at (completed_at)
|
|
31
|
+
# index_raif_tasks_on_created_at (created_at)
|
|
32
|
+
# index_raif_tasks_on_creator (creator_type,creator_id)
|
|
33
|
+
# index_raif_tasks_on_failed_at (failed_at)
|
|
34
|
+
# index_raif_tasks_on_source (source_type,source_id)
|
|
35
|
+
# index_raif_tasks_on_started_at (started_at)
|
|
36
|
+
# index_raif_tasks_on_type (type)
|
|
37
|
+
# index_raif_tasks_on_type_and_completed_at (type,completed_at)
|
|
38
|
+
# index_raif_tasks_on_type_and_failed_at (type,failed_at)
|
|
39
|
+
# index_raif_tasks_on_type_and_started_at (type,started_at)
|
|
40
|
+
#
|
|
3
41
|
module Raif
|
|
4
42
|
module Evals
|
|
5
43
|
module LlmJudges
|
|
@@ -1,5 +1,43 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
# == Schema Information
|
|
4
|
+
#
|
|
5
|
+
# Table name: raif_tasks
|
|
6
|
+
#
|
|
7
|
+
# id :bigint not null, primary key
|
|
8
|
+
# available_model_tools :jsonb not null
|
|
9
|
+
# completed_at :datetime
|
|
10
|
+
# creator_type :string
|
|
11
|
+
# failed_at :datetime
|
|
12
|
+
# llm_model_key :string not null
|
|
13
|
+
# prompt :text
|
|
14
|
+
# prompt_studio_run :boolean default(FALSE), not null
|
|
15
|
+
# raw_response :text
|
|
16
|
+
# requested_language_key :string
|
|
17
|
+
# response_format :integer default("text"), not null
|
|
18
|
+
# run_with :jsonb
|
|
19
|
+
# source_type :string
|
|
20
|
+
# started_at :datetime
|
|
21
|
+
# system_prompt :text
|
|
22
|
+
# type :string not null
|
|
23
|
+
# created_at :datetime not null
|
|
24
|
+
# updated_at :datetime not null
|
|
25
|
+
# creator_id :bigint
|
|
26
|
+
# source_id :bigint
|
|
27
|
+
#
|
|
28
|
+
# Indexes
|
|
29
|
+
#
|
|
30
|
+
# index_raif_tasks_on_completed_at (completed_at)
|
|
31
|
+
# index_raif_tasks_on_created_at (created_at)
|
|
32
|
+
# index_raif_tasks_on_creator (creator_type,creator_id)
|
|
33
|
+
# index_raif_tasks_on_failed_at (failed_at)
|
|
34
|
+
# index_raif_tasks_on_source (source_type,source_id)
|
|
35
|
+
# index_raif_tasks_on_started_at (started_at)
|
|
36
|
+
# index_raif_tasks_on_type (type)
|
|
37
|
+
# index_raif_tasks_on_type_and_completed_at (type,completed_at)
|
|
38
|
+
# index_raif_tasks_on_type_and_failed_at (type,failed_at)
|
|
39
|
+
# index_raif_tasks_on_type_and_started_at (type,started_at)
|
|
40
|
+
#
|
|
3
41
|
module Raif
|
|
4
42
|
module Evals
|
|
5
43
|
module LlmJudges
|
|
@@ -1,5 +1,43 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
# == Schema Information
|
|
4
|
+
#
|
|
5
|
+
# Table name: raif_tasks
|
|
6
|
+
#
|
|
7
|
+
# id :bigint not null, primary key
|
|
8
|
+
# available_model_tools :jsonb not null
|
|
9
|
+
# completed_at :datetime
|
|
10
|
+
# creator_type :string
|
|
11
|
+
# failed_at :datetime
|
|
12
|
+
# llm_model_key :string not null
|
|
13
|
+
# prompt :text
|
|
14
|
+
# prompt_studio_run :boolean default(FALSE), not null
|
|
15
|
+
# raw_response :text
|
|
16
|
+
# requested_language_key :string
|
|
17
|
+
# response_format :integer default("text"), not null
|
|
18
|
+
# run_with :jsonb
|
|
19
|
+
# source_type :string
|
|
20
|
+
# started_at :datetime
|
|
21
|
+
# system_prompt :text
|
|
22
|
+
# type :string not null
|
|
23
|
+
# created_at :datetime not null
|
|
24
|
+
# updated_at :datetime not null
|
|
25
|
+
# creator_id :bigint
|
|
26
|
+
# source_id :bigint
|
|
27
|
+
#
|
|
28
|
+
# Indexes
|
|
29
|
+
#
|
|
30
|
+
# index_raif_tasks_on_completed_at (completed_at)
|
|
31
|
+
# index_raif_tasks_on_created_at (created_at)
|
|
32
|
+
# index_raif_tasks_on_creator (creator_type,creator_id)
|
|
33
|
+
# index_raif_tasks_on_failed_at (failed_at)
|
|
34
|
+
# index_raif_tasks_on_source (source_type,source_id)
|
|
35
|
+
# index_raif_tasks_on_started_at (started_at)
|
|
36
|
+
# index_raif_tasks_on_type (type)
|
|
37
|
+
# index_raif_tasks_on_type_and_completed_at (type,completed_at)
|
|
38
|
+
# index_raif_tasks_on_type_and_failed_at (type,failed_at)
|
|
39
|
+
# index_raif_tasks_on_type_and_started_at (type,started_at)
|
|
40
|
+
#
|
|
3
41
|
module Raif
|
|
4
42
|
module Evals
|
|
5
43
|
module LlmJudges
|
|
@@ -1,5 +1,43 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
# == Schema Information
|
|
4
|
+
#
|
|
5
|
+
# Table name: raif_tasks
|
|
6
|
+
#
|
|
7
|
+
# id :bigint not null, primary key
|
|
8
|
+
# available_model_tools :jsonb not null
|
|
9
|
+
# completed_at :datetime
|
|
10
|
+
# creator_type :string
|
|
11
|
+
# failed_at :datetime
|
|
12
|
+
# llm_model_key :string not null
|
|
13
|
+
# prompt :text
|
|
14
|
+
# prompt_studio_run :boolean default(FALSE), not null
|
|
15
|
+
# raw_response :text
|
|
16
|
+
# requested_language_key :string
|
|
17
|
+
# response_format :integer default("text"), not null
|
|
18
|
+
# run_with :jsonb
|
|
19
|
+
# source_type :string
|
|
20
|
+
# started_at :datetime
|
|
21
|
+
# system_prompt :text
|
|
22
|
+
# type :string not null
|
|
23
|
+
# created_at :datetime not null
|
|
24
|
+
# updated_at :datetime not null
|
|
25
|
+
# creator_id :bigint
|
|
26
|
+
# source_id :bigint
|
|
27
|
+
#
|
|
28
|
+
# Indexes
|
|
29
|
+
#
|
|
30
|
+
# index_raif_tasks_on_completed_at (completed_at)
|
|
31
|
+
# index_raif_tasks_on_created_at (created_at)
|
|
32
|
+
# index_raif_tasks_on_creator (creator_type,creator_id)
|
|
33
|
+
# index_raif_tasks_on_failed_at (failed_at)
|
|
34
|
+
# index_raif_tasks_on_source (source_type,source_id)
|
|
35
|
+
# index_raif_tasks_on_started_at (started_at)
|
|
36
|
+
# index_raif_tasks_on_type (type)
|
|
37
|
+
# index_raif_tasks_on_type_and_completed_at (type,completed_at)
|
|
38
|
+
# index_raif_tasks_on_type_and_failed_at (type,failed_at)
|
|
39
|
+
# index_raif_tasks_on_type_and_started_at (type,started_at)
|
|
40
|
+
#
|
|
3
41
|
module Raif
|
|
4
42
|
module Evals
|
|
5
43
|
module LlmJudges
|
data/app/models/raif/llm.rb
CHANGED
|
@@ -52,7 +52,7 @@ module Raif
|
|
|
52
52
|
end
|
|
53
53
|
|
|
54
54
|
def chat(message: nil, messages: nil, response_format: :text, available_model_tools: [], source: nil, system_prompt: nil, temperature: nil,
|
|
55
|
-
max_completion_tokens: nil, tool_choice: nil, &block)
|
|
55
|
+
max_completion_tokens: nil, tool_choice: nil, anthropic_prompt_caching_enabled: false, bedrock_prompt_caching_enabled: false, &block)
|
|
56
56
|
unless response_format.is_a?(Symbol)
|
|
57
57
|
raise ArgumentError,
|
|
58
58
|
"Raif::Llm#chat - Invalid response format: #{response_format}. Must be a symbol (you passed #{response_format.class}) and be one of: #{VALID_RESPONSE_FORMATS.join(", ")}" # rubocop:disable Layout/LineLength
|
|
@@ -70,7 +70,15 @@ module Raif
|
|
|
70
70
|
raise ArgumentError, "Raif::Llm#chat - You must provide either a message: or messages: argument, not both"
|
|
71
71
|
end
|
|
72
72
|
|
|
73
|
-
|
|
73
|
+
# Normalize :required / "required" to the symbol form for validation
|
|
74
|
+
tool_choice = :required if tool_choice.to_s == "required"
|
|
75
|
+
|
|
76
|
+
if tool_choice == :required
|
|
77
|
+
if available_model_tools.blank?
|
|
78
|
+
raise ArgumentError,
|
|
79
|
+
"Raif::Llm#chat - tool_choice: :required requires at least one available model tool"
|
|
80
|
+
end
|
|
81
|
+
elsif tool_choice.present? && !available_model_tools.map(&:to_s).include?(tool_choice.to_s)
|
|
74
82
|
raise ArgumentError,
|
|
75
83
|
"Raif::Llm#chat - Invalid tool choice: #{tool_choice} is not included in the available model tools: #{available_model_tools.join(", ")}"
|
|
76
84
|
end
|
|
@@ -99,25 +107,29 @@ module Raif
|
|
|
99
107
|
stream_response: block_given?
|
|
100
108
|
)
|
|
101
109
|
|
|
110
|
+
model_completion.anthropic_prompt_caching_enabled = anthropic_prompt_caching_enabled
|
|
111
|
+
model_completion.bedrock_prompt_caching_enabled = bedrock_prompt_caching_enabled
|
|
112
|
+
|
|
102
113
|
model_completion.started!
|
|
103
114
|
|
|
104
115
|
retry_with_backoff(model_completion) do
|
|
105
116
|
perform_model_completion!(model_completion, &block)
|
|
117
|
+
ensure_model_completion_present!(model_completion)
|
|
106
118
|
end
|
|
107
119
|
|
|
108
120
|
model_completion.completed!
|
|
109
121
|
model_completion
|
|
110
122
|
rescue Raif::Errors::StreamingError => e
|
|
111
123
|
Rails.logger.error("Raif streaming error -- code: #{e.code} -- type: #{e.type} -- message: #{e.message} -- event: #{e.event}")
|
|
112
|
-
model_completion&.record_failure!(e)
|
|
124
|
+
model_completion&.record_failure!(e) unless model_completion&.failed?
|
|
113
125
|
raise e
|
|
114
126
|
rescue Faraday::Error => e
|
|
115
127
|
Raif.logger.error("LLM API request failed (status: #{e.response_status}): #{e.message}")
|
|
116
128
|
Raif.logger.error(e.response_body)
|
|
117
|
-
model_completion&.record_failure!(e)
|
|
129
|
+
model_completion&.record_failure!(e) unless model_completion&.failed?
|
|
118
130
|
raise e
|
|
119
131
|
rescue StandardError => e
|
|
120
|
-
model_completion&.record_failure!(e)
|
|
132
|
+
model_completion&.record_failure!(e) unless model_completion&.failed?
|
|
121
133
|
raise e
|
|
122
134
|
end
|
|
123
135
|
|
|
@@ -129,6 +141,26 @@ module Raif
|
|
|
129
141
|
VALID_RESPONSE_FORMATS
|
|
130
142
|
end
|
|
131
143
|
|
|
144
|
+
# Override in subclasses to indicate whether prompt_tokens reported by the
|
|
145
|
+
# provider already include cached tokens as a subset (OpenAI, Google,
|
|
146
|
+
# OpenRouter) or whether cached tokens are reported separately and are
|
|
147
|
+
# additive to prompt_tokens (Anthropic, Bedrock).
|
|
148
|
+
def self.prompt_tokens_include_cached_tokens?
|
|
149
|
+
true
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
# Multiplier applied to the base input_token_cost to derive the per-token
|
|
153
|
+
# cost for cache reads. Return nil when the provider has no cache pricing.
|
|
154
|
+
def self.cache_read_input_token_cost_multiplier
|
|
155
|
+
nil
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
# Multiplier applied to the base input_token_cost to derive the per-token
|
|
159
|
+
# cost for cache creation writes. Return nil when there is no write surcharge.
|
|
160
|
+
def self.cache_creation_input_token_cost_multiplier
|
|
161
|
+
nil
|
|
162
|
+
end
|
|
163
|
+
|
|
132
164
|
def supports_provider_managed_tool?(tool_klass)
|
|
133
165
|
supported_provider_managed_tools&.include?(tool_klass.to_s)
|
|
134
166
|
end
|
|
@@ -141,6 +173,20 @@ module Raif
|
|
|
141
173
|
raise NotImplementedError, "#{self.class.name} must implement #build_forced_tool_choice"
|
|
142
174
|
end
|
|
143
175
|
|
|
176
|
+
# Build the tool_choice parameter to require the model to call any tool (but not a specific one).
|
|
177
|
+
# Each provider implements this to return the correct format.
|
|
178
|
+
# @return [Hash, String] The tool_choice parameter for the provider's API
|
|
179
|
+
def build_required_tool_choice
|
|
180
|
+
raise NotImplementedError, "#{self.class.name} must implement #build_required_tool_choice"
|
|
181
|
+
end
|
|
182
|
+
|
|
183
|
+
# Whether the provider can faithfully enforce tool_choice: :required for
|
|
184
|
+
# the given tool set. Override in subclasses when a provider can only
|
|
185
|
+
# enforce required tool use for some tool types.
|
|
186
|
+
def supports_faithful_required_tool_choice?(available_model_tools)
|
|
187
|
+
available_model_tools.present?
|
|
188
|
+
end
|
|
189
|
+
|
|
144
190
|
def validate_provider_managed_tool_support!(tool)
|
|
145
191
|
unless supports_provider_managed_tool?(tool)
|
|
146
192
|
raise Raif::Errors::UnsupportedFeatureError,
|
|
@@ -150,6 +196,10 @@ module Raif
|
|
|
150
196
|
|
|
151
197
|
private
|
|
152
198
|
|
|
199
|
+
def retriable_exceptions
|
|
200
|
+
Raif.config.llm_request_retriable_exceptions
|
|
201
|
+
end
|
|
202
|
+
|
|
153
203
|
def retry_with_backoff(model_completion)
|
|
154
204
|
retries = 0
|
|
155
205
|
max_retries = Raif.config.llm_request_max_retries
|
|
@@ -158,11 +208,11 @@ module Raif
|
|
|
158
208
|
|
|
159
209
|
begin
|
|
160
210
|
yield
|
|
161
|
-
rescue *
|
|
211
|
+
rescue *retriable_exceptions => e
|
|
162
212
|
retries += 1
|
|
163
213
|
if retries <= max_retries
|
|
164
214
|
delay = [base_delay * (2**(retries - 1)), max_delay].min
|
|
165
|
-
|
|
215
|
+
log_retry(e, model_completion, retries, max_retries, delay)
|
|
166
216
|
model_completion.increment!(:retry_count)
|
|
167
217
|
sleep delay
|
|
168
218
|
retry
|
|
@@ -173,10 +223,35 @@ module Raif
|
|
|
173
223
|
end
|
|
174
224
|
end
|
|
175
225
|
|
|
226
|
+
def log_retry(error, model_completion, attempt, max_retries, delay)
|
|
227
|
+
if error.is_a?(Raif::Errors::BlankResponseError)
|
|
228
|
+
has_reasoning = model_completion.response_array&.any? do |block|
|
|
229
|
+
block.is_a?(Hash) ? block.key?("reasoning_content") : block.respond_to?(:reasoning_content)
|
|
230
|
+
end
|
|
231
|
+
Raif.logger.warn(
|
|
232
|
+
"Blank response retry #{attempt}/#{max_retries} for #{api_name} " \
|
|
233
|
+
"(ModelCompletion##{model_completion.id}, source: #{model_completion.source_type}##{model_completion.source_id}, " \
|
|
234
|
+
"completion_tokens: #{model_completion.completion_tokens}, reasoning_content_present: #{has_reasoning}). " \
|
|
235
|
+
"Waiting #{delay} seconds..."
|
|
236
|
+
)
|
|
237
|
+
else
|
|
238
|
+
Raif.logger.warn("Retrying LLM API request after error: #{error.message}. Attempt #{attempt}/#{max_retries}. Waiting #{delay} seconds...")
|
|
239
|
+
end
|
|
240
|
+
end
|
|
241
|
+
|
|
176
242
|
def streaming_response_type
|
|
177
243
|
raise NotImplementedError, "#{self.class.name} must implement #streaming_response_type"
|
|
178
244
|
end
|
|
179
245
|
|
|
246
|
+
def ensure_model_completion_present!(model_completion)
|
|
247
|
+
# response_array/raw provider data may still be present for debugging even when
|
|
248
|
+
# the normalized response has no text or tool calls.
|
|
249
|
+
return if model_completion.raw_response.present? || model_completion.response_tool_calls.present?
|
|
250
|
+
|
|
251
|
+
raise Raif::Errors::BlankResponseError,
|
|
252
|
+
"Model completion #{model_completion.id} returned no text response and no tool calls"
|
|
253
|
+
end
|
|
254
|
+
|
|
180
255
|
def streaming_chunk_handler(model_completion, &block)
|
|
181
256
|
return unless model_completion.stream_response?
|
|
182
257
|
|
|
@@ -5,6 +5,18 @@ class Raif::Llms::Anthropic < Raif::Llm
|
|
|
5
5
|
include Raif::Concerns::Llms::Anthropic::ToolFormatting
|
|
6
6
|
include Raif::Concerns::Llms::Anthropic::ResponseToolCalls
|
|
7
7
|
|
|
8
|
+
def self.prompt_tokens_include_cached_tokens?
|
|
9
|
+
false
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
def self.cache_read_input_token_cost_multiplier
|
|
13
|
+
0.1
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def self.cache_creation_input_token_cost_multiplier
|
|
17
|
+
1.25
|
|
18
|
+
end
|
|
19
|
+
|
|
8
20
|
def perform_model_completion!(model_completion, &block)
|
|
9
21
|
params = build_request_parameters(model_completion)
|
|
10
22
|
response = connection.post("messages") do |req|
|
|
@@ -49,24 +61,30 @@ private
|
|
|
49
61
|
model_completion.completion_tokens = response_json&.dig("usage", "output_tokens")
|
|
50
62
|
model_completion.prompt_tokens = response_json&.dig("usage", "input_tokens")
|
|
51
63
|
model_completion.total_tokens = model_completion.completion_tokens.to_i + model_completion.prompt_tokens.to_i
|
|
64
|
+
model_completion.cache_read_input_tokens = response_json&.dig("usage", "cache_read_input_tokens")
|
|
65
|
+
model_completion.cache_creation_input_tokens = response_json&.dig("usage", "cache_creation_input_tokens")
|
|
52
66
|
model_completion.save!
|
|
53
67
|
end
|
|
54
68
|
|
|
55
69
|
def build_request_parameters(model_completion)
|
|
56
70
|
params = {
|
|
57
71
|
model: model_completion.model_api_name,
|
|
58
|
-
messages: model_completion.messages
|
|
59
|
-
temperature: (model_completion.temperature || default_temperature).to_f,
|
|
60
|
-
max_tokens: model_completion.max_completion_tokens || default_max_completion_tokens
|
|
72
|
+
messages: model_completion.messages
|
|
61
73
|
}
|
|
62
74
|
|
|
75
|
+
params[:temperature] = (model_completion.temperature || default_temperature).to_f if supports_temperature?
|
|
76
|
+
params[:max_tokens] = model_completion.max_completion_tokens || default_max_completion_tokens
|
|
77
|
+
|
|
63
78
|
params[:system] = model_completion.system_prompt if model_completion.system_prompt.present?
|
|
79
|
+
params[:cache_control] = { type: "ephemeral" } if model_completion.anthropic_prompt_caching_enabled
|
|
64
80
|
|
|
65
81
|
if supports_native_tool_use?
|
|
66
82
|
tools = build_tools_parameter(model_completion)
|
|
67
83
|
params[:tools] = tools unless tools.blank?
|
|
68
84
|
|
|
69
|
-
if model_completion.tool_choice
|
|
85
|
+
if model_completion.tool_choice == "required"
|
|
86
|
+
params[:tool_choice] = build_required_tool_choice
|
|
87
|
+
elsif model_completion.tool_choice.present?
|
|
70
88
|
tool_klass = model_completion.tool_choice.constantize
|
|
71
89
|
params[:tool_choice] = build_forced_tool_choice(tool_klass.tool_name)
|
|
72
90
|
end
|
|
@@ -77,6 +95,10 @@ private
|
|
|
77
95
|
params
|
|
78
96
|
end
|
|
79
97
|
|
|
98
|
+
def supports_temperature?
|
|
99
|
+
provider_settings.key?(:supports_temperature) ? provider_settings[:supports_temperature] : true
|
|
100
|
+
end
|
|
101
|
+
|
|
80
102
|
def extract_text_response(resp)
|
|
81
103
|
return if resp&.dig("content").blank?
|
|
82
104
|
|
|
@@ -5,10 +5,20 @@ class Raif::Llms::Bedrock < Raif::Llm
|
|
|
5
5
|
include Raif::Concerns::Llms::Bedrock::ToolFormatting
|
|
6
6
|
include Raif::Concerns::Llms::Bedrock::ResponseToolCalls
|
|
7
7
|
|
|
8
|
+
def self.prompt_tokens_include_cached_tokens?
|
|
9
|
+
false
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
def self.cache_read_input_token_cost_multiplier
|
|
13
|
+
0.1
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def self.cache_creation_input_token_cost_multiplier
|
|
17
|
+
1.25
|
|
18
|
+
end
|
|
19
|
+
|
|
8
20
|
def perform_model_completion!(model_completion, &block)
|
|
9
|
-
|
|
10
|
-
model_completion.model_api_name = "#{Raif.config.aws_bedrock_model_name_prefix}.#{model_completion.model_api_name}"
|
|
11
|
-
end
|
|
21
|
+
model_completion.model_api_name = resolve_model_api_name(model_completion.model_api_name)
|
|
12
22
|
|
|
13
23
|
params = build_request_parameters(model_completion)
|
|
14
24
|
|
|
@@ -39,10 +49,29 @@ class Raif::Llms::Bedrock < Raif::Llm
|
|
|
39
49
|
private
|
|
40
50
|
|
|
41
51
|
def bedrock_client
|
|
42
|
-
@bedrock_client ||=
|
|
52
|
+
@bedrock_client ||= begin
|
|
53
|
+
client_options = {
|
|
54
|
+
region: Raif.config.aws_bedrock_region,
|
|
55
|
+
max_attempts: 1
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
client_options[:http_read_timeout] = Raif.config.request_read_timeout if Raif.config.request_read_timeout
|
|
59
|
+
client_options[:http_open_timeout] = Raif.config.request_open_timeout if Raif.config.request_open_timeout
|
|
60
|
+
|
|
61
|
+
Aws::BedrockRuntime::Client.new(client_options)
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
def retriable_exceptions
|
|
66
|
+
super + [
|
|
67
|
+
Aws::BedrockRuntime::Errors::ServiceError,
|
|
68
|
+
Seahorse::Client::NetworkingError
|
|
69
|
+
]
|
|
43
70
|
end
|
|
44
71
|
|
|
45
72
|
def update_model_completion(model_completion, resp)
|
|
73
|
+
return if resp.nil?
|
|
74
|
+
|
|
46
75
|
model_completion.raw_response = if model_completion.response_format_json?
|
|
47
76
|
extract_json_response(resp)
|
|
48
77
|
else
|
|
@@ -54,6 +83,8 @@ private
|
|
|
54
83
|
model_completion.completion_tokens = resp.usage.output_tokens
|
|
55
84
|
model_completion.prompt_tokens = resp.usage.input_tokens
|
|
56
85
|
model_completion.total_tokens = resp.usage.total_tokens
|
|
86
|
+
model_completion.cache_read_input_tokens = resp.usage.try(:cache_read_input_tokens)
|
|
87
|
+
model_completion.cache_creation_input_tokens = resp.usage.try(:cache_write_input_tokens)
|
|
57
88
|
model_completion.save!
|
|
58
89
|
end
|
|
59
90
|
|
|
@@ -74,12 +105,20 @@ private
|
|
|
74
105
|
tools = build_tools_parameter(model_completion)
|
|
75
106
|
params[:tool_config] = tools unless tools.blank?
|
|
76
107
|
|
|
77
|
-
if model_completion.tool_choice
|
|
108
|
+
if model_completion.tool_choice == "required"
|
|
109
|
+
params[:tool_config][:tool_choice] = build_required_tool_choice
|
|
110
|
+
elsif model_completion.tool_choice.present?
|
|
78
111
|
tool_klass = model_completion.tool_choice.constantize
|
|
79
112
|
params[:tool_config][:tool_choice] = build_forced_tool_choice(tool_klass.tool_name)
|
|
80
113
|
end
|
|
81
114
|
end
|
|
82
115
|
|
|
116
|
+
if model_completion.bedrock_prompt_caching_enabled
|
|
117
|
+
cache_point = { cache_point: { type: "default" } }
|
|
118
|
+
params[:system] << cache_point if params[:system].present?
|
|
119
|
+
messages_param.last[:content] << cache_point.dup if messages_param.last.present?
|
|
120
|
+
end
|
|
121
|
+
|
|
83
122
|
params
|
|
84
123
|
end
|
|
85
124
|
|
|
@@ -148,4 +187,19 @@ private
|
|
|
148
187
|
end
|
|
149
188
|
end
|
|
150
189
|
|
|
190
|
+
def resolve_model_api_name(model_api_name)
|
|
191
|
+
api_name = model_api_name.to_s
|
|
192
|
+
prefix = Raif.config.aws_bedrock_model_name_prefix.to_s.presence
|
|
193
|
+
|
|
194
|
+
return api_name if prefix.blank?
|
|
195
|
+
return api_name if api_name.start_with?("#{prefix}.")
|
|
196
|
+
|
|
197
|
+
# Some Bedrock model IDs are provider IDs (not inference profile IDs),
|
|
198
|
+
# so they should not be prefixed.
|
|
199
|
+
return api_name if api_name.start_with?("openai.gpt-oss-")
|
|
200
|
+
return api_name if api_name.start_with?("deepseek.")
|
|
201
|
+
|
|
202
|
+
"#{prefix}.#{api_name}"
|
|
203
|
+
end
|
|
204
|
+
|
|
151
205
|
end
|
|
@@ -5,6 +5,10 @@ class Raif::Llms::Google < Raif::Llm
|
|
|
5
5
|
include Raif::Concerns::Llms::Google::ToolFormatting
|
|
6
6
|
include Raif::Concerns::Llms::Google::ResponseToolCalls
|
|
7
7
|
|
|
8
|
+
def self.cache_read_input_token_cost_multiplier
|
|
9
|
+
0.25
|
|
10
|
+
end
|
|
11
|
+
|
|
8
12
|
def perform_model_completion!(model_completion, &block)
|
|
9
13
|
params = build_request_parameters(model_completion)
|
|
10
14
|
endpoint = build_endpoint(model_completion)
|
|
@@ -21,10 +25,17 @@ class Raif::Llms::Google < Raif::Llm
|
|
|
21
25
|
model_completion
|
|
22
26
|
end
|
|
23
27
|
|
|
28
|
+
# Whether `tool_choice: "required"` can be enforced by the provider for the
# given tool set. On top of whatever the parent implementation decides, this
# is only the case when none of the tools is provider-managed.
#
# @param available_model_tools [Array<Class, String>, nil] tool classes, or
#   class names that are constantized before being queried
# @return [Boolean, nil] the parent's falsy result as-is, otherwise whether
#   no tool responds true to `provider_managed?`
def supports_faithful_required_tool_choice?(available_model_tools)
  parent_verdict = super
  # Hand back the parent's own value (false or nil) untouched when it declines.
  return parent_verdict unless parent_verdict

  Array(available_model_tools).none? do |tool|
    resolved_tool = tool.is_a?(String) ? tool.constantize : tool
    resolved_tool.provider_managed?
  end
end
|
|
34
|
+
|
|
24
35
|
private
|
|
25
36
|
|
|
26
37
|
def connection
|
|
27
|
-
@connection ||= Faraday.new(url: "https://generativelanguage.googleapis.com/v1beta") do |f|
|
|
38
|
+
@connection ||= Faraday.new(url: "https://generativelanguage.googleapis.com/v1beta", request: Raif.default_request_options) do |f|
|
|
28
39
|
f.headers["x-goog-api-key"] = Raif.config.google_api_key
|
|
29
40
|
f.request :json
|
|
30
41
|
f.response :json
|
|
@@ -58,6 +69,7 @@ private
|
|
|
58
69
|
model_completion.prompt_tokens = response_json&.dig("usageMetadata", "promptTokenCount")
|
|
59
70
|
model_completion.total_tokens = response_json&.dig("usageMetadata", "totalTokenCount") ||
|
|
60
71
|
(model_completion.completion_tokens.to_i + model_completion.prompt_tokens.to_i)
|
|
72
|
+
model_completion.cache_read_input_tokens = response_json&.dig("usageMetadata", "cachedContentTokenCount")
|
|
61
73
|
model_completion.save!
|
|
62
74
|
end
|
|
63
75
|
|
|
@@ -76,7 +88,13 @@ private
|
|
|
76
88
|
tools = build_tools_parameter(model_completion)
|
|
77
89
|
params[:tools] = tools unless tools.blank?
|
|
78
90
|
|
|
79
|
-
if model_completion.tool_choice
|
|
91
|
+
if model_completion.tool_choice == "required"
|
|
92
|
+
if supports_faithful_required_tool_choice?(model_completion.available_model_tools)
|
|
93
|
+
params[:toolConfig] = { functionCallingConfig: build_required_tool_choice }
|
|
94
|
+
else
|
|
95
|
+
log_required_tool_choice_fallback(model_completion)
|
|
96
|
+
end
|
|
97
|
+
elsif model_completion.tool_choice.present?
|
|
80
98
|
tool_klass = model_completion.tool_choice.constantize
|
|
81
99
|
params[:toolConfig] = { functionCallingConfig: build_forced_tool_choice(tool_klass.tool_name) }
|
|
82
100
|
end
|
|
@@ -85,6 +103,14 @@ private
|
|
|
85
103
|
params
|
|
86
104
|
end
|
|
87
105
|
|
|
106
|
+
# Emits a warning through `Raif.logger` stating that `tool_choice: :required`
# is not being sent to the provider for this completion, naming the model and
# the keys of its available-tools map.
#
# @param model_completion [#model_api_name, #available_model_tools_map]
# @return [Object] whatever `Raif.logger.warn` returns
def log_required_tool_choice_fallback(model_completion)
  model_name = model_completion.model_api_name
  tool_names = model_completion.available_model_tools_map.keys.join(", ")

  warning = "Google AI cannot faithfully enforce tool_choice: :required when provider-managed tools are present. " \
    "Falling back to runtime validation for #{model_name} " \
    "(tools: #{tool_names})"

  Raif.logger.warn(warning)
end
|
|
113
|
+
|
|
88
114
|
def build_generation_config(model_completion)
|
|
89
115
|
config = {}
|
|
90
116
|
|
|
@@ -3,6 +3,10 @@
|
|
|
3
3
|
class Raif::Llms::OpenAiBase < Raif::Llm
|
|
4
4
|
include Raif::Concerns::Llms::OpenAi::JsonSchemaValidation
|
|
5
5
|
|
|
6
|
+
# Multiplier associated with cache-read input tokens for OpenAI-backed LLMs.
#
# Presumably applied to the regular input-token price for the tokens reported
# as cached in the response usage (confirm against the cost calculation,
# which is not part of this class).
#
# NOTE(review): this is a single class-wide value; verify it matches the
# cached-token pricing of every model served through subclasses.
#
# @return [Float] 0.5
def self.cache_read_input_token_cost_multiplier
  0.5
end
|
|
9
|
+
|
|
6
10
|
def perform_model_completion!(model_completion, &block)
|
|
7
11
|
if supports_temperature?
|
|
8
12
|
model_completion.temperature ||= default_temperature
|
|
@@ -16,6 +16,8 @@ private
|
|
|
16
16
|
end
|
|
17
17
|
|
|
18
18
|
def update_model_completion(model_completion, response_json)
|
|
19
|
+
return if response_json.nil?
|
|
20
|
+
|
|
19
21
|
model_completion.update!(
|
|
20
22
|
response_id: response_json["id"],
|
|
21
23
|
response_tool_calls: extract_response_tool_calls(response_json),
|
|
@@ -23,7 +25,8 @@ private
|
|
|
23
25
|
response_array: response_json["choices"],
|
|
24
26
|
completion_tokens: response_json.dig("usage", "completion_tokens"),
|
|
25
27
|
prompt_tokens: response_json.dig("usage", "prompt_tokens"),
|
|
26
|
-
total_tokens: response_json.dig("usage", "total_tokens")
|
|
28
|
+
total_tokens: response_json.dig("usage", "total_tokens"),
|
|
29
|
+
cache_read_input_tokens: response_json.dig("usage", "prompt_tokens_details", "cached_tokens")
|
|
27
30
|
)
|
|
28
31
|
end
|
|
29
32
|
|
|
@@ -51,9 +54,13 @@ private
|
|
|
51
54
|
tools = build_tools_parameter(model_completion)
|
|
52
55
|
parameters[:tools] = tools unless tools.blank?
|
|
53
56
|
|
|
54
|
-
if model_completion.tool_choice
|
|
57
|
+
if model_completion.tool_choice == "required"
|
|
58
|
+
parameters[:tool_choice] = build_required_tool_choice
|
|
59
|
+
parameters[:parallel_tool_calls] = false unless tools.blank?
|
|
60
|
+
elsif model_completion.tool_choice.present?
|
|
55
61
|
tool_klass = model_completion.tool_choice.constantize
|
|
56
62
|
parameters[:tool_choice] = build_forced_tool_choice(tool_klass.tool_name)
|
|
63
|
+
parameters[:parallel_tool_calls] = false unless tools.blank?
|
|
57
64
|
end
|
|
58
65
|
end
|
|
59
66
|
|
|
@@ -16,6 +16,8 @@ private
|
|
|
16
16
|
end
|
|
17
17
|
|
|
18
18
|
def update_model_completion(model_completion, response_json)
|
|
19
|
+
return if response_json.nil?
|
|
20
|
+
|
|
19
21
|
model_completion.update!(
|
|
20
22
|
response_id: response_json["id"],
|
|
21
23
|
response_tool_calls: extract_response_tool_calls(response_json),
|
|
@@ -24,7 +26,8 @@ private
|
|
|
24
26
|
citations: extract_citations(response_json),
|
|
25
27
|
completion_tokens: response_json.dig("usage", "output_tokens"),
|
|
26
28
|
prompt_tokens: response_json.dig("usage", "input_tokens"),
|
|
27
|
-
total_tokens: response_json.dig("usage", "total_tokens")
|
|
29
|
+
total_tokens: response_json.dig("usage", "total_tokens"),
|
|
30
|
+
cache_read_input_tokens: response_json.dig("usage", "input_tokens_details", "cached_tokens")
|
|
28
31
|
)
|
|
29
32
|
end
|
|
30
33
|
|
|
@@ -96,9 +99,13 @@ private
|
|
|
96
99
|
tools = build_tools_parameter(model_completion)
|
|
97
100
|
parameters[:tools] = tools unless tools.blank?
|
|
98
101
|
|
|
99
|
-
if model_completion.tool_choice
|
|
102
|
+
if model_completion.tool_choice == "required"
|
|
103
|
+
parameters[:tool_choice] = build_required_tool_choice
|
|
104
|
+
parameters[:parallel_tool_calls] = false unless tools.blank?
|
|
105
|
+
elsif model_completion.tool_choice.present?
|
|
100
106
|
tool_klass = model_completion.tool_choice.constantize
|
|
101
107
|
parameters[:tool_choice] = build_forced_tool_choice(tool_klass.tool_name)
|
|
108
|
+
parameters[:parallel_tool_calls] = false unless tools.blank?
|
|
102
109
|
end
|
|
103
110
|
end
|
|
104
111
|
|