raif 1.3.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +8 -7
- data/app/assets/builds/raif.css +4 -1
- data/app/assets/builds/raif_admin.css +52 -2
- data/app/assets/builds/raif_admin_sprockets.js +2709 -0
- data/app/assets/javascript/raif/admin/copy_to_clipboard_controller.js +132 -0
- data/app/assets/javascript/raif/admin/cost_estimate_controller.js +80 -0
- data/app/assets/javascript/raif/admin/judge_config_controller.js +23 -0
- data/app/assets/javascript/raif/admin/select_all_checkboxes_controller.js +33 -0
- data/app/assets/javascript/raif/admin/sortable_table_controller.js +51 -0
- data/app/assets/javascript/raif/admin/table_search_controller.js +15 -0
- data/app/assets/javascript/raif/admin/tom_select_controller.js +33 -0
- data/app/assets/javascript/raif/controllers/conversations_controller.js +1 -1
- data/app/assets/javascript/raif_admin.js +23 -0
- data/app/assets/javascript/raif_admin_sprockets.js +24 -0
- data/app/assets/stylesheets/raif/admin/conversation.scss +16 -0
- data/app/assets/stylesheets/raif/conversations.scss +3 -0
- data/app/assets/stylesheets/raif.scss +2 -1
- data/app/assets/stylesheets/raif_admin.scss +50 -1
- data/app/controllers/raif/admin/agents_controller.rb +27 -1
- data/app/controllers/raif/admin/application_controller.rb +16 -0
- data/app/controllers/raif/admin/configs_controller.rb +95 -0
- data/app/controllers/raif/admin/llms_controller.rb +27 -0
- data/app/controllers/raif/admin/model_completions_controller.rb +24 -1
- data/app/controllers/raif/admin/model_tool_invocations_controller.rb +7 -1
- data/app/controllers/raif/admin/prompt_studio/agents_controller.rb +25 -0
- data/app/controllers/raif/admin/prompt_studio/base_controller.rb +32 -0
- data/app/controllers/raif/admin/prompt_studio/batch_runs_controller.rb +102 -0
- data/app/controllers/raif/admin/prompt_studio/conversations_controller.rb +25 -0
- data/app/controllers/raif/admin/prompt_studio/tasks_controller.rb +64 -0
- data/app/controllers/raif/admin/stats/model_tool_invocations_controller.rb +21 -0
- data/app/controllers/raif/admin/stats/tasks_controller.rb +15 -6
- data/app/controllers/raif/admin/stats_controller.rb +32 -3
- data/app/controllers/raif/admin/tasks_controller.rb +5 -0
- data/app/controllers/raif/conversation_entries_controller.rb +1 -0
- data/app/controllers/raif/conversations_controller.rb +10 -2
- data/app/helpers/raif/application_helper.rb +40 -0
- data/app/jobs/raif/conversation_entry_job.rb +8 -6
- data/app/jobs/raif/prompt_studio_batch_run_item_job.rb +11 -0
- data/app/jobs/raif/prompt_studio_batch_run_job.rb +15 -0
- data/app/jobs/raif/prompt_studio_task_run_job.rb +36 -0
- data/app/models/raif/admin/task_stat.rb +7 -0
- data/app/models/raif/agent.rb +98 -6
- data/app/models/raif/agents/native_tool_calling_agent.rb +179 -52
- data/app/models/raif/application_record.rb +18 -0
- data/app/models/raif/concerns/agent_inference_stats.rb +35 -0
- data/app/models/raif/concerns/has_prompt_templates.rb +88 -0
- data/app/models/raif/concerns/has_runtime_duration.rb +41 -0
- data/app/models/raif/concerns/json_schema_definition.rb +54 -6
- data/app/models/raif/concerns/llm_prompt_caching.rb +20 -0
- data/app/models/raif/concerns/llms/anthropic/message_formatting.rb +34 -0
- data/app/models/raif/concerns/llms/anthropic/response_tool_calls.rb +24 -0
- data/app/models/raif/concerns/llms/anthropic/tool_formatting.rb +8 -0
- data/app/models/raif/concerns/llms/bedrock/message_formatting.rb +43 -0
- data/app/models/raif/concerns/llms/bedrock/response_tool_calls.rb +26 -0
- data/app/models/raif/concerns/llms/bedrock/tool_formatting.rb +8 -0
- data/app/models/raif/concerns/llms/google/message_formatting.rb +112 -0
- data/app/models/raif/concerns/llms/google/response_tool_calls.rb +32 -0
- data/app/models/raif/concerns/llms/google/tool_formatting.rb +76 -0
- data/app/models/raif/concerns/llms/message_formatting.rb +41 -5
- data/app/models/raif/concerns/llms/open_ai/json_schema_validation.rb +3 -3
- data/app/models/raif/concerns/llms/open_ai_completions/message_formatting.rb +22 -0
- data/app/models/raif/concerns/llms/open_ai_completions/response_tool_calls.rb +22 -0
- data/app/models/raif/concerns/llms/open_ai_completions/tool_formatting.rb +8 -0
- data/app/models/raif/concerns/llms/open_ai_responses/message_formatting.rb +17 -0
- data/app/models/raif/concerns/llms/open_ai_responses/response_tool_calls.rb +26 -0
- data/app/models/raif/concerns/llms/open_ai_responses/tool_formatting.rb +8 -0
- data/app/models/raif/concerns/provider_managed_tool_calls.rb +162 -0
- data/app/models/raif/concerns/run_with.rb +127 -0
- data/app/models/raif/conversation.rb +112 -8
- data/app/models/raif/conversation_entry.rb +38 -4
- data/app/models/raif/embedding_model.rb +2 -1
- data/app/models/raif/embedding_models/bedrock.rb +10 -1
- data/app/models/raif/embedding_models/google.rb +37 -0
- data/app/models/raif/embedding_models/open_ai.rb +1 -1
- data/app/models/raif/evals/llm_judge.rb +70 -0
- data/{lib → app/models}/raif/evals/llm_judges/binary.rb +41 -3
- data/{lib → app/models}/raif/evals/llm_judges/comparative.rb +41 -3
- data/{lib → app/models}/raif/evals/llm_judges/scored.rb +39 -1
- data/{lib → app/models}/raif/evals/llm_judges/summarization.rb +40 -2
- data/app/models/raif/llm.rb +104 -4
- data/app/models/raif/llms/anthropic.rb +32 -22
- data/app/models/raif/llms/bedrock.rb +64 -24
- data/app/models/raif/llms/google.rb +166 -0
- data/app/models/raif/llms/open_ai_base.rb +23 -5
- data/app/models/raif/llms/open_ai_completions.rb +14 -12
- data/app/models/raif/llms/open_ai_responses.rb +14 -17
- data/app/models/raif/llms/open_router.rb +16 -15
- data/app/models/raif/model_completion.rb +103 -1
- data/app/models/raif/model_tool.rb +55 -5
- data/app/models/raif/model_tool_invocation.rb +68 -6
- data/app/models/raif/model_tools/agent_final_answer.rb +2 -7
- data/app/models/raif/model_tools/provider_managed/code_execution.rb +4 -0
- data/app/models/raif/model_tools/provider_managed/image_generation.rb +4 -0
- data/app/models/raif/model_tools/provider_managed/web_search.rb +4 -0
- data/app/models/raif/prompt_studio_batch_run.rb +155 -0
- data/app/models/raif/prompt_studio_batch_run_item.rb +220 -0
- data/app/models/raif/streaming_responses/bedrock.rb +60 -1
- data/app/models/raif/streaming_responses/google.rb +71 -0
- data/app/models/raif/task.rb +85 -18
- data/app/models/raif/user_tool_invocation.rb +19 -0
- data/app/views/layouts/raif/admin.html.erb +43 -2
- data/app/views/raif/admin/agents/_agent.html.erb +9 -0
- data/app/views/raif/admin/agents/_conversation_message.html.erb +28 -6
- data/app/views/raif/admin/agents/index.html.erb +50 -0
- data/app/views/raif/admin/agents/show.html.erb +50 -1
- data/app/views/raif/admin/configs/show.html.erb +117 -0
- data/app/views/raif/admin/conversations/_conversation_entry.html.erb +29 -34
- data/app/views/raif/admin/conversations/show.html.erb +2 -0
- data/app/views/raif/admin/llms/index.html.erb +110 -0
- data/app/views/raif/admin/model_completions/_model_completion.html.erb +10 -5
- data/app/views/raif/admin/model_completions/index.html.erb +40 -1
- data/app/views/raif/admin/model_completions/show.html.erb +256 -84
- data/app/views/raif/admin/model_tool_invocations/index.html.erb +22 -1
- data/app/views/raif/admin/model_tool_invocations/show.html.erb +18 -0
- data/app/views/raif/admin/model_tools/_list.html.erb +16 -0
- data/app/views/raif/admin/model_tools/_model_tool.html.erb +36 -0
- data/app/views/raif/admin/prompt_studio/agents/index.html.erb +56 -0
- data/app/views/raif/admin/prompt_studio/agents/show.html.erb +57 -0
- data/app/views/raif/admin/prompt_studio/batch_runs/_batch_run_item.html.erb +54 -0
- data/app/views/raif/admin/prompt_studio/batch_runs/_judge_config_fields.html.erb +76 -0
- data/app/views/raif/admin/prompt_studio/batch_runs/_judge_detail_modal.html.erb +27 -0
- data/app/views/raif/admin/prompt_studio/batch_runs/_modal.html.erb +35 -0
- data/app/views/raif/admin/prompt_studio/batch_runs/_progress.html.erb +78 -0
- data/app/views/raif/admin/prompt_studio/batch_runs/show.html.erb +49 -0
- data/app/views/raif/admin/prompt_studio/conversations/index.html.erb +48 -0
- data/app/views/raif/admin/prompt_studio/conversations/show.html.erb +36 -0
- data/app/views/raif/admin/prompt_studio/shared/_nav_tabs.html.erb +17 -0
- data/app/views/raif/admin/prompt_studio/shared/_prompt_comparison.html.erb +87 -0
- data/app/views/raif/admin/prompt_studio/shared/_type_filter.html.erb +54 -0
- data/app/views/raif/admin/prompt_studio/tasks/_task_result.html.erb +145 -0
- data/app/views/raif/admin/prompt_studio/tasks/_task_row.html.erb +12 -0
- data/app/views/raif/admin/prompt_studio/tasks/_task_type_filter.html.erb +58 -0
- data/app/views/raif/admin/prompt_studio/tasks/_tasks_table.html.erb +22 -0
- data/app/views/raif/admin/prompt_studio/tasks/index.html.erb +35 -0
- data/app/views/raif/admin/prompt_studio/tasks/show.html.erb +19 -0
- data/app/views/raif/admin/stats/_stats_tile.html.erb +34 -0
- data/app/views/raif/admin/stats/index.html.erb +71 -88
- data/app/views/raif/admin/stats/model_tool_invocations/index.html.erb +43 -0
- data/app/views/raif/admin/stats/tasks/index.html.erb +20 -6
- data/app/views/raif/admin/tasks/_task.html.erb +1 -0
- data/app/views/raif/admin/tasks/index.html.erb +23 -6
- data/app/views/raif/admin/tasks/show.html.erb +56 -3
- data/app/views/raif/conversation_entries/_form.html.erb +3 -0
- data/app/views/raif/conversation_entries/_message.html.erb +10 -6
- data/app/views/raif/conversations/_conversation.html.erb +10 -0
- data/app/views/raif/conversations/_entry_processed.turbo_stream.erb +12 -0
- data/app/views/raif/conversations/index.html.erb +23 -0
- data/config/importmap.rb +8 -0
- data/config/locales/admin.en.yml +161 -1
- data/config/locales/en.yml +67 -4
- data/config/routes.rb +10 -0
- data/db/migrate/20250904194456_add_generating_entry_response_to_raif_conversations.rb +7 -0
- data/db/migrate/20250911125234_add_source_to_raif_tasks.rb +7 -0
- data/db/migrate/20251020005853_add_source_to_raif_agents.rb +7 -0
- data/db/migrate/20251020011346_rename_task_run_args_to_run_with.rb +7 -0
- data/db/migrate/20251020011405_add_run_with_to_raif_agents.rb +13 -0
- data/db/migrate/20251024160119_add_llm_messages_max_length_to_raif_conversations.rb +14 -0
- data/db/migrate/20251124185033_add_provider_tool_call_id_to_raif_model_tool_invocations.rb +7 -0
- data/db/migrate/20251128202941_add_tool_choice_to_raif_model_completions.rb +7 -0
- data/db/migrate/20260118144846_add_source_to_raif_conversations.rb +7 -0
- data/db/migrate/20260119000000_add_failure_tracking_to_raif_model_completions.rb +10 -0
- data/db/migrate/20260119000001_add_completed_at_to_raif_model_completions.rb +8 -0
- data/db/migrate/20260119000002_add_started_at_to_raif_model_completions.rb +8 -0
- data/db/migrate/20260307000000_add_prompt_studio_run_to_raif_tasks.rb +7 -0
- data/db/migrate/20260308000000_create_raif_prompt_studio_batch_runs.rb +27 -0
- data/db/migrate/20260308000001_create_raif_prompt_studio_batch_run_items.rb +24 -0
- data/db/migrate/20260407000000_add_cache_token_columns_to_raif_model_completions.rb +8 -0
- data/lib/generators/raif/agent/agent_generator.rb +18 -0
- data/lib/generators/raif/agent/templates/agent.rb.tt +7 -5
- data/lib/generators/raif/agent/templates/application_agent.rb.tt +1 -1
- data/lib/generators/raif/agent/templates/system_prompt.erb.tt +3 -0
- data/lib/generators/raif/conversation/conversation_generator.rb +19 -1
- data/lib/generators/raif/conversation/templates/conversation.rb.tt +6 -0
- data/lib/generators/raif/conversation/templates/system_prompt.erb.tt +4 -0
- data/lib/generators/raif/install/templates/initializer.rb +117 -8
- data/lib/generators/raif/task/task_generator.rb +18 -0
- data/lib/generators/raif/task/templates/prompt.erb.tt +4 -0
- data/lib/generators/raif/task/templates/task.rb.tt +10 -9
- data/lib/raif/configuration.rb +47 -2
- data/lib/raif/embedding_model_registry.rb +8 -0
- data/lib/raif/engine.rb +24 -1
- data/lib/raif/errors/blank_response_error.rb +8 -0
- data/lib/raif/errors/instance_dependent_schema_error.rb +8 -0
- data/lib/raif/errors/prompt_template_error.rb +15 -0
- data/lib/raif/errors/streaming_error.rb +6 -3
- data/lib/raif/errors.rb +3 -0
- data/lib/raif/evals/run.rb +1 -0
- data/lib/raif/evals.rb +0 -6
- data/lib/raif/json_schema_builder.rb +14 -0
- data/lib/raif/llm_registry.rb +433 -42
- data/lib/raif/messages.rb +180 -0
- data/lib/raif/prompt_studio_comparison_builder.rb +138 -0
- data/lib/raif/token_estimator.rb +28 -0
- data/lib/raif/version.rb +1 -1
- data/lib/raif.rb +11 -0
- data/lib/tasks/annotate_rb.rake +10 -0
- data/spec/support/rspec_helpers.rb +15 -9
- data/spec/support/test_task.rb +9 -0
- data/spec/support/test_template_task.rb +41 -0
- metadata +108 -15
- data/app/models/raif/agents/re_act_agent.rb +0 -127
- data/app/models/raif/agents/re_act_step.rb +0 -32
- data/app/models/raif/concerns/task_run_args.rb +0 -62
- data/lib/raif/evals/llm_judge.rb +0 -32
- /data/{lib → app/models}/raif/evals/scoring_rubric.rb +0 -0
|
@@ -3,11 +3,22 @@
|
|
|
3
3
|
class Raif::Llms::Bedrock < Raif::Llm
|
|
4
4
|
include Raif::Concerns::Llms::Bedrock::MessageFormatting
|
|
5
5
|
include Raif::Concerns::Llms::Bedrock::ToolFormatting
|
|
6
|
+
include Raif::Concerns::Llms::Bedrock::ResponseToolCalls
|
|
7
|
+
|
|
8
|
+
def self.prompt_tokens_include_cached_tokens?
|
|
9
|
+
false
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
def self.cache_read_input_token_cost_multiplier
|
|
13
|
+
0.1
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def self.cache_creation_input_token_cost_multiplier
|
|
17
|
+
1.25
|
|
18
|
+
end
|
|
6
19
|
|
|
7
20
|
def perform_model_completion!(model_completion, &block)
|
|
8
|
-
|
|
9
|
-
model_completion.model_api_name = "#{Raif.config.aws_bedrock_model_name_prefix}.#{model_completion.model_api_name}"
|
|
10
|
-
end
|
|
21
|
+
model_completion.model_api_name = resolve_model_api_name(model_completion.model_api_name)
|
|
11
22
|
|
|
12
23
|
params = build_request_parameters(model_completion)
|
|
13
24
|
|
|
@@ -38,10 +49,29 @@ class Raif::Llms::Bedrock < Raif::Llm
|
|
|
38
49
|
private
|
|
39
50
|
|
|
40
51
|
def bedrock_client
|
|
41
|
-
@bedrock_client ||=
|
|
52
|
+
@bedrock_client ||= begin
|
|
53
|
+
client_options = {
|
|
54
|
+
region: Raif.config.aws_bedrock_region,
|
|
55
|
+
max_attempts: 1
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
client_options[:http_read_timeout] = Raif.config.request_read_timeout if Raif.config.request_read_timeout
|
|
59
|
+
client_options[:http_open_timeout] = Raif.config.request_open_timeout if Raif.config.request_open_timeout
|
|
60
|
+
|
|
61
|
+
Aws::BedrockRuntime::Client.new(client_options)
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
def retriable_exceptions
|
|
66
|
+
super + [
|
|
67
|
+
Aws::BedrockRuntime::Errors::ServiceError,
|
|
68
|
+
Seahorse::Client::NetworkingError
|
|
69
|
+
]
|
|
42
70
|
end
|
|
43
71
|
|
|
44
72
|
def update_model_completion(model_completion, resp)
|
|
73
|
+
return if resp.nil?
|
|
74
|
+
|
|
45
75
|
model_completion.raw_response = if model_completion.response_format_json?
|
|
46
76
|
extract_json_response(resp)
|
|
47
77
|
else
|
|
@@ -53,6 +83,8 @@ private
|
|
|
53
83
|
model_completion.completion_tokens = resp.usage.output_tokens
|
|
54
84
|
model_completion.prompt_tokens = resp.usage.input_tokens
|
|
55
85
|
model_completion.total_tokens = resp.usage.total_tokens
|
|
86
|
+
model_completion.cache_read_input_tokens = resp.usage.try(:cache_read_input_tokens)
|
|
87
|
+
model_completion.cache_creation_input_tokens = resp.usage.try(:cache_write_input_tokens)
|
|
56
88
|
model_completion.save!
|
|
57
89
|
end
|
|
58
90
|
|
|
@@ -72,6 +104,19 @@ private
|
|
|
72
104
|
if supports_native_tool_use?
|
|
73
105
|
tools = build_tools_parameter(model_completion)
|
|
74
106
|
params[:tool_config] = tools unless tools.blank?
|
|
107
|
+
|
|
108
|
+
if model_completion.tool_choice == "required"
|
|
109
|
+
params[:tool_config][:tool_choice] = build_required_tool_choice
|
|
110
|
+
elsif model_completion.tool_choice.present?
|
|
111
|
+
tool_klass = model_completion.tool_choice.constantize
|
|
112
|
+
params[:tool_config][:tool_choice] = build_forced_tool_choice(tool_klass.tool_name)
|
|
113
|
+
end
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
if model_completion.bedrock_prompt_caching_enabled
|
|
117
|
+
cache_point = { cache_point: { type: "default" } }
|
|
118
|
+
params[:system] << cache_point if params[:system].present?
|
|
119
|
+
messages_param.last[:content] << cache_point.dup if messages_param.last.present?
|
|
75
120
|
end
|
|
76
121
|
|
|
77
122
|
params
|
|
@@ -121,26 +166,6 @@ private
|
|
|
121
166
|
end
|
|
122
167
|
end
|
|
123
168
|
|
|
124
|
-
def extract_response_tool_calls(resp)
|
|
125
|
-
# Get the message from the response object
|
|
126
|
-
message = resp.output.message
|
|
127
|
-
return if message.content.nil?
|
|
128
|
-
|
|
129
|
-
# Find any tool_use blocks in the content array
|
|
130
|
-
tool_uses = message.content.select do |content|
|
|
131
|
-
content.respond_to?(:tool_use) && content.tool_use.present?
|
|
132
|
-
end
|
|
133
|
-
|
|
134
|
-
return if tool_uses.blank?
|
|
135
|
-
|
|
136
|
-
tool_uses.map do |content|
|
|
137
|
-
{
|
|
138
|
-
"name" => content.tool_use.name,
|
|
139
|
-
"arguments" => content.tool_use.input
|
|
140
|
-
}
|
|
141
|
-
end
|
|
142
|
-
end
|
|
143
|
-
|
|
144
169
|
def streaming_chunk_handler(model_completion, &block)
|
|
145
170
|
return unless model_completion.stream_response?
|
|
146
171
|
|
|
@@ -162,4 +187,19 @@ private
|
|
|
162
187
|
end
|
|
163
188
|
end
|
|
164
189
|
|
|
190
|
+
def resolve_model_api_name(model_api_name)
|
|
191
|
+
api_name = model_api_name.to_s
|
|
192
|
+
prefix = Raif.config.aws_bedrock_model_name_prefix.to_s.presence
|
|
193
|
+
|
|
194
|
+
return api_name if prefix.blank?
|
|
195
|
+
return api_name if api_name.start_with?("#{prefix}.")
|
|
196
|
+
|
|
197
|
+
# Some Bedrock model IDs are provider IDs (not inference profile IDs),
|
|
198
|
+
# so they should not be prefixed.
|
|
199
|
+
return api_name if api_name.start_with?("openai.gpt-oss-")
|
|
200
|
+
return api_name if api_name.start_with?("deepseek.")
|
|
201
|
+
|
|
202
|
+
"#{prefix}.#{api_name}"
|
|
203
|
+
end
|
|
204
|
+
|
|
165
205
|
end
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class Raif::Llms::Google < Raif::Llm
|
|
4
|
+
include Raif::Concerns::Llms::Google::MessageFormatting
|
|
5
|
+
include Raif::Concerns::Llms::Google::ToolFormatting
|
|
6
|
+
include Raif::Concerns::Llms::Google::ResponseToolCalls
|
|
7
|
+
|
|
8
|
+
def self.cache_read_input_token_cost_multiplier
|
|
9
|
+
0.25
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
def perform_model_completion!(model_completion, &block)
|
|
13
|
+
params = build_request_parameters(model_completion)
|
|
14
|
+
endpoint = build_endpoint(model_completion)
|
|
15
|
+
|
|
16
|
+
response = connection.post(endpoint) do |req|
|
|
17
|
+
req.body = params
|
|
18
|
+
req.options.on_data = streaming_chunk_handler(model_completion, &block) if model_completion.stream_response?
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
unless model_completion.stream_response?
|
|
22
|
+
update_model_completion(model_completion, response.body)
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
model_completion
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def supports_faithful_required_tool_choice?(available_model_tools)
|
|
29
|
+
super && Array(available_model_tools).none? do |tool|
|
|
30
|
+
tool_class = tool.is_a?(String) ? tool.constantize : tool
|
|
31
|
+
tool_class.provider_managed?
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
private
|
|
36
|
+
|
|
37
|
+
def connection
|
|
38
|
+
@connection ||= Faraday.new(url: "https://generativelanguage.googleapis.com/v1beta", request: Raif.default_request_options) do |f|
|
|
39
|
+
f.headers["x-goog-api-key"] = Raif.config.google_api_key
|
|
40
|
+
f.request :json
|
|
41
|
+
f.response :json
|
|
42
|
+
f.response :raise_error
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def build_endpoint(model_completion)
|
|
47
|
+
if model_completion.stream_response?
|
|
48
|
+
"models/#{model_completion.model_api_name}:streamGenerateContent?alt=sse"
|
|
49
|
+
else
|
|
50
|
+
"models/#{model_completion.model_api_name}:generateContent"
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
def streaming_response_type
|
|
55
|
+
Raif::StreamingResponses::Google
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
def update_model_completion(model_completion, response_json)
|
|
59
|
+
model_completion.raw_response = if model_completion.response_format_json?
|
|
60
|
+
extract_json_response(response_json)
|
|
61
|
+
else
|
|
62
|
+
extract_text_response(response_json)
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
model_completion.response_array = response_json&.dig("candidates", 0, "content", "parts")
|
|
66
|
+
model_completion.response_tool_calls = extract_response_tool_calls(response_json)
|
|
67
|
+
model_completion.citations = extract_citations(response_json)
|
|
68
|
+
model_completion.completion_tokens = response_json&.dig("usageMetadata", "candidatesTokenCount")
|
|
69
|
+
model_completion.prompt_tokens = response_json&.dig("usageMetadata", "promptTokenCount")
|
|
70
|
+
model_completion.total_tokens = response_json&.dig("usageMetadata", "totalTokenCount") ||
|
|
71
|
+
(model_completion.completion_tokens.to_i + model_completion.prompt_tokens.to_i)
|
|
72
|
+
model_completion.cache_read_input_tokens = response_json&.dig("usageMetadata", "cachedContentTokenCount")
|
|
73
|
+
model_completion.save!
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
def build_request_parameters(model_completion)
|
|
77
|
+
params = {
|
|
78
|
+
contents: model_completion.messages
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
if model_completion.system_prompt.present?
|
|
82
|
+
params[:system_instruction] = { parts: [{ text: model_completion.system_prompt }] }
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
params[:generationConfig] = build_generation_config(model_completion)
|
|
86
|
+
|
|
87
|
+
if supports_native_tool_use?
|
|
88
|
+
tools = build_tools_parameter(model_completion)
|
|
89
|
+
params[:tools] = tools unless tools.blank?
|
|
90
|
+
|
|
91
|
+
if model_completion.tool_choice == "required"
|
|
92
|
+
if supports_faithful_required_tool_choice?(model_completion.available_model_tools)
|
|
93
|
+
params[:toolConfig] = { functionCallingConfig: build_required_tool_choice }
|
|
94
|
+
else
|
|
95
|
+
log_required_tool_choice_fallback(model_completion)
|
|
96
|
+
end
|
|
97
|
+
elsif model_completion.tool_choice.present?
|
|
98
|
+
tool_klass = model_completion.tool_choice.constantize
|
|
99
|
+
params[:toolConfig] = { functionCallingConfig: build_forced_tool_choice(tool_klass.tool_name) }
|
|
100
|
+
end
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
params
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
def log_required_tool_choice_fallback(model_completion)
|
|
107
|
+
Raif.logger.warn(
|
|
108
|
+
"Google AI cannot faithfully enforce tool_choice: :required when provider-managed tools are present. " \
|
|
109
|
+
"Falling back to runtime validation for #{model_completion.model_api_name} " \
|
|
110
|
+
"(tools: #{model_completion.available_model_tools_map.keys.join(", ")})"
|
|
111
|
+
)
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
def build_generation_config(model_completion)
|
|
115
|
+
config = {}
|
|
116
|
+
|
|
117
|
+
temperature = model_completion.temperature || default_temperature
|
|
118
|
+
config[:temperature] = temperature.to_f if temperature.present?
|
|
119
|
+
|
|
120
|
+
max_tokens = model_completion.max_completion_tokens || default_max_completion_tokens
|
|
121
|
+
config[:maxOutputTokens] = max_tokens if max_tokens.present?
|
|
122
|
+
|
|
123
|
+
# Use native JSON schema support for structured output
|
|
124
|
+
if model_completion.response_format_json? && model_completion.json_response_schema.present?
|
|
125
|
+
config[:responseMimeType] = "application/json"
|
|
126
|
+
config[:responseSchema] = sanitize_schema_for_google(model_completion.json_response_schema)
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
config
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
def extract_text_response(resp)
|
|
133
|
+
parts = resp&.dig("candidates", 0, "content", "parts")
|
|
134
|
+
return if parts.blank?
|
|
135
|
+
|
|
136
|
+
parts.select { |p| p.key?("text") }.map { |p| p["text"] }.join
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
def extract_json_response(resp)
|
|
140
|
+
# Google AI supports native JSON schema output, so the response should be in the text field
|
|
141
|
+
extract_text_response(resp)
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
def extract_citations(resp)
|
|
145
|
+
# Google AI returns grounding metadata for search results
|
|
146
|
+
grounding_metadata = resp&.dig("candidates", 0, "groundingMetadata")
|
|
147
|
+
return [] if grounding_metadata.blank?
|
|
148
|
+
|
|
149
|
+
citations = []
|
|
150
|
+
|
|
151
|
+
# Extract from grounding chunks
|
|
152
|
+
grounding_chunks = grounding_metadata["groundingChunks"] || []
|
|
153
|
+
grounding_chunks.each do |chunk|
|
|
154
|
+
web = chunk["web"]
|
|
155
|
+
next unless web.present?
|
|
156
|
+
|
|
157
|
+
citations << {
|
|
158
|
+
"url" => Raif::Utils::HtmlFragmentProcessor.strip_tracking_parameters(web["uri"]),
|
|
159
|
+
"title" => web["title"]
|
|
160
|
+
}
|
|
161
|
+
end
|
|
162
|
+
|
|
163
|
+
citations.uniq { |citation| citation["url"] }
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
end
|
|
@@ -3,6 +3,10 @@
|
|
|
3
3
|
class Raif::Llms::OpenAiBase < Raif::Llm
|
|
4
4
|
include Raif::Concerns::Llms::OpenAi::JsonSchemaValidation
|
|
5
5
|
|
|
6
|
+
def self.cache_read_input_token_cost_multiplier
|
|
7
|
+
0.5
|
|
8
|
+
end
|
|
9
|
+
|
|
6
10
|
def perform_model_completion!(model_completion, &block)
|
|
7
11
|
if supports_temperature?
|
|
8
12
|
model_completion.temperature ||= default_temperature
|
|
@@ -28,11 +32,25 @@ class Raif::Llms::OpenAiBase < Raif::Llm
|
|
|
28
32
|
private
|
|
29
33
|
|
|
30
34
|
def connection
|
|
31
|
-
@connection ||=
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
35
|
+
@connection ||= begin
|
|
36
|
+
conn = Faraday.new(url: Raif.config.open_ai_base_url, request: Raif.default_request_options) do |f|
|
|
37
|
+
case Raif.config.open_ai_auth_header_style
|
|
38
|
+
when :bearer
|
|
39
|
+
f.headers["Authorization"] = "Bearer #{Raif.config.open_ai_api_key}"
|
|
40
|
+
when :api_key
|
|
41
|
+
f.headers["api-key"] = Raif.config.open_ai_api_key
|
|
42
|
+
else
|
|
43
|
+
raise Raif::Errors::InvalidConfigError,
|
|
44
|
+
"Raif.config.open_ai_auth_header_style must be either :bearer or :api_key"
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
f.request :json
|
|
48
|
+
f.response :json
|
|
49
|
+
f.response :raise_error
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
conn.params["api-version"] = Raif.config.open_ai_api_version if Raif.config.open_ai_api_version.present?
|
|
53
|
+
conn
|
|
36
54
|
end
|
|
37
55
|
end
|
|
38
56
|
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
class Raif::Llms::OpenAiCompletions < Raif::Llms::OpenAiBase
|
|
4
4
|
include Raif::Concerns::Llms::OpenAiCompletions::MessageFormatting
|
|
5
5
|
include Raif::Concerns::Llms::OpenAiCompletions::ToolFormatting
|
|
6
|
+
include Raif::Concerns::Llms::OpenAiCompletions::ResponseToolCalls
|
|
6
7
|
|
|
7
8
|
private
|
|
8
9
|
|
|
@@ -15,6 +16,8 @@ private
|
|
|
15
16
|
end
|
|
16
17
|
|
|
17
18
|
def update_model_completion(model_completion, response_json)
|
|
19
|
+
return if response_json.nil?
|
|
20
|
+
|
|
18
21
|
model_completion.update!(
|
|
19
22
|
response_id: response_json["id"],
|
|
20
23
|
response_tool_calls: extract_response_tool_calls(response_json),
|
|
@@ -22,21 +25,11 @@ private
|
|
|
22
25
|
response_array: response_json["choices"],
|
|
23
26
|
completion_tokens: response_json.dig("usage", "completion_tokens"),
|
|
24
27
|
prompt_tokens: response_json.dig("usage", "prompt_tokens"),
|
|
25
|
-
total_tokens: response_json.dig("usage", "total_tokens")
|
|
28
|
+
total_tokens: response_json.dig("usage", "total_tokens"),
|
|
29
|
+
cache_read_input_tokens: response_json.dig("usage", "prompt_tokens_details", "cached_tokens")
|
|
26
30
|
)
|
|
27
31
|
end
|
|
28
32
|
|
|
29
|
-
def extract_response_tool_calls(resp)
|
|
30
|
-
return if resp.dig("choices", 0, "message", "tool_calls").blank?
|
|
31
|
-
|
|
32
|
-
resp.dig("choices", 0, "message", "tool_calls").map do |tool_call|
|
|
33
|
-
{
|
|
34
|
-
"name" => tool_call["function"]["name"],
|
|
35
|
-
"arguments" => JSON.parse(tool_call["function"]["arguments"])
|
|
36
|
-
}
|
|
37
|
-
end
|
|
38
|
-
end
|
|
39
|
-
|
|
40
33
|
def build_request_parameters(model_completion)
|
|
41
34
|
formatted_system_prompt = format_system_prompt(model_completion)
|
|
42
35
|
|
|
@@ -60,6 +53,15 @@ private
|
|
|
60
53
|
if supports_native_tool_use?
|
|
61
54
|
tools = build_tools_parameter(model_completion)
|
|
62
55
|
parameters[:tools] = tools unless tools.blank?
|
|
56
|
+
|
|
57
|
+
if model_completion.tool_choice == "required"
|
|
58
|
+
parameters[:tool_choice] = build_required_tool_choice
|
|
59
|
+
parameters[:parallel_tool_calls] = false unless tools.blank?
|
|
60
|
+
elsif model_completion.tool_choice.present?
|
|
61
|
+
tool_klass = model_completion.tool_choice.constantize
|
|
62
|
+
parameters[:tool_choice] = build_forced_tool_choice(tool_klass.tool_name)
|
|
63
|
+
parameters[:parallel_tool_calls] = false unless tools.blank?
|
|
64
|
+
end
|
|
63
65
|
end
|
|
64
66
|
|
|
65
67
|
if model_completion.stream_response?
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
class Raif::Llms::OpenAiResponses < Raif::Llms::OpenAiBase
|
|
4
4
|
include Raif::Concerns::Llms::OpenAiResponses::MessageFormatting
|
|
5
5
|
include Raif::Concerns::Llms::OpenAiResponses::ToolFormatting
|
|
6
|
+
include Raif::Concerns::Llms::OpenAiResponses::ResponseToolCalls
|
|
6
7
|
|
|
7
8
|
private
|
|
8
9
|
|
|
@@ -15,6 +16,8 @@ private
|
|
|
15
16
|
end
|
|
16
17
|
|
|
17
18
|
def update_model_completion(model_completion, response_json)
|
|
19
|
+
return if response_json.nil?
|
|
20
|
+
|
|
18
21
|
model_completion.update!(
|
|
19
22
|
response_id: response_json["id"],
|
|
20
23
|
response_tool_calls: extract_response_tool_calls(response_json),
|
|
@@ -23,26 +26,11 @@ private
|
|
|
23
26
|
citations: extract_citations(response_json),
|
|
24
27
|
completion_tokens: response_json.dig("usage", "output_tokens"),
|
|
25
28
|
prompt_tokens: response_json.dig("usage", "input_tokens"),
|
|
26
|
-
total_tokens: response_json.dig("usage", "total_tokens")
|
|
29
|
+
total_tokens: response_json.dig("usage", "total_tokens"),
|
|
30
|
+
cache_read_input_tokens: response_json.dig("usage", "input_tokens_details", "cached_tokens")
|
|
27
31
|
)
|
|
28
32
|
end
|
|
29
33
|
|
|
30
|
-
def extract_response_tool_calls(resp)
|
|
31
|
-
return if resp["output"].blank?
|
|
32
|
-
|
|
33
|
-
tool_calls = []
|
|
34
|
-
resp["output"].each do |output_item|
|
|
35
|
-
next unless output_item["type"] == "function_call"
|
|
36
|
-
|
|
37
|
-
tool_calls << {
|
|
38
|
-
"name" => output_item["name"],
|
|
39
|
-
"arguments" => JSON.parse(output_item["arguments"])
|
|
40
|
-
}
|
|
41
|
-
end
|
|
42
|
-
|
|
43
|
-
tool_calls.any? ? tool_calls : nil
|
|
44
|
-
end
|
|
45
|
-
|
|
46
34
|
def extract_raw_response(resp)
|
|
47
35
|
text_outputs = []
|
|
48
36
|
|
|
@@ -110,6 +98,15 @@ private
|
|
|
110
98
|
if supports_native_tool_use?
|
|
111
99
|
tools = build_tools_parameter(model_completion)
|
|
112
100
|
parameters[:tools] = tools unless tools.blank?
|
|
101
|
+
|
|
102
|
+
if model_completion.tool_choice == "required"
|
|
103
|
+
parameters[:tool_choice] = build_required_tool_choice
|
|
104
|
+
parameters[:parallel_tool_calls] = false unless tools.blank?
|
|
105
|
+
elsif model_completion.tool_choice.present?
|
|
106
|
+
tool_klass = model_completion.tool_choice.constantize
|
|
107
|
+
parameters[:tool_choice] = build_forced_tool_choice(tool_klass.tool_name)
|
|
108
|
+
parameters[:parallel_tool_calls] = false unless tools.blank?
|
|
109
|
+
end
|
|
113
110
|
end
|
|
114
111
|
|
|
115
112
|
# Add response format if needed. Default will be { "type": "text" }
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
class Raif::Llms::OpenRouter < Raif::Llm
|
|
4
4
|
include Raif::Concerns::Llms::OpenAiCompletions::MessageFormatting
|
|
5
5
|
include Raif::Concerns::Llms::OpenAiCompletions::ToolFormatting
|
|
6
|
+
include Raif::Concerns::Llms::OpenAiCompletions::ResponseToolCalls
|
|
6
7
|
include Raif::Concerns::Llms::OpenAi::JsonSchemaValidation
|
|
7
8
|
|
|
8
9
|
def perform_model_completion!(model_completion, &block)
|
|
@@ -23,7 +24,7 @@ class Raif::Llms::OpenRouter < Raif::Llm
|
|
|
23
24
|
private
|
|
24
25
|
|
|
25
26
|
def connection
|
|
26
|
-
@connection ||= Faraday.new(url: "https://openrouter.ai/api/v1") do |f|
|
|
27
|
+
@connection ||= Faraday.new(url: "https://openrouter.ai/api/v1", request: Raif.default_request_options) do |f|
|
|
27
28
|
f.headers["Authorization"] = "Bearer #{Raif.config.open_router_api_key}"
|
|
28
29
|
f.headers["HTTP-Referer"] = Raif.config.open_router_site_url if Raif.config.open_router_site_url.present?
|
|
29
30
|
f.headers["X-Title"] = Raif.config.open_router_app_name if Raif.config.open_router_app_name.present?
|
|
@@ -38,6 +39,8 @@ private
|
|
|
38
39
|
end
|
|
39
40
|
|
|
40
41
|
def update_model_completion(model_completion, response_json)
|
|
42
|
+
return if response_json.nil?
|
|
43
|
+
|
|
41
44
|
raw_response = if model_completion.response_format_json?
|
|
42
45
|
extract_json_response(response_json)
|
|
43
46
|
else
|
|
@@ -51,7 +54,8 @@ private
|
|
|
51
54
|
response_array: response_json["choices"],
|
|
52
55
|
completion_tokens: response_json.dig("usage", "completion_tokens"),
|
|
53
56
|
prompt_tokens: response_json.dig("usage", "prompt_tokens"),
|
|
54
|
-
total_tokens: response_json.dig("usage", "total_tokens")
|
|
57
|
+
total_tokens: response_json.dig("usage", "total_tokens"),
|
|
58
|
+
cache_read_input_tokens: response_json.dig("usage", "prompt_tokens_details", "cached_tokens")
|
|
55
59
|
)
|
|
56
60
|
end
|
|
57
61
|
|
|
@@ -85,6 +89,15 @@ private
|
|
|
85
89
|
end
|
|
86
90
|
|
|
87
91
|
params[:tools] = tools unless tools.blank?
|
|
92
|
+
|
|
93
|
+
if model_completion.tool_choice == "required"
|
|
94
|
+
params[:tool_choice] = build_required_tool_choice
|
|
95
|
+
params[:parallel_tool_calls] = false unless tools.blank?
|
|
96
|
+
elsif model_completion.tool_choice.present?
|
|
97
|
+
tool_klass = model_completion.tool_choice.constantize
|
|
98
|
+
params[:tool_choice] = build_forced_tool_choice(tool_klass.tool_name)
|
|
99
|
+
params[:parallel_tool_calls] = false unless tools.blank?
|
|
100
|
+
end
|
|
88
101
|
end
|
|
89
102
|
|
|
90
103
|
if model_completion.stream_response?
|
|
@@ -108,7 +121,7 @@ private
|
|
|
108
121
|
end
|
|
109
122
|
|
|
110
123
|
def extract_json_response(resp)
|
|
111
|
-
tool_calls = resp
|
|
124
|
+
tool_calls = resp&.dig("choices", 0, "message", "tool_calls")
|
|
112
125
|
return extract_text_response(resp) if tool_calls.blank?
|
|
113
126
|
|
|
114
127
|
tool_response = tool_calls.find do |tool_call|
|
|
@@ -121,16 +134,4 @@ private
|
|
|
121
134
|
extract_text_response(resp)
|
|
122
135
|
end
|
|
123
136
|
end
|
|
124
|
-
|
|
125
|
-
def extract_response_tool_calls(resp)
|
|
126
|
-
tool_calls = resp.dig("choices", 0, "message", "tool_calls")
|
|
127
|
-
return if tool_calls.blank?
|
|
128
|
-
|
|
129
|
-
tool_calls.map do |tool_call|
|
|
130
|
-
{
|
|
131
|
-
"name" => tool_call["function"]["name"],
|
|
132
|
-
"arguments" => JSON.parse(tool_call["function"]["arguments"])
|
|
133
|
-
}
|
|
134
|
-
end
|
|
135
|
-
end
|
|
136
137
|
end
|
|
@@ -1,14 +1,74 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
# == Schema Information
|
|
4
|
+
#
|
|
5
|
+
# Table name: raif_model_completions
|
|
6
|
+
#
|
|
7
|
+
# id :bigint not null, primary key
|
|
8
|
+
# available_model_tools :jsonb not null
|
|
9
|
+
# cache_creation_input_tokens :integer
|
|
10
|
+
# cache_read_input_tokens :integer
|
|
11
|
+
# citations :jsonb
|
|
12
|
+
# completed_at :datetime
|
|
13
|
+
# completion_tokens :integer
|
|
14
|
+
# failed_at :datetime
|
|
15
|
+
# failure_error :string
|
|
16
|
+
# failure_reason :text
|
|
17
|
+
# llm_model_key :string not null
|
|
18
|
+
# max_completion_tokens :integer
|
|
19
|
+
# messages :jsonb not null
|
|
20
|
+
# model_api_name :string not null
|
|
21
|
+
# output_token_cost :decimal(10, 6)
|
|
22
|
+
# prompt_token_cost :decimal(10, 6)
|
|
23
|
+
# prompt_tokens :integer
|
|
24
|
+
# raw_response :text
|
|
25
|
+
# response_array :jsonb
|
|
26
|
+
# response_format :integer default("text"), not null
|
|
27
|
+
# response_format_parameter :string
|
|
28
|
+
# response_tool_calls :jsonb
|
|
29
|
+
# retry_count :integer default(0), not null
|
|
30
|
+
# source_type :string
|
|
31
|
+
# started_at :datetime
|
|
32
|
+
# stream_response :boolean default(FALSE), not null
|
|
33
|
+
# system_prompt :text
|
|
34
|
+
# temperature :decimal(5, 3)
|
|
35
|
+
# tool_choice :string
|
|
36
|
+
# total_cost :decimal(10, 6)
|
|
37
|
+
# total_tokens :integer
|
|
38
|
+
# created_at :datetime not null
|
|
39
|
+
# updated_at :datetime not null
|
|
40
|
+
# response_id :string
|
|
41
|
+
# source_id :bigint
|
|
42
|
+
#
|
|
43
|
+
# Indexes
|
|
44
|
+
#
|
|
45
|
+
# index_raif_model_completions_on_completed_at (completed_at)
|
|
46
|
+
# index_raif_model_completions_on_created_at (created_at)
|
|
47
|
+
# index_raif_model_completions_on_failed_at (failed_at)
|
|
48
|
+
# index_raif_model_completions_on_source (source_type,source_id)
|
|
49
|
+
# index_raif_model_completions_on_started_at (started_at)
|
|
50
|
+
#
|
|
3
51
|
class Raif::ModelCompletion < Raif::ApplicationRecord
|
|
4
52
|
include Raif::Concerns::LlmResponseParsing
|
|
5
53
|
include Raif::Concerns::HasAvailableModelTools
|
|
54
|
+
include Raif::Concerns::HasRuntimeDuration
|
|
55
|
+
include Raif::Concerns::ProviderManagedToolCalls
|
|
56
|
+
include Raif::Concerns::BooleanTimestamp
|
|
57
|
+
|
|
58
|
+
attr_accessor :anthropic_prompt_caching_enabled, :bedrock_prompt_caching_enabled
|
|
59
|
+
|
|
60
|
+
boolean_timestamp :started_at
|
|
61
|
+
boolean_timestamp :completed_at
|
|
62
|
+
boolean_timestamp :failed_at
|
|
6
63
|
|
|
7
64
|
belongs_to :source, polymorphic: true, optional: true
|
|
8
65
|
|
|
9
66
|
validates :llm_model_key, presence: true, inclusion: { in: ->{ Raif.available_llm_keys.map(&:to_s) } }
|
|
10
67
|
validates :model_api_name, presence: true
|
|
11
68
|
|
|
69
|
+
# Scope to find completions that have response tool calls
|
|
70
|
+
scope :with_response_tool_calls, -> { where_json_not_blank(:response_tool_calls) }
|
|
71
|
+
|
|
12
72
|
delegate :json_response_schema, to: :source, allow_nil: true
|
|
13
73
|
|
|
14
74
|
before_save :set_total_tokens
|
|
@@ -28,8 +88,12 @@ class Raif::ModelCompletion < Raif::ApplicationRecord
|
|
|
28
88
|
end
|
|
29
89
|
|
|
30
90
|
def calculate_costs
|
|
91
|
+
# Each retry resends the same prompt, so the provider charges input tokens
|
|
92
|
+
# for every attempt. Factor in retry_count to reflect actual billing.
|
|
93
|
+
total_attempts = (retry_count || 0) + 1
|
|
94
|
+
|
|
31
95
|
if prompt_tokens.present? && llm_config[:input_token_cost].present?
|
|
32
|
-
self.prompt_token_cost =
|
|
96
|
+
self.prompt_token_cost = calculate_prompt_token_cost(total_attempts)
|
|
33
97
|
end
|
|
34
98
|
|
|
35
99
|
if completion_tokens.present? && llm_config[:output_token_cost].present?
|
|
@@ -41,8 +105,46 @@ class Raif::ModelCompletion < Raif::ApplicationRecord
|
|
|
41
105
|
end
|
|
42
106
|
end
|
|
43
107
|
|
|
108
|
+
def record_failure!(exception)
|
|
109
|
+
self.failed_at = Time.current
|
|
110
|
+
self.failure_error = exception.class.name
|
|
111
|
+
self.failure_reason = exception.message.truncate(255)
|
|
112
|
+
save!
|
|
113
|
+
end
|
|
114
|
+
|
|
44
115
|
private
|
|
45
116
|
|
|
117
|
+
def calculate_prompt_token_cost(total_attempts)
|
|
118
|
+
input_cost = llm_config[:input_token_cost]
|
|
119
|
+
llm_class = llm_config[:llm_class]
|
|
120
|
+
cache_read_multiplier = llm_class&.cache_read_input_token_cost_multiplier
|
|
121
|
+
cache_creation_multiplier = llm_class&.cache_creation_input_token_cost_multiplier
|
|
122
|
+
cached_reads = cache_read_input_tokens.to_i
|
|
123
|
+
cached_writes = cache_creation_input_tokens.to_i
|
|
124
|
+
|
|
125
|
+
if cached_reads > 0 && cache_read_multiplier.present?
|
|
126
|
+
cache_read_cost = input_cost * cache_read_multiplier
|
|
127
|
+
|
|
128
|
+
if llm_class.prompt_tokens_include_cached_tokens?
|
|
129
|
+
# OpenAI / Google / OpenRouter: cached tokens are a subset of prompt_tokens
|
|
130
|
+
non_cached = prompt_tokens - cached_reads
|
|
131
|
+
cost = (non_cached * input_cost) + (cached_reads * cache_read_cost)
|
|
132
|
+
else
|
|
133
|
+
# Anthropic / Bedrock: cached tokens are separate from prompt_tokens
|
|
134
|
+
cost = (prompt_tokens * input_cost) + (cached_reads * cache_read_cost)
|
|
135
|
+
end
|
|
136
|
+
else
|
|
137
|
+
cost = prompt_tokens * input_cost
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
# Cache creation surcharge (Anthropic / Bedrock)
|
|
141
|
+
if cached_writes > 0 && cache_creation_multiplier.present?
|
|
142
|
+
cost += cached_writes * input_cost * cache_creation_multiplier
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
cost * total_attempts
|
|
146
|
+
end
|
|
147
|
+
|
|
46
148
|
def llm_config
|
|
47
149
|
@llm_config ||= Raif.llm_config(llm_model_key.to_sym)
|
|
48
150
|
end
|