raif 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/app/assets/builds/raif_admin.css +40 -2
- data/app/assets/builds/raif_admin_sprockets.js +2709 -0
- data/app/assets/javascript/raif/admin/copy_to_clipboard_controller.js +132 -0
- data/app/assets/javascript/raif/admin/cost_estimate_controller.js +80 -0
- data/app/assets/javascript/raif/admin/judge_config_controller.js +23 -0
- data/app/assets/javascript/raif/admin/select_all_checkboxes_controller.js +33 -0
- data/app/assets/javascript/raif/admin/sortable_table_controller.js +51 -0
- data/app/assets/javascript/raif/admin/table_search_controller.js +15 -0
- data/app/assets/javascript/raif/admin/tom_select_controller.js +33 -0
- data/app/assets/javascript/raif_admin.js +23 -0
- data/app/assets/javascript/raif_admin_sprockets.js +24 -0
- data/app/assets/stylesheets/raif_admin.scss +50 -1
- data/app/controllers/raif/admin/agents_controller.rb +27 -1
- data/app/controllers/raif/admin/configs_controller.rb +1 -0
- data/app/controllers/raif/admin/llms_controller.rb +27 -0
- data/app/controllers/raif/admin/model_completions_controller.rb +6 -0
- data/app/controllers/raif/admin/prompt_studio/agents_controller.rb +25 -0
- data/app/controllers/raif/admin/prompt_studio/base_controller.rb +32 -0
- data/app/controllers/raif/admin/prompt_studio/batch_runs_controller.rb +102 -0
- data/app/controllers/raif/admin/prompt_studio/conversations_controller.rb +25 -0
- data/app/controllers/raif/admin/prompt_studio/tasks_controller.rb +64 -0
- data/app/controllers/raif/admin/tasks_controller.rb +5 -0
- data/app/helpers/raif/application_helper.rb +40 -0
- data/app/jobs/raif/prompt_studio_batch_run_item_job.rb +11 -0
- data/app/jobs/raif/prompt_studio_batch_run_job.rb +15 -0
- data/app/jobs/raif/prompt_studio_task_run_job.rb +36 -0
- data/app/models/raif/agent.rb +36 -5
- data/app/models/raif/agents/native_tool_calling_agent.rb +101 -19
- data/app/models/raif/concerns/has_prompt_templates.rb +88 -0
- data/app/models/raif/concerns/has_runtime_duration.rb +41 -0
- data/app/models/raif/concerns/json_schema_definition.rb +16 -3
- data/app/models/raif/concerns/llm_prompt_caching.rb +20 -0
- data/app/models/raif/concerns/llms/anthropic/message_formatting.rb +6 -0
- data/app/models/raif/concerns/llms/anthropic/tool_formatting.rb +5 -1
- data/app/models/raif/concerns/llms/bedrock/message_formatting.rb +7 -0
- data/app/models/raif/concerns/llms/bedrock/tool_formatting.rb +4 -0
- data/app/models/raif/concerns/llms/google/message_formatting.rb +5 -2
- data/app/models/raif/concerns/llms/google/tool_formatting.rb +4 -0
- data/app/models/raif/concerns/llms/message_formatting.rb +30 -0
- data/app/models/raif/concerns/llms/open_ai_completions/response_tool_calls.rb +1 -1
- data/app/models/raif/concerns/llms/open_ai_completions/tool_formatting.rb +4 -0
- data/app/models/raif/concerns/llms/open_ai_responses/tool_formatting.rb +4 -0
- data/app/models/raif/concerns/provider_managed_tool_calls.rb +162 -0
- data/app/models/raif/conversation.rb +24 -3
- data/app/models/raif/conversation_entry.rb +6 -3
- data/app/models/raif/embedding_models/bedrock.rb +10 -1
- data/app/models/raif/embedding_models/google.rb +37 -0
- data/app/models/raif/evals/llm_judge.rb +70 -0
- data/{lib → app/models}/raif/evals/llm_judges/binary.rb +38 -0
- data/{lib → app/models}/raif/evals/llm_judges/comparative.rb +38 -0
- data/{lib → app/models}/raif/evals/llm_judges/scored.rb +38 -0
- data/{lib → app/models}/raif/evals/llm_judges/summarization.rb +38 -0
- data/app/models/raif/llm.rb +82 -7
- data/app/models/raif/llms/anthropic.rb +26 -4
- data/app/models/raif/llms/bedrock.rb +59 -5
- data/app/models/raif/llms/google.rb +28 -2
- data/app/models/raif/llms/open_ai_base.rb +4 -0
- data/app/models/raif/llms/open_ai_completions.rb +9 -2
- data/app/models/raif/llms/open_ai_responses.rb +9 -2
- data/app/models/raif/llms/open_router.rb +10 -3
- data/app/models/raif/model_completion.rb +75 -34
- data/app/models/raif/model_tool.rb +45 -3
- data/app/models/raif/model_tool_invocation.rb +31 -1
- data/app/models/raif/prompt_studio_batch_run.rb +155 -0
- data/app/models/raif/prompt_studio_batch_run_item.rb +220 -0
- data/app/models/raif/streaming_responses/bedrock.rb +60 -1
- data/app/models/raif/task.rb +30 -6
- data/app/views/layouts/raif/admin.html.erb +31 -1
- data/app/views/raif/admin/agents/_agent.html.erb +1 -0
- data/app/views/raif/admin/agents/index.html.erb +48 -0
- data/app/views/raif/admin/agents/show.html.erb +4 -0
- data/app/views/raif/admin/llms/index.html.erb +110 -0
- data/app/views/raif/admin/model_completions/_model_completion.html.erb +3 -7
- data/app/views/raif/admin/model_completions/index.html.erb +14 -1
- data/app/views/raif/admin/model_completions/show.html.erb +164 -55
- data/app/views/raif/admin/model_tool_invocations/index.html.erb +1 -1
- data/app/views/raif/admin/model_tool_invocations/show.html.erb +18 -0
- data/app/views/raif/admin/prompt_studio/agents/index.html.erb +56 -0
- data/app/views/raif/admin/prompt_studio/agents/show.html.erb +57 -0
- data/app/views/raif/admin/prompt_studio/batch_runs/_batch_run_item.html.erb +54 -0
- data/app/views/raif/admin/prompt_studio/batch_runs/_judge_config_fields.html.erb +76 -0
- data/app/views/raif/admin/prompt_studio/batch_runs/_judge_detail_modal.html.erb +27 -0
- data/app/views/raif/admin/prompt_studio/batch_runs/_modal.html.erb +35 -0
- data/app/views/raif/admin/prompt_studio/batch_runs/_progress.html.erb +78 -0
- data/app/views/raif/admin/prompt_studio/batch_runs/show.html.erb +49 -0
- data/app/views/raif/admin/prompt_studio/conversations/index.html.erb +48 -0
- data/app/views/raif/admin/prompt_studio/conversations/show.html.erb +36 -0
- data/app/views/raif/admin/prompt_studio/shared/_nav_tabs.html.erb +17 -0
- data/app/views/raif/admin/prompt_studio/shared/_prompt_comparison.html.erb +87 -0
- data/app/views/raif/admin/prompt_studio/shared/_type_filter.html.erb +54 -0
- data/app/views/raif/admin/prompt_studio/tasks/_task_result.html.erb +145 -0
- data/app/views/raif/admin/prompt_studio/tasks/_task_row.html.erb +12 -0
- data/app/views/raif/admin/prompt_studio/tasks/_task_type_filter.html.erb +58 -0
- data/app/views/raif/admin/prompt_studio/tasks/_tasks_table.html.erb +22 -0
- data/app/views/raif/admin/prompt_studio/tasks/index.html.erb +35 -0
- data/app/views/raif/admin/prompt_studio/tasks/show.html.erb +19 -0
- data/app/views/raif/admin/tasks/_task.html.erb +1 -0
- data/app/views/raif/admin/tasks/index.html.erb +17 -5
- data/app/views/raif/admin/tasks/show.html.erb +20 -0
- data/app/views/raif/conversation_entries/_message.html.erb +10 -6
- data/config/importmap.rb +8 -0
- data/config/locales/admin.en.yml +128 -0
- data/config/locales/en.yml +36 -2
- data/config/routes.rb +8 -0
- data/db/migrate/20260307000000_add_prompt_studio_run_to_raif_tasks.rb +7 -0
- data/db/migrate/20260308000000_create_raif_prompt_studio_batch_runs.rb +27 -0
- data/db/migrate/20260308000001_create_raif_prompt_studio_batch_run_items.rb +24 -0
- data/db/migrate/20260407000000_add_cache_token_columns_to_raif_model_completions.rb +8 -0
- data/lib/generators/raif/agent/agent_generator.rb +18 -0
- data/lib/generators/raif/agent/templates/agent.rb.tt +7 -5
- data/lib/generators/raif/agent/templates/system_prompt.erb.tt +3 -0
- data/lib/generators/raif/conversation/conversation_generator.rb +19 -1
- data/lib/generators/raif/conversation/templates/system_prompt.erb.tt +4 -0
- data/lib/generators/raif/install/templates/initializer.rb +68 -27
- data/lib/generators/raif/task/task_generator.rb +18 -0
- data/lib/generators/raif/task/templates/prompt.erb.tt +4 -0
- data/lib/generators/raif/task/templates/task.rb.tt +9 -8
- data/lib/raif/configuration.rb +10 -0
- data/lib/raif/embedding_model_registry.rb +8 -0
- data/lib/raif/engine.rb +16 -1
- data/lib/raif/errors/blank_response_error.rb +8 -0
- data/lib/raif/errors/prompt_template_error.rb +15 -0
- data/lib/raif/errors.rb +2 -0
- data/lib/raif/evals.rb +0 -6
- data/lib/raif/llm_registry.rb +230 -9
- data/lib/raif/prompt_studio_comparison_builder.rb +138 -0
- data/lib/raif/token_estimator.rb +28 -0
- data/lib/raif/version.rb +1 -1
- data/lib/raif.rb +2 -0
- data/spec/support/rspec_helpers.rb +7 -1
- data/spec/support/test_task.rb +9 -0
- data/spec/support/test_template_task.rb +41 -0
- metadata +65 -7
- data/lib/raif/evals/llm_judge.rb +0 -32
- /data/{lib → app/models}/raif/evals/scoring_rubric.rb +0 -0
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Raif::Concerns::HasRuntimeDuration
|
|
4
|
+
extend ActiveSupport::Concern
|
|
5
|
+
|
|
6
|
+
# Timestamp at which the run stopped: `completed_at` when it is set,
# otherwise whatever `failed_at` holds (which may be nil).
def runtime_ended_at
  finished_at = completed_at
  return finished_at if finished_at

  failed_at
end
|
|
9
|
+
|
|
10
|
+
# Elapsed time between `started_at` and `runtime_ended_at`.
#
# Returns nil when either timestamp is blank, or when the computed
# difference is negative (end recorded before start).
def runtime_duration_seconds
  began_at = started_at
  return if began_at.blank?

  ended_at = runtime_ended_at
  return if ended_at.blank?

  elapsed = ended_at - began_at
  elapsed unless elapsed.negative?
end
|
|
18
|
+
|
|
19
|
+
# Human-readable rendering of `runtime_duration_seconds`.
#
# Returns:
#   "-"            when no duration is available (nil)
#   "<n>ms"        when the duration rounds to fewer than 1000 milliseconds
#   "<n>s"         when it rounds to fewer than 60 seconds (at most two
#                  decimals, trailing ".0" removed)
#   "[Hh ][Mm ]Ss" otherwise, using whole seconds
#
# The unit is chosen AFTER rounding so a value never rolls over inside its
# own bucket (0.9996s is shown as "1s", not "1000ms"; 59.999s is shown as
# "1m 0s", not "60s").
def runtime_duration
  elapsed = runtime_duration_seconds
  return "-" if elapsed.nil?

  millis = (elapsed * 1000).round
  return "#{millis}ms" if millis < 1000

  hundredths = (elapsed * 100).round
  if hundredths < 6000
    # Float#to_s never emits trailing zeros other than a lone ".0".
    return "#{(hundredths / 100.0).to_s.delete_suffix(".0")}s"
  end

  total_seconds = elapsed.round
  hours, remainder = total_seconds.divmod(3600)
  minutes, seconds = remainder.divmod(60)

  parts = []
  parts << "#{hours}h" if hours.positive?
  # Keep the minutes slot whenever hours are shown, even if it is zero.
  parts << "#{minutes}m" if minutes.positive? || hours.positive?
  parts << "#{seconds}s"
  parts.join(" ")
end
|
|
41
|
+
end
|
|
@@ -6,7 +6,7 @@ module Raif
|
|
|
6
6
|
extend ActiveSupport::Concern
|
|
7
7
|
|
|
8
8
|
class_methods do
|
|
9
|
-
def json_schema_definition(schema_name, &block)
|
|
9
|
+
def json_schema_definition(schema_name, dynamic: false, &block)
|
|
10
10
|
raise ArgumentError, "A block must be provided to define the JSON schema" unless block_given?
|
|
11
11
|
|
|
12
12
|
# Check if block expects an instance parameter (arity == 1)
|
|
@@ -16,6 +16,10 @@ module Raif
|
|
|
16
16
|
# Store block for instance-dependent schema building
|
|
17
17
|
@schema_blocks ||= {}
|
|
18
18
|
@schema_blocks[schema_name] = block
|
|
19
|
+
elsif dynamic
|
|
20
|
+
# Store block for class-level dynamic schema (re-evaluated each call)
|
|
21
|
+
@dynamic_schema_blocks ||= {}
|
|
22
|
+
@dynamic_schema_blocks[schema_name] = block
|
|
19
23
|
else
|
|
20
24
|
# Build schema immediately for class-level (backward compatible)
|
|
21
25
|
@schemas ||= {}
|
|
@@ -25,7 +29,9 @@ module Raif
|
|
|
25
29
|
end
|
|
26
30
|
|
|
27
31
|
def schema_defined?(schema_name)
|
|
28
|
-
@schemas&.dig(schema_name).present? ||
|
|
32
|
+
@schemas&.dig(schema_name).present? ||
|
|
33
|
+
@schema_blocks&.dig(schema_name).present? ||
|
|
34
|
+
@dynamic_schema_blocks&.dig(schema_name).present?
|
|
29
35
|
end
|
|
30
36
|
|
|
31
37
|
def schema_for(schema_name)
|
|
@@ -36,6 +42,13 @@ module Raif
|
|
|
36
42
|
"Call this method on an instance instead."
|
|
37
43
|
end
|
|
38
44
|
|
|
45
|
+
# Check if this is a dynamic schema (re-evaluate each call)
|
|
46
|
+
if @dynamic_schema_blocks&.dig(schema_name).present?
|
|
47
|
+
builder = Raif::JsonSchemaBuilder.new
|
|
48
|
+
builder.instance_eval(&@dynamic_schema_blocks[schema_name])
|
|
49
|
+
return builder.to_schema
|
|
50
|
+
end
|
|
51
|
+
|
|
39
52
|
@schemas[schema_name].to_schema
|
|
40
53
|
end
|
|
41
54
|
|
|
@@ -54,7 +67,7 @@ module Raif
|
|
|
54
67
|
builder.build_with_instance(self, &block)
|
|
55
68
|
builder.to_schema
|
|
56
69
|
elsif self.class.schema_defined?(schema_name)
|
|
57
|
-
# Fall back to class-level schema
|
|
70
|
+
# Fall back to class-level schema (handles both static and dynamic)
|
|
58
71
|
self.class.schema_for(schema_name)
|
|
59
72
|
end
|
|
60
73
|
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Adds per-class opt-in flags for provider prompt caching.
#
# Including classes get two class attributes, both defaulting to false and
# without instance-level writers, plus one class-level macro per provider
# that switches the corresponding flag on.
module Raif::Concerns::LlmPromptCaching
  extend ActiveSupport::Concern

  included do
    class_attribute :anthropic_prompt_caching_enabled,
      :bedrock_prompt_caching_enabled,
      instance_writer: false,
      default: false
  end

  class_methods do
    # Turns on the Anthropic prompt caching flag for this class.
    def enable_anthropic_prompt_caching
      self.anthropic_prompt_caching_enabled = true
    end

    # Turns on the Bedrock prompt caching flag for this class.
    def enable_bedrock_prompt_caching
      self.bedrock_prompt_caching_enabled = true
    end
  end
end
|
|
@@ -3,6 +3,12 @@
|
|
|
3
3
|
module Raif::Concerns::Llms::Anthropic::MessageFormatting
|
|
4
4
|
extend ActiveSupport::Concern
|
|
5
5
|
|
|
6
|
+
# Formats messages with the inherited implementation, then collapses
# adjacent same-role messages.
#
# Anthropic tool results come back as user-role content blocks, so a
# conversation continuation can otherwise contain consecutive user messages.
def format_messages(messages)
  formatted = super
  consolidate_consecutive_role_messages(formatted, content_key: "content")
end
|
|
11
|
+
|
|
6
12
|
def format_model_image_input_message(image_input)
|
|
7
13
|
if image_input.source_type == :url
|
|
8
14
|
{
|
|
@@ -55,6 +55,10 @@ module Raif::Concerns::Llms::Anthropic::ToolFormatting
|
|
|
55
55
|
end
|
|
56
56
|
|
|
57
57
|
def build_forced_tool_choice(tool_name)
|
|
58
|
-
{ "type" => "tool", "name" => tool_name }
|
|
58
|
+
{ "type" => "tool", "name" => tool_name, "disable_parallel_tool_use" => true }
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# Tool-choice payload used when a tool call is required but no specific
# tool is forced: type "any" with parallel tool use disabled.
def build_required_tool_choice
  tool_choice = { "type" => "any" }
  tool_choice["disable_parallel_tool_use"] = true
  tool_choice
end
|
|
60
64
|
end
|
|
@@ -3,6 +3,13 @@
|
|
|
3
3
|
module Raif::Concerns::Llms::Bedrock::MessageFormatting
|
|
4
4
|
extend ActiveSupport::Concern
|
|
5
5
|
|
|
6
|
+
# Formats messages with the inherited implementation, then collapses
# adjacent same-role messages.
#
# Bedrock represents tool results as user-role content blocks, so a
# tool_result followed by the next user prompt has to be merged into a
# single user message before the request is sent to the provider.
def format_messages(messages)
  formatted = super
  consolidate_consecutive_role_messages(formatted, content_key: "content")
end
|
|
12
|
+
|
|
6
13
|
def format_string_message(content, role: nil)
|
|
7
14
|
{ "text" => content }
|
|
8
15
|
end
|
|
@@ -3,9 +3,10 @@
|
|
|
3
3
|
module Raif::Concerns::Llms::Google::MessageFormatting
|
|
4
4
|
extend ActiveSupport::Concern
|
|
5
5
|
|
|
6
|
-
#
|
|
6
|
+
# Google uses a different envelope ("parts") and also represents tool results as
|
|
7
|
+
# user-role messages, so we normalize adjacent same-role messages after formatting.
|
|
7
8
|
def format_messages(messages)
|
|
8
|
-
messages.map do |message|
|
|
9
|
+
formatted_messages = messages.map do |message|
|
|
9
10
|
if message.is_a?(Hash) && message["type"] == "tool_call"
|
|
10
11
|
format_tool_call_message(message)
|
|
11
12
|
elsif message.is_a?(Hash) && message["type"] == "tool_call_result"
|
|
@@ -20,6 +21,8 @@ module Raif::Concerns::Llms::Google::MessageFormatting
|
|
|
20
21
|
}
|
|
21
22
|
end
|
|
22
23
|
end
|
|
24
|
+
|
|
25
|
+
consolidate_consecutive_role_messages(formatted_messages, content_key: "parts")
|
|
23
26
|
end
|
|
24
27
|
|
|
25
28
|
def format_string_message(content, role: nil)
|
|
@@ -49,6 +49,10 @@ module Raif::Concerns::Llms::Google::ToolFormatting
|
|
|
49
49
|
{ mode: "ANY", allowedFunctionNames: [tool_name] }
|
|
50
50
|
end
|
|
51
51
|
|
|
52
|
+
# Tool-choice payload used when a tool call is required but no specific
# function is named: mode "ANY" without an allowed-function list.
def build_required_tool_choice
  required_mode = "ANY"
  { mode: required_mode }
end
|
|
55
|
+
|
|
52
56
|
private
|
|
53
57
|
|
|
54
58
|
# Google's API doesn't support additionalProperties in JSON schemas
|
|
@@ -45,4 +45,34 @@ module Raif::Concerns::Llms::MessageFormatting
|
|
|
45
45
|
{ "type" => "text", "text" => content }
|
|
46
46
|
end
|
|
47
47
|
|
|
48
|
+
# Collapses runs of adjacent messages that share a role into one message by
# concatenating their `content_key` arrays.
#
# Bedrock, Anthropic, and Google all model tool results as ordinary
# role-based content blocks, so after formatting a tool result can be a
# "user" message immediately followed by the next user turn. Those providers
# expect alternating roles, so their adapters call this after formatting.
#
# messages    - formatted message list
# content_key - key holding the array of content blocks ("content", "parts")
#
# Returns `messages` itself when it has at most one element; otherwise a new
# array of deep-duplicated (and possibly merged) messages.
def consolidate_consecutive_role_messages(messages, content_key:)
  return messages if messages.size <= 1

  merged = []
  messages.each do |message|
    # Work on a copy so the caller's message hashes are never mutated.
    copy = message.deep_dup
    tail = merged.last

    if mergeable_consecutive_role_messages?(tail, copy, content_key:)
      tail[content_key] = tail[content_key] + copy[content_key]
    else
      merged.push(copy)
    end
  end
  merged
end
|
|
66
|
+
|
|
67
|
+
private
|
|
68
|
+
|
|
69
|
+
# True when both arguments are hashes with the same non-blank "role" and
# both hold an Array under `content_key`, i.e. their content can be
# concatenated into a single message.
def mergeable_consecutive_role_messages?(previous_message, message, content_key:)
  return false unless previous_message.is_a?(Hash) && message.is_a?(Hash)

  role = previous_message["role"]
  return false unless role.present? && role == message["role"]

  [previous_message, message].all? { |entry| entry[content_key].is_a?(Array) }
end
|
|
77
|
+
|
|
48
78
|
end
|
|
@@ -4,7 +4,7 @@ module Raif::Concerns::Llms::OpenAiCompletions::ResponseToolCalls
|
|
|
4
4
|
extend ActiveSupport::Concern
|
|
5
5
|
|
|
6
6
|
def extract_response_tool_calls(resp)
|
|
7
|
-
tool_calls = resp
|
|
7
|
+
tool_calls = resp&.dig("choices", 0, "message", "tool_calls")
|
|
8
8
|
return if tool_calls.blank?
|
|
9
9
|
|
|
10
10
|
tool_calls.map do |tool_call|
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Builds a uniform, admin-friendly list of tool calls that the provider ran
# on its own (as opposed to developer-managed tool calls), for display on the
# model completion page.
#
# The including object is expected to supply `response_array`, `citations`,
# and `available_model_tools_map`.
module Raif::Concerns::ProviderManagedToolCalls
  extend ActiveSupport::Concern

  # Normalized provider-managed tool calls for this completion.
  #
  # Each entry is a Hash with the string keys "tool_name",
  # "provider_tool_call_id", "status", "arguments", "sources", "raw_result",
  # and "inferred".
  #
  # The result is memoized, which assumes the response payload is not
  # mutated after the first read.
  def provider_managed_tool_calls
    @provider_managed_tool_calls ||= begin
      calls = extract_provider_managed_tool_calls
      calls = inferred_provider_managed_tool_calls if calls.empty?

      calls.map do |call|
        if call["tool_name"] == "web_search"
          # Search sources may come from explicit provider result blocks
          # (Anthropic) or from top-level citations (OpenAI / Google), so
          # both are merged.
          call.merge("sources" => merge_provider_managed_sources(call["sources"], citations))
        else
          call
        end
      end
    end
  end

  # Citations with any "url" that is not http/https replaced by nil.
  def sanitized_citations
    @sanitized_citations ||= Array(citations).map do |citation|
      raw_url = citation["url"]
      http_url = raw_url.present? && raw_url.match?(%r{\Ahttps?://}i)
      citation.merge("url" => http_url ? raw_url : nil)
    end
  end

  private

  # Scans the Hash entries of `response_array` and builds one normalized
  # call per recognized provider-managed block.
  def extract_provider_managed_tool_calls
    blocks = Array(response_array).grep(Hash)

    # Index every block that carries a `tool_use_id` under that id.
    results_by_tool_use_id = Hash.new { |lookup, id| lookup[id] = [] }
    blocks.each do |blk|
      tool_use_id = blk["tool_use_id"]
      results_by_tool_use_id[tool_use_id] << blk unless tool_use_id.blank?
    end

    blocks.filter_map do |blk|
      case blk["type"]
      when "server_tool_use"
        # Anthropic stores the invocation in one block and the result in a
        # separate block keyed by `tool_use_id`.
        build_provider_managed_server_tool_call(blk, results_by_tool_use_id)
      when "web_search_call", "web_search_preview",
           "code_interpreter_call", "code_interpreter",
           "image_generation_call", "image_generation"
        # OpenAI Responses persists provider-managed calls as top-level
        # typed blocks such as `web_search_call` or `code_interpreter`.
        build_provider_managed_tool_call_from_type(blk)
      end
    end
  end

  # Normalized call for a "server_tool_use" block, or nil when the tool is
  # unknown or not available as a provider-managed tool.
  def build_provider_managed_server_tool_call(block, result_blocks_by_tool_use_id)
    normalized_name = normalize_provider_managed_tool_name(block["name"])
    return unless provider_managed_tool_available?(normalized_name)

    result_blocks = result_blocks_by_tool_use_id[block["id"]].presence
    {
      "tool_name" => normalized_name,
      "provider_tool_call_id" => block["id"],
      "status" => block["status"],
      "arguments" => block["input"].presence,
      "sources" => extract_provider_managed_sources(result_blocks),
      "raw_result" => result_blocks,
      "inferred" => false
    }
  end

  # Normalized call for a block whose "type" names the tool, or nil when the
  # tool is unknown or not available as a provider-managed tool.
  def build_provider_managed_tool_call_from_type(block)
    normalized_name = normalize_provider_managed_tool_name(block["type"])
    return unless provider_managed_tool_available?(normalized_name)

    # Everything except the envelope keys doubles as arguments and raw result.
    remainder = block.except("id", "type", "status").presence
    {
      "tool_name" => normalized_name,
      "provider_tool_call_id" => block["id"],
      "status" => block["status"],
      "arguments" => remainder,
      "sources" => [],
      "raw_result" => remainder,
      "inferred" => false
    }
  end

  # Google currently provides citations for provider-managed web search but
  # no first-class tool call block in `response_array`, so a single search
  # invocation is inferred when web search was available and citations exist.
  def inferred_provider_managed_tool_calls
    web_search_cited = provider_managed_tool_available?("web_search") && citations.present?
    return [] unless web_search_cited

    inferred_call = {
      "tool_name" => "web_search",
      "provider_tool_call_id" => nil,
      "status" => "completed",
      "arguments" => nil,
      "sources" => merge_provider_managed_sources([], citations),
      "raw_result" => nil,
      "inferred" => true
    }
    [inferred_call]
  end

  # Collects "web_search_result" content entries from the given result
  # blocks as title/url/page_age hashes (nil values dropped), de-duplicated
  # by URL, falling back to title.
  def extract_provider_managed_sources(result_blocks)
    sources = Array(result_blocks).flat_map do |result_block|
      Array(result_block["content"]).filter_map do |content_block|
        next unless content_block.is_a?(Hash) && content_block["type"] == "web_search_result"

        {
          "title" => content_block["title"],
          "url" => normalize_provider_managed_source_url(content_block["url"]),
          "page_age" => content_block["page_age"]
        }.compact
      end
    end

    sources.uniq { |source| source["url"].presence || source["title"] }
  end

  # Combines two source lists into title/url/page_age hashes, skipping
  # non-Hash entries and entries left empty after nil removal, and
  # de-duplicating by URL, falling back to title.
  def merge_provider_managed_sources(existing_sources, extra_sources)
    combined = Array(existing_sources) + Array(extra_sources)

    normalized = combined.filter_map do |source|
      next unless source.is_a?(Hash)

      {
        "title" => source["title"],
        "url" => normalize_provider_managed_source_url(source["url"]),
        "page_age" => source["page_age"]
      }.compact.presence
    end

    normalized.uniq { |source| source["url"].presence || source["title"] }
  end

  # Maps provider-specific tool/block names onto one canonical tool name;
  # nil for anything unrecognized.
  def normalize_provider_managed_tool_name(name)
    case name.to_s
    when "web_search", "web_search_call", "web_search_preview" then "web_search"
    when "code_execution", "code_interpreter", "code_interpreter_call" then "code_execution"
    when "image_generation", "image_generation_call" then "image_generation"
    end
  end

  # Whether `tool_name` maps to an entry in `available_model_tools_map` that
  # reports itself as provider-managed. False for a blank name; nil when the
  # map has no such entry.
  def provider_managed_tool_available?(tool_name)
    tool_name.present? && available_model_tools_map[tool_name]&.provider_managed?
  end

  # Passes the URL through `strip_tracking_parameters` and returns it only
  # if it is http/https; nil otherwise (including blank input).
  def normalize_provider_managed_source_url(url)
    return if url.blank?

    cleaned = Raif::Utils::HtmlFragmentProcessor.strip_tracking_parameters(url)
    cleaned if cleaned.match?(%r{\Ahttps?://}i)
  end
end
|
|
@@ -29,10 +29,13 @@
|
|
|
29
29
|
# index_raif_conversations_on_source (source_type,source_id)
|
|
30
30
|
#
|
|
31
31
|
class Raif::Conversation < Raif::ApplicationRecord
|
|
32
|
+
prepend Raif::Concerns::HasPromptTemplates
|
|
33
|
+
|
|
32
34
|
include Raif::Concerns::HasLlm
|
|
33
35
|
include Raif::Concerns::HasRequestedLanguage
|
|
34
36
|
include Raif::Concerns::HasAvailableModelTools
|
|
35
37
|
include Raif::Concerns::LlmResponseParsing
|
|
38
|
+
include Raif::Concerns::LlmPromptCaching
|
|
36
39
|
|
|
37
40
|
belongs_to :creator, polymorphic: true
|
|
38
41
|
belongs_to :source, polymorphic: true, optional: true
|
|
@@ -103,6 +106,8 @@ class Raif::Conversation < Raif::ApplicationRecord
|
|
|
103
106
|
response_format: response_format.to_sym,
|
|
104
107
|
system_prompt: system_prompt,
|
|
105
108
|
available_model_tools: available_model_tools,
|
|
109
|
+
anthropic_prompt_caching_enabled: self.class.anthropic_prompt_caching_enabled,
|
|
110
|
+
bedrock_prompt_caching_enabled: self.class.bedrock_prompt_caching_enabled,
|
|
106
111
|
&block
|
|
107
112
|
)
|
|
108
113
|
|
|
@@ -153,15 +158,18 @@ class Raif::Conversation < Raif::ApplicationRecord
|
|
|
153
158
|
tool_invocations = entry.raif_model_tool_invocations.to_a
|
|
154
159
|
|
|
155
160
|
if tool_invocations.any?
|
|
156
|
-
# First tool call includes the assistant's message (if any)
|
|
161
|
+
# First tool call includes the assistant's message (if any).
|
|
162
|
+
# For the result payload we send the model-facing observation when the tool
|
|
163
|
+
# opts into observations, while keeping the raw invocation.result persisted
|
|
164
|
+
# for admin/UI rendering.
|
|
157
165
|
first_invocation = tool_invocations.shift
|
|
158
166
|
messages << first_invocation.as_tool_call_message(assistant_message: entry.model_response_message.presence)
|
|
159
|
-
messages << first_invocation.as_tool_call_result_message
|
|
167
|
+
messages << first_invocation.as_tool_call_result_message(result: tool_result_for_llm(first_invocation))
|
|
160
168
|
|
|
161
169
|
# Remaining tool calls (if multiple)
|
|
162
170
|
tool_invocations.each do |tool_invocation|
|
|
163
171
|
messages << tool_invocation.as_tool_call_message
|
|
164
|
-
messages << tool_invocation.as_tool_call_result_message
|
|
172
|
+
messages << tool_invocation.as_tool_call_result_message(result: tool_result_for_llm(tool_invocation))
|
|
165
173
|
end
|
|
166
174
|
elsif entry.model_response_message.present?
|
|
167
175
|
# No tool calls, just a regular assistant response
|
|
@@ -176,4 +184,17 @@ class Raif::Conversation < Raif::ApplicationRecord
|
|
|
176
184
|
available_user_tools.map(&:constantize)
|
|
177
185
|
end
|
|
178
186
|
|
|
187
|
+
private
|
|
188
|
+
|
|
189
|
+
# Chooses what to send back to the model as the result of a tool invocation.
#
# Some tools persist a compact structured result for display/admin purposes
# but need richer text/XML sent to the model for the continuation turn. When
# the invocation triggers an observation and its tool responds to
# `observation_for_invocation`, a present observation is used; in every
# other case the persisted `result` is returned.
def tool_result_for_llm(tool_invocation)
  if tool_invocation.triggers_observation_to_model?
    tool = tool_invocation.tool

    if tool.respond_to?(:observation_for_invocation)
      observation = tool.observation_for_invocation(tool_invocation).presence
      return observation if observation
    end
  end

  tool_invocation.result
end
|
|
199
|
+
|
|
179
200
|
end
|
|
@@ -59,8 +59,7 @@ class Raif::ConversationEntry < Raif::ApplicationRecord
|
|
|
59
59
|
def add_user_tool_invocation_to_user_message
|
|
60
60
|
return unless raif_user_tool_invocation.present?
|
|
61
61
|
|
|
62
|
-
|
|
63
|
-
self.user_message = [user_message, raif_user_tool_invocation.as_user_message].join(separator)
|
|
62
|
+
self.user_message = [user_message, raif_user_tool_invocation.as_user_message].join("\n\n")
|
|
64
63
|
end
|
|
65
64
|
|
|
66
65
|
def response_format
|
|
@@ -74,7 +73,7 @@ class Raif::ConversationEntry < Raif::ApplicationRecord
|
|
|
74
73
|
def process_entry!
|
|
75
74
|
self.model_response_message = ""
|
|
76
75
|
|
|
77
|
-
|
|
76
|
+
model_completion = raif_conversation.prompt_model_for_entry_response(entry: self) do |model_completion, _delta, _sse_event|
|
|
78
77
|
self.raw_response = model_completion.raw_response
|
|
79
78
|
self.model_response_message = raif_conversation.process_model_response_message(
|
|
80
79
|
message: model_completion.parsed_response(force_reparse: true),
|
|
@@ -90,6 +89,10 @@ class Raif::ConversationEntry < Raif::ApplicationRecord
|
|
|
90
89
|
broadcast_replace_to raif_conversation
|
|
91
90
|
end
|
|
92
91
|
|
|
92
|
+
# Failed prompt attempts can still persist a model completion for debugging.
|
|
93
|
+
# Avoid clearing the has_one association with nil, which would delete that row.
|
|
94
|
+
self.raif_model_completion = model_completion if model_completion.present?
|
|
95
|
+
|
|
93
96
|
if raif_model_completion.present? && (raif_model_completion.parsed_response.present? || raif_model_completion.response_tool_calls.present?)
|
|
94
97
|
extract_message_and_invoke_tools!
|
|
95
98
|
create_entry_for_observation! if triggers_observation_to_model?
|
|
@@ -29,6 +29,15 @@ private
|
|
|
29
29
|
end
|
|
30
30
|
|
|
31
31
|
def bedrock_client
|
|
32
|
-
@bedrock_client ||=
|
|
32
|
+
@bedrock_client ||= begin
|
|
33
|
+
client_options = {
|
|
34
|
+
region: Raif.config.aws_bedrock_region
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
client_options[:http_read_timeout] = Raif.config.request_read_timeout if Raif.config.request_read_timeout
|
|
38
|
+
client_options[:http_open_timeout] = Raif.config.request_open_timeout if Raif.config.request_open_timeout
|
|
39
|
+
|
|
40
|
+
Aws::BedrockRuntime::Client.new(client_options)
|
|
41
|
+
end
|
|
33
42
|
end
|
|
34
43
|
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Embedding model adapter that calls the `embedContent` endpoint of Google's
# Generative Language API (v1beta) over Faraday.
class Raif::EmbeddingModels::Google < Raif::EmbeddingModel
  # Requests an embedding for a single string.
  #
  # input      - String to embed; anything else raises ArgumentError
  # dimensions - optional value sent as `outputDimensionality`
  #
  # Returns whatever the response body holds under "embedding" -> "values".
  # HTTP error responses raise through Faraday's :raise_error middleware.
  def generate_embedding!(input, dimensions: nil)
    raise ArgumentError, "Raif::EmbeddingModels::Google#generate_embedding! input must be a string" unless input.is_a?(String)

    payload = build_request_parameters(input, dimensions:)
    response = connection.post("models/#{api_name}:embedContent") { |req| req.body = payload }

    response.body.dig("embedding", "values")
  end

  private

  # Request body: the input wrapped as a single text part, plus
  # `outputDimensionality` when dimensions are given.
  def build_request_parameters(input, dimensions: nil)
    { content: { parts: [{ text: input }] } }.tap do |params|
      params[:outputDimensionality] = dimensions if dimensions.present?
    end
  end

  # Memoized Faraday connection: API key header, JSON request/response
  # handling, and raising on error statuses.
  def connection
    @connection ||= Faraday.new(
      url: "https://generativelanguage.googleapis.com/v1beta",
      request: Raif.default_request_options
    ) do |faraday|
      faraday.headers["x-goog-api-key"] = Raif.config.google_api_key
      faraday.request :json
      faraday.response :json
      faraday.response :raise_error
    end
  end
end
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# == Schema Information
|
|
4
|
+
#
|
|
5
|
+
# Table name: raif_tasks
|
|
6
|
+
#
|
|
7
|
+
# id :bigint not null, primary key
|
|
8
|
+
# available_model_tools :jsonb not null
|
|
9
|
+
# completed_at :datetime
|
|
10
|
+
# creator_type :string
|
|
11
|
+
# failed_at :datetime
|
|
12
|
+
# llm_model_key :string not null
|
|
13
|
+
# prompt :text
|
|
14
|
+
# prompt_studio_run :boolean default(FALSE), not null
|
|
15
|
+
# raw_response :text
|
|
16
|
+
# requested_language_key :string
|
|
17
|
+
# response_format :integer default("text"), not null
|
|
18
|
+
# run_with :jsonb
|
|
19
|
+
# source_type :string
|
|
20
|
+
# started_at :datetime
|
|
21
|
+
# system_prompt :text
|
|
22
|
+
# type :string not null
|
|
23
|
+
# created_at :datetime not null
|
|
24
|
+
# updated_at :datetime not null
|
|
25
|
+
# creator_id :bigint
|
|
26
|
+
# source_id :bigint
|
|
27
|
+
#
|
|
28
|
+
# Indexes
|
|
29
|
+
#
|
|
30
|
+
# index_raif_tasks_on_completed_at (completed_at)
|
|
31
|
+
# index_raif_tasks_on_created_at (created_at)
|
|
32
|
+
# index_raif_tasks_on_creator (creator_type,creator_id)
|
|
33
|
+
# index_raif_tasks_on_failed_at (failed_at)
|
|
34
|
+
# index_raif_tasks_on_source (source_type,source_id)
|
|
35
|
+
# index_raif_tasks_on_started_at (started_at)
|
|
36
|
+
# index_raif_tasks_on_type (type)
|
|
37
|
+
# index_raif_tasks_on_type_and_completed_at (type,completed_at)
|
|
38
|
+
# index_raif_tasks_on_type_and_failed_at (type,failed_at)
|
|
39
|
+
# index_raif_tasks_on_type_and_started_at (type,started_at)
|
|
40
|
+
#
|
|
41
|
+
module Raif
  module Evals
    # Base task for LLM-as-judge evaluations. Runs at temperature 0.0 for
    # consistent judging and expects a JSON response for structured output.
    class LlmJudge < Raif::Task
      llm_temperature 0.0
      llm_response_format :json

      # Inputs: the content under evaluation and any extra context for the judge.
      run_with :content_to_judge
      run_with :additional_context

      # Prefers the configured eval judge model key, falling back to the
      # inherited default when none is configured.
      def default_llm_model_key
        configured_key = Raif.config.evals_default_llm_judge_model_key
        configured_key || super
      end

      # "reasoning" from the parsed response; nil unless the task completed.
      def judgment_reasoning
        return unless completed?

        parsed_response["reasoning"]
      end

      # "confidence" from the parsed response; nil unless the task completed.
      def judgment_confidence
        return unless completed?

        parsed_response["confidence"]
      end

      # Truthy when a confidence value exists and is below 0.5.
      def low_confidence?
        confidence = judgment_confidence
        confidence && confidence < 0.5
      end
    end
  end
end
|