raif 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2 -2
  3. data/app/assets/builds/raif_admin.css +40 -2
  4. data/app/assets/builds/raif_admin_sprockets.js +2709 -0
  5. data/app/assets/javascript/raif/admin/copy_to_clipboard_controller.js +132 -0
  6. data/app/assets/javascript/raif/admin/cost_estimate_controller.js +80 -0
  7. data/app/assets/javascript/raif/admin/judge_config_controller.js +23 -0
  8. data/app/assets/javascript/raif/admin/select_all_checkboxes_controller.js +33 -0
  9. data/app/assets/javascript/raif/admin/sortable_table_controller.js +51 -0
  10. data/app/assets/javascript/raif/admin/table_search_controller.js +15 -0
  11. data/app/assets/javascript/raif/admin/tom_select_controller.js +33 -0
  12. data/app/assets/javascript/raif_admin.js +23 -0
  13. data/app/assets/javascript/raif_admin_sprockets.js +24 -0
  14. data/app/assets/stylesheets/raif_admin.scss +50 -1
  15. data/app/controllers/raif/admin/agents_controller.rb +27 -1
  16. data/app/controllers/raif/admin/configs_controller.rb +1 -0
  17. data/app/controllers/raif/admin/llms_controller.rb +27 -0
  18. data/app/controllers/raif/admin/model_completions_controller.rb +6 -0
  19. data/app/controllers/raif/admin/prompt_studio/agents_controller.rb +25 -0
  20. data/app/controllers/raif/admin/prompt_studio/base_controller.rb +32 -0
  21. data/app/controllers/raif/admin/prompt_studio/batch_runs_controller.rb +102 -0
  22. data/app/controllers/raif/admin/prompt_studio/conversations_controller.rb +25 -0
  23. data/app/controllers/raif/admin/prompt_studio/tasks_controller.rb +64 -0
  24. data/app/controllers/raif/admin/tasks_controller.rb +5 -0
  25. data/app/helpers/raif/application_helper.rb +40 -0
  26. data/app/jobs/raif/prompt_studio_batch_run_item_job.rb +11 -0
  27. data/app/jobs/raif/prompt_studio_batch_run_job.rb +15 -0
  28. data/app/jobs/raif/prompt_studio_task_run_job.rb +36 -0
  29. data/app/models/raif/agent.rb +36 -5
  30. data/app/models/raif/agents/native_tool_calling_agent.rb +101 -19
  31. data/app/models/raif/concerns/has_prompt_templates.rb +88 -0
  32. data/app/models/raif/concerns/has_runtime_duration.rb +41 -0
  33. data/app/models/raif/concerns/json_schema_definition.rb +16 -3
  34. data/app/models/raif/concerns/llm_prompt_caching.rb +20 -0
  35. data/app/models/raif/concerns/llms/anthropic/message_formatting.rb +6 -0
  36. data/app/models/raif/concerns/llms/anthropic/tool_formatting.rb +5 -1
  37. data/app/models/raif/concerns/llms/bedrock/message_formatting.rb +7 -0
  38. data/app/models/raif/concerns/llms/bedrock/tool_formatting.rb +4 -0
  39. data/app/models/raif/concerns/llms/google/message_formatting.rb +5 -2
  40. data/app/models/raif/concerns/llms/google/tool_formatting.rb +4 -0
  41. data/app/models/raif/concerns/llms/message_formatting.rb +30 -0
  42. data/app/models/raif/concerns/llms/open_ai_completions/response_tool_calls.rb +1 -1
  43. data/app/models/raif/concerns/llms/open_ai_completions/tool_formatting.rb +4 -0
  44. data/app/models/raif/concerns/llms/open_ai_responses/tool_formatting.rb +4 -0
  45. data/app/models/raif/concerns/provider_managed_tool_calls.rb +162 -0
  46. data/app/models/raif/conversation.rb +24 -3
  47. data/app/models/raif/conversation_entry.rb +6 -3
  48. data/app/models/raif/embedding_models/bedrock.rb +10 -1
  49. data/app/models/raif/embedding_models/google.rb +37 -0
  50. data/app/models/raif/evals/llm_judge.rb +70 -0
  51. data/{lib → app/models}/raif/evals/llm_judges/binary.rb +38 -0
  52. data/{lib → app/models}/raif/evals/llm_judges/comparative.rb +38 -0
  53. data/{lib → app/models}/raif/evals/llm_judges/scored.rb +38 -0
  54. data/{lib → app/models}/raif/evals/llm_judges/summarization.rb +38 -0
  55. data/app/models/raif/llm.rb +82 -7
  56. data/app/models/raif/llms/anthropic.rb +26 -4
  57. data/app/models/raif/llms/bedrock.rb +59 -5
  58. data/app/models/raif/llms/google.rb +28 -2
  59. data/app/models/raif/llms/open_ai_base.rb +4 -0
  60. data/app/models/raif/llms/open_ai_completions.rb +9 -2
  61. data/app/models/raif/llms/open_ai_responses.rb +9 -2
  62. data/app/models/raif/llms/open_router.rb +10 -3
  63. data/app/models/raif/model_completion.rb +75 -34
  64. data/app/models/raif/model_tool.rb +45 -3
  65. data/app/models/raif/model_tool_invocation.rb +31 -1
  66. data/app/models/raif/prompt_studio_batch_run.rb +155 -0
  67. data/app/models/raif/prompt_studio_batch_run_item.rb +220 -0
  68. data/app/models/raif/streaming_responses/bedrock.rb +60 -1
  69. data/app/models/raif/task.rb +30 -6
  70. data/app/views/layouts/raif/admin.html.erb +31 -1
  71. data/app/views/raif/admin/agents/_agent.html.erb +1 -0
  72. data/app/views/raif/admin/agents/index.html.erb +48 -0
  73. data/app/views/raif/admin/agents/show.html.erb +4 -0
  74. data/app/views/raif/admin/llms/index.html.erb +110 -0
  75. data/app/views/raif/admin/model_completions/_model_completion.html.erb +3 -7
  76. data/app/views/raif/admin/model_completions/index.html.erb +14 -1
  77. data/app/views/raif/admin/model_completions/show.html.erb +164 -55
  78. data/app/views/raif/admin/model_tool_invocations/index.html.erb +1 -1
  79. data/app/views/raif/admin/model_tool_invocations/show.html.erb +18 -0
  80. data/app/views/raif/admin/prompt_studio/agents/index.html.erb +56 -0
  81. data/app/views/raif/admin/prompt_studio/agents/show.html.erb +57 -0
  82. data/app/views/raif/admin/prompt_studio/batch_runs/_batch_run_item.html.erb +54 -0
  83. data/app/views/raif/admin/prompt_studio/batch_runs/_judge_config_fields.html.erb +76 -0
  84. data/app/views/raif/admin/prompt_studio/batch_runs/_judge_detail_modal.html.erb +27 -0
  85. data/app/views/raif/admin/prompt_studio/batch_runs/_modal.html.erb +35 -0
  86. data/app/views/raif/admin/prompt_studio/batch_runs/_progress.html.erb +78 -0
  87. data/app/views/raif/admin/prompt_studio/batch_runs/show.html.erb +49 -0
  88. data/app/views/raif/admin/prompt_studio/conversations/index.html.erb +48 -0
  89. data/app/views/raif/admin/prompt_studio/conversations/show.html.erb +36 -0
  90. data/app/views/raif/admin/prompt_studio/shared/_nav_tabs.html.erb +17 -0
  91. data/app/views/raif/admin/prompt_studio/shared/_prompt_comparison.html.erb +87 -0
  92. data/app/views/raif/admin/prompt_studio/shared/_type_filter.html.erb +54 -0
  93. data/app/views/raif/admin/prompt_studio/tasks/_task_result.html.erb +145 -0
  94. data/app/views/raif/admin/prompt_studio/tasks/_task_row.html.erb +12 -0
  95. data/app/views/raif/admin/prompt_studio/tasks/_task_type_filter.html.erb +58 -0
  96. data/app/views/raif/admin/prompt_studio/tasks/_tasks_table.html.erb +22 -0
  97. data/app/views/raif/admin/prompt_studio/tasks/index.html.erb +35 -0
  98. data/app/views/raif/admin/prompt_studio/tasks/show.html.erb +19 -0
  99. data/app/views/raif/admin/tasks/_task.html.erb +1 -0
  100. data/app/views/raif/admin/tasks/index.html.erb +17 -5
  101. data/app/views/raif/admin/tasks/show.html.erb +20 -0
  102. data/app/views/raif/conversation_entries/_message.html.erb +10 -6
  103. data/config/importmap.rb +8 -0
  104. data/config/locales/admin.en.yml +128 -0
  105. data/config/locales/en.yml +36 -2
  106. data/config/routes.rb +8 -0
  107. data/db/migrate/20260307000000_add_prompt_studio_run_to_raif_tasks.rb +7 -0
  108. data/db/migrate/20260308000000_create_raif_prompt_studio_batch_runs.rb +27 -0
  109. data/db/migrate/20260308000001_create_raif_prompt_studio_batch_run_items.rb +24 -0
  110. data/db/migrate/20260407000000_add_cache_token_columns_to_raif_model_completions.rb +8 -0
  111. data/lib/generators/raif/agent/agent_generator.rb +18 -0
  112. data/lib/generators/raif/agent/templates/agent.rb.tt +7 -5
  113. data/lib/generators/raif/agent/templates/system_prompt.erb.tt +3 -0
  114. data/lib/generators/raif/conversation/conversation_generator.rb +19 -1
  115. data/lib/generators/raif/conversation/templates/system_prompt.erb.tt +4 -0
  116. data/lib/generators/raif/install/templates/initializer.rb +68 -27
  117. data/lib/generators/raif/task/task_generator.rb +18 -0
  118. data/lib/generators/raif/task/templates/prompt.erb.tt +4 -0
  119. data/lib/generators/raif/task/templates/task.rb.tt +9 -8
  120. data/lib/raif/configuration.rb +10 -0
  121. data/lib/raif/embedding_model_registry.rb +8 -0
  122. data/lib/raif/engine.rb +16 -1
  123. data/lib/raif/errors/blank_response_error.rb +8 -0
  124. data/lib/raif/errors/prompt_template_error.rb +15 -0
  125. data/lib/raif/errors.rb +2 -0
  126. data/lib/raif/evals.rb +0 -6
  127. data/lib/raif/llm_registry.rb +230 -9
  128. data/lib/raif/prompt_studio_comparison_builder.rb +138 -0
  129. data/lib/raif/token_estimator.rb +28 -0
  130. data/lib/raif/version.rb +1 -1
  131. data/lib/raif.rb +2 -0
  132. data/spec/support/rspec_helpers.rb +7 -1
  133. data/spec/support/test_task.rb +9 -0
  134. data/spec/support/test_template_task.rb +41 -0
  135. metadata +65 -7
  136. data/lib/raif/evals/llm_judge.rb +0 -32
  137. /data/{lib → app/models}/raif/evals/scoring_rubric.rb +0 -0
@@ -0,0 +1,41 @@
# frozen_string_literal: true

# Runtime helpers for objects that expose `started_at`, `completed_at`, and
# `failed_at`. The including class is expected to provide those three readers.
module Raif::Concerns::HasRuntimeDuration
  extend ActiveSupport::Concern

  # The moment the run stopped: `completed_at` when set, otherwise `failed_at`.
  def runtime_ended_at
    completed_at || failed_at
  end

  # Difference `runtime_ended_at - started_at`.
  #
  # @return [Numeric, nil] nil when either endpoint is blank or when the end
  #   precedes the start
  def runtime_duration_seconds
    ended_at = runtime_ended_at
    return if started_at.blank? || ended_at.blank?

    duration_in_seconds = ended_at - started_at
    duration_in_seconds unless duration_in_seconds.negative?
  end

  # Human-readable form of `runtime_duration_seconds`.
  #
  # Examples: "850ms", "12.5s", "1m 5s", "1h 0m 3s"; "-" when no duration is
  # available.
  #
  # @return [String]
  def runtime_duration
    duration_in_seconds = runtime_duration_seconds
    return "-" if duration_in_seconds.nil?

    # Round to the display precision *before* choosing the unit. Otherwise a
    # value just below a unit boundary rounds up inside the smaller unit and
    # renders as "1000ms" (0.9996s) or "60s" (59.996s).
    milliseconds = (duration_in_seconds * 1000).round
    return "#{milliseconds}ms" if milliseconds < 1000

    hundredths = (duration_in_seconds * 100).round
    return "#{format_runtime_hundredths(hundredths)}s" if hundredths < 6000

    total_minutes, seconds = duration_in_seconds.round.divmod(60)
    hours, minutes = total_minutes.divmod(60)

    parts = []
    parts << "#{hours}h" if hours.positive?
    # Keep a "0m" placeholder between hours and seconds (e.g. "1h 0m 3s").
    parts << "#{minutes}m" if minutes.positive? || hours.positive?
    parts << "#{seconds}s"
    parts.join(" ")
  end

private

  # Renders an integer count of hundredths of a second as a decimal with at
  # most two fraction digits and no trailing zeros (150 -> "1.5", 100 -> "1",
  # 105 -> "1.05").
  def format_runtime_hundredths(hundredths)
    whole, fraction = hundredths.divmod(100)
    return whole.to_s if fraction.zero?

    format("%d.%02d", whole, fraction).delete_suffix("0")
  end
end
@@ -6,7 +6,7 @@ module Raif
6
6
  extend ActiveSupport::Concern
7
7
 
8
8
  class_methods do
9
- def json_schema_definition(schema_name, &block)
9
+ def json_schema_definition(schema_name, dynamic: false, &block)
10
10
  raise ArgumentError, "A block must be provided to define the JSON schema" unless block_given?
11
11
 
12
12
  # Check if block expects an instance parameter (arity == 1)
@@ -16,6 +16,10 @@ module Raif
16
16
  # Store block for instance-dependent schema building
17
17
  @schema_blocks ||= {}
18
18
  @schema_blocks[schema_name] = block
19
+ elsif dynamic
20
+ # Store block for class-level dynamic schema (re-evaluated each call)
21
+ @dynamic_schema_blocks ||= {}
22
+ @dynamic_schema_blocks[schema_name] = block
19
23
  else
20
24
  # Build schema immediately for class-level (backward compatible)
21
25
  @schemas ||= {}
@@ -25,7 +29,9 @@ module Raif
25
29
  end
26
30
 
27
31
  def schema_defined?(schema_name)
28
- @schemas&.dig(schema_name).present? || @schema_blocks&.dig(schema_name).present?
32
+ @schemas&.dig(schema_name).present? ||
33
+ @schema_blocks&.dig(schema_name).present? ||
34
+ @dynamic_schema_blocks&.dig(schema_name).present?
29
35
  end
30
36
 
31
37
  def schema_for(schema_name)
@@ -36,6 +42,13 @@ module Raif
36
42
  "Call this method on an instance instead."
37
43
  end
38
44
 
45
+ # Check if this is a dynamic schema (re-evaluate each call)
46
+ if @dynamic_schema_blocks&.dig(schema_name).present?
47
+ builder = Raif::JsonSchemaBuilder.new
48
+ builder.instance_eval(&@dynamic_schema_blocks[schema_name])
49
+ return builder.to_schema
50
+ end
51
+
39
52
  @schemas[schema_name].to_schema
40
53
  end
41
54
 
@@ -54,7 +67,7 @@ module Raif
54
67
  builder.build_with_instance(self, &block)
55
68
  builder.to_schema
56
69
  elsif self.class.schema_defined?(schema_name)
57
- # Fall back to class-level schema
70
+ # Fall back to class-level schema (handles both static and dynamic)
58
71
  self.class.schema_for(schema_name)
59
72
  end
60
73
  end
@@ -0,0 +1,20 @@
# frozen_string_literal: true

# Class-level opt-in flags for provider prompt caching. Each including class
# gets two boolean class attributes (both default to false) and one macro per
# provider that switches the matching flag on.
module Raif::Concerns::LlmPromptCaching
  extend ActiveSupport::Concern

  included do
    # Readable from instances, but only writable on the class.
    class_attribute :anthropic_prompt_caching_enabled,
      :bedrock_prompt_caching_enabled,
      instance_writer: false,
      default: false
  end

  class_methods do
    # Turns on the Anthropic prompt caching flag for this class.
    def enable_anthropic_prompt_caching
      self.anthropic_prompt_caching_enabled = true
    end

    # Turns on the Bedrock prompt caching flag for this class.
    def enable_bedrock_prompt_caching
      self.bedrock_prompt_caching_enabled = true
    end
  end
end
@@ -3,6 +3,12 @@
3
3
  module Raif::Concerns::Llms::Anthropic::MessageFormatting
4
4
  extend ActiveSupport::Concern
5
5
 
6
+ def format_messages(messages)
7
+ # Anthropic tool results come back as user-role content blocks, so conversation
8
+ # continuations may need adjacent user messages collapsed after formatting.
9
+ consolidate_consecutive_role_messages(super, content_key: "content")
10
+ end
11
+
6
12
  def format_model_image_input_message(image_input)
7
13
  if image_input.source_type == :url
8
14
  {
@@ -55,6 +55,10 @@ module Raif::Concerns::Llms::Anthropic::ToolFormatting
55
55
  end
56
56
 
57
57
  def build_forced_tool_choice(tool_name)
58
- { "type" => "tool", "name" => tool_name }
58
+ { "type" => "tool", "name" => tool_name, "disable_parallel_tool_use" => true }
59
+ end
60
+
61
+ def build_required_tool_choice
62
+ { "type" => "any", "disable_parallel_tool_use" => true }
59
63
  end
60
64
  end
@@ -3,6 +3,13 @@
3
3
  module Raif::Concerns::Llms::Bedrock::MessageFormatting
4
4
  extend ActiveSupport::Concern
5
5
 
6
+ def format_messages(messages)
7
+ # Bedrock tool results are represented as user-role content blocks, so a
8
+ # tool_result followed by the next user prompt must be merged into one user
9
+ # message before sending it to the provider.
10
+ consolidate_consecutive_role_messages(super, content_key: "content")
11
+ end
12
+
6
13
  def format_string_message(content, role: nil)
7
14
  { "text" => content }
8
15
  end
@@ -38,4 +38,8 @@ module Raif::Concerns::Llms::Bedrock::ToolFormatting
38
38
  def build_forced_tool_choice(tool_name)
39
39
  { tool: { name: tool_name } }
40
40
  end
41
+
42
+ def build_required_tool_choice
43
+ { any: {} }
44
+ end
41
45
  end
@@ -3,9 +3,10 @@
3
3
  module Raif::Concerns::Llms::Google::MessageFormatting
4
4
  extend ActiveSupport::Concern
5
5
 
6
- # Override the base format_messages to use Google's message format
6
+ # Google uses a different envelope ("parts") and also represents tool results as
7
+ # user-role messages, so we normalize adjacent same-role messages after formatting.
7
8
  def format_messages(messages)
8
- messages.map do |message|
9
+ formatted_messages = messages.map do |message|
9
10
  if message.is_a?(Hash) && message["type"] == "tool_call"
10
11
  format_tool_call_message(message)
11
12
  elsif message.is_a?(Hash) && message["type"] == "tool_call_result"
@@ -20,6 +21,8 @@ module Raif::Concerns::Llms::Google::MessageFormatting
20
21
  }
21
22
  end
22
23
  end
24
+
25
+ consolidate_consecutive_role_messages(formatted_messages, content_key: "parts")
23
26
  end
24
27
 
25
28
  def format_string_message(content, role: nil)
@@ -49,6 +49,10 @@ module Raif::Concerns::Llms::Google::ToolFormatting
49
49
  { mode: "ANY", allowedFunctionNames: [tool_name] }
50
50
  end
51
51
 
52
+ def build_required_tool_choice
53
+ { mode: "ANY" }
54
+ end
55
+
52
56
  private
53
57
 
54
58
  # Google's API doesn't support additionalProperties in JSON schemas
@@ -45,4 +45,34 @@ module Raif::Concerns::Llms::MessageFormatting
45
45
  { "type" => "text", "text" => content }
46
46
  end
47
47
 
48
+ def consolidate_consecutive_role_messages(messages, content_key:)
49
+ # Bedrock, Anthropic, and Google all model tool results as normal role-based
50
+ # message content blocks. After formatting, a tool result can therefore be a
51
+ # "user" message immediately followed by the next user turn. Those providers
52
+ # expect alternating roles, so their adapters collapse adjacent same-role blocks.
53
+ return messages if messages.size <= 1
54
+
55
+ messages.each_with_object([]) do |message, consolidated|
56
+ candidate = message.deep_dup
57
+ previous_message = consolidated.last
58
+
59
+ if mergeable_consecutive_role_messages?(previous_message, candidate, content_key:)
60
+ previous_message[content_key] += candidate[content_key]
61
+ else
62
+ consolidated << candidate
63
+ end
64
+ end
65
+ end
66
+
67
+ private
68
+
69
+ def mergeable_consecutive_role_messages?(previous_message, message, content_key:)
70
+ previous_message.is_a?(Hash) &&
71
+ message.is_a?(Hash) &&
72
+ previous_message["role"].present? &&
73
+ previous_message["role"] == message["role"] &&
74
+ previous_message[content_key].is_a?(Array) &&
75
+ message[content_key].is_a?(Array)
76
+ end
77
+
48
78
  end
@@ -4,7 +4,7 @@ module Raif::Concerns::Llms::OpenAiCompletions::ResponseToolCalls
4
4
  extend ActiveSupport::Concern
5
5
 
6
6
  def extract_response_tool_calls(resp)
7
- tool_calls = resp.dig("choices", 0, "message", "tool_calls")
7
+ tool_calls = resp&.dig("choices", 0, "message", "tool_calls")
8
8
  return if tool_calls.blank?
9
9
 
10
10
  tool_calls.map do |tool_call|
@@ -27,4 +27,8 @@ module Raif::Concerns::Llms::OpenAiCompletions::ToolFormatting
27
27
  def build_forced_tool_choice(tool_name)
28
28
  { "type" => "function", "function" => { "name" => tool_name } }
29
29
  end
30
+
31
+ def build_required_tool_choice
32
+ "required"
33
+ end
30
34
  end
@@ -43,4 +43,8 @@ module Raif::Concerns::Llms::OpenAiResponses::ToolFormatting
43
43
  def build_forced_tool_choice(tool_name)
44
44
  { "type" => "function", "name" => tool_name }
45
45
  end
46
+
47
+ def build_required_tool_choice
48
+ "required"
49
+ end
46
50
  end
@@ -0,0 +1,162 @@
# frozen_string_literal: true

# Normalizes provider-managed tool activity found in a completion's stored
# response into one structure for the admin model completion page.
#
# The including class is expected to provide `response_array`, `citations`,
# and `available_model_tools_map`.
module Raif::Concerns::ProviderManagedToolCalls
  extend ActiveSupport::Concern

  # Only absolute http/https URLs are surfaced; anything else is dropped.
  SAFE_SOURCE_URL_PATTERN = %r{\Ahttps?://}i

  # Provider-managed tool data is not normalized by the provider SDKs the same
  # way developer-managed tool calls are. This method smooths those differences
  # into one admin-friendly structure for the model completion page.
  #
  # @return [Array<Hash>] hashes with the keys "tool_name",
  #   "provider_tool_call_id", "status", "arguments", "sources", "raw_result",
  #   and "inferred"
  def provider_managed_tool_calls
    # Memoized for repeated reads during a request/render. This assumes the
    # completion's response payload is not mutated after first access.
    @provider_managed_tool_calls ||= begin
      tool_calls = extract_provider_managed_tool_calls
      tool_calls = inferred_provider_managed_tool_calls if tool_calls.empty?

      tool_calls.map do |tool_call|
        next tool_call unless tool_call["tool_name"] == "web_search"

        # Search sources can come from explicit provider result blocks
        # (Anthropic) or from top-level citations (OpenAI / Google), so we
        # merge both.
        tool_call.merge("sources" => merge_provider_managed_sources(tool_call["sources"], citations))
      end
    end
  end

  # Returns citations with URLs sanitized to only allow http/https schemes.
  # Entries that are not hashes are skipped, matching how
  # `merge_provider_managed_sources` treats malformed entries.
  #
  # @return [Array<Hash>]
  def sanitized_citations
    @sanitized_citations ||= Array(citations).filter_map do |citation|
      next unless citation.is_a?(Hash)

      url = citation["url"]
      citation.merge("url" => (url if safe_provider_managed_url?(url)))
    end
  end

private

  # Scans the hash entries of `response_array` and builds one normalized tool
  # call per recognized provider-managed block.
  def extract_provider_managed_tool_calls
    response_blocks = Array(response_array).select { |block| block.is_a?(Hash) }
    result_blocks_by_tool_use_id = response_blocks
      .reject { |block| block["tool_use_id"].blank? }
      .group_by { |block| block["tool_use_id"] }

    response_blocks.filter_map do |block|
      case block["type"]
      when "server_tool_use"
        # Anthropic stores the tool invocation in one block and the result in a
        # separate block keyed by `tool_use_id`.
        build_provider_managed_server_tool_call(block, result_blocks_by_tool_use_id)
      when "web_search_call", "web_search_preview",
           "code_interpreter_call", "code_interpreter",
           "image_generation_call", "image_generation"
        # OpenAI Responses persists provider-managed calls as top-level typed
        # blocks like `web_search_call`, `code_interpreter`, etc.
        build_provider_managed_tool_call_from_type(block)
      end
    end
  end

  # Builds a tool call from a `server_tool_use` block, attaching any result
  # blocks whose `tool_use_id` equals the block's `id`. Returns nil when the
  # tool is not an available provider-managed tool.
  def build_provider_managed_server_tool_call(block, result_blocks_by_tool_use_id)
    tool_name = normalize_provider_managed_tool_name(block["name"])
    return unless provider_managed_tool_available?(tool_name)

    raw_result = result_blocks_by_tool_use_id[block["id"]].presence
    {
      "tool_name" => tool_name,
      "provider_tool_call_id" => block["id"],
      "status" => block["status"],
      "arguments" => block["input"].presence,
      "sources" => extract_provider_managed_sources(raw_result),
      "raw_result" => raw_result,
      "inferred" => false
    }
  end

  # Builds a tool call from a block whose `type` names the tool. Everything
  # except "id", "type", and "status" is reported as both the arguments and
  # the raw result. Returns nil when the tool is not an available
  # provider-managed tool.
  def build_provider_managed_tool_call_from_type(block)
    tool_name = normalize_provider_managed_tool_name(block["type"])
    return unless provider_managed_tool_available?(tool_name)

    payload = block.except("id", "type", "status").presence
    {
      "tool_name" => tool_name,
      "provider_tool_call_id" => block["id"],
      "status" => block["status"],
      "arguments" => payload,
      "sources" => [],
      "raw_result" => payload,
      "inferred" => false
    }
  end

  def inferred_provider_managed_tool_calls
    # Google currently gives us citations for provider-managed web search, but
    # not a first-class tool call block in `response_array`, so we infer a
    # single search invocation when web search was available and citations exist.
    return [] unless provider_managed_tool_available?("web_search") && citations.present?

    [{
      "tool_name" => "web_search",
      "provider_tool_call_id" => nil,
      "status" => "completed",
      "arguments" => nil,
      "sources" => merge_provider_managed_sources([], citations),
      "raw_result" => nil,
      "inferred" => true
    }]
  end

  # Collects the `web_search_result` entries found under each result block's
  # "content" and returns them as de-duplicated source hashes.
  def extract_provider_managed_sources(result_blocks)
    sources = Array(result_blocks).flat_map do |result_block|
      Array(result_block["content"]).filter_map do |content_block|
        next unless content_block.is_a?(Hash) && content_block["type"] == "web_search_result"

        build_provider_managed_source(content_block)
      end
    end

    dedupe_provider_managed_sources(sources)
  end

  # Concatenates two source lists, normalizes every hash entry, and
  # de-duplicates the result. Non-hash entries are ignored.
  def merge_provider_managed_sources(existing_sources, extra_sources)
    sources = (Array(existing_sources) + Array(extra_sources)).filter_map do |source|
      build_provider_managed_source(source) if source.is_a?(Hash)
    end

    dedupe_provider_managed_sources(sources)
  end

  # Reduces a raw source/citation hash to its "title", "url", and "page_age"
  # keys with nil values removed. Returns nil when nothing is left, so callers
  # using `filter_map` drop empty sources.
  def build_provider_managed_source(raw_source)
    {
      "title" => raw_source["title"],
      "url" => normalize_provider_managed_source_url(raw_source["url"]),
      "page_age" => raw_source["page_age"]
    }.compact.presence
  end

  # Keeps the first source per URL, falling back to the title when a source
  # has no URL.
  def dedupe_provider_managed_sources(sources)
    sources.uniq { |source| source["url"].presence || source["title"] }
  end

  # Maps provider-specific block names/types onto the tool names used as keys
  # of `available_model_tools_map`. Returns nil for unrecognized names.
  def normalize_provider_managed_tool_name(name)
    case name.to_s
    when "web_search", "web_search_call", "web_search_preview"
      "web_search"
    when "code_execution", "code_interpreter", "code_interpreter_call"
      "code_execution"
    when "image_generation", "image_generation_call"
      "image_generation"
    end
  end

  # Truthy when `tool_name` is present in `available_model_tools_map` and that
  # tool reports itself as provider-managed.
  def provider_managed_tool_available?(tool_name)
    return false if tool_name.blank?

    available_model_tools_map[tool_name]&.provider_managed?
  end

  # Strips tracking parameters and returns the URL only when the result is an
  # http/https URL; returns nil otherwise, including when the stripper returns
  # nil or a non-string value.
  def normalize_provider_managed_source_url(url)
    return if url.blank?

    stripped_url = Raif::Utils::HtmlFragmentProcessor.strip_tracking_parameters(url)
    stripped_url if safe_provider_managed_url?(stripped_url)
  end

  # True only for String values that begin with an http:// or https:// scheme.
  def safe_provider_managed_url?(url)
    url.is_a?(String) && url.match?(SAFE_SOURCE_URL_PATTERN)
  end
end
@@ -29,10 +29,13 @@
29
29
  # index_raif_conversations_on_source (source_type,source_id)
30
30
  #
31
31
  class Raif::Conversation < Raif::ApplicationRecord
32
+ prepend Raif::Concerns::HasPromptTemplates
33
+
32
34
  include Raif::Concerns::HasLlm
33
35
  include Raif::Concerns::HasRequestedLanguage
34
36
  include Raif::Concerns::HasAvailableModelTools
35
37
  include Raif::Concerns::LlmResponseParsing
38
+ include Raif::Concerns::LlmPromptCaching
36
39
 
37
40
  belongs_to :creator, polymorphic: true
38
41
  belongs_to :source, polymorphic: true, optional: true
@@ -103,6 +106,8 @@ class Raif::Conversation < Raif::ApplicationRecord
103
106
  response_format: response_format.to_sym,
104
107
  system_prompt: system_prompt,
105
108
  available_model_tools: available_model_tools,
109
+ anthropic_prompt_caching_enabled: self.class.anthropic_prompt_caching_enabled,
110
+ bedrock_prompt_caching_enabled: self.class.bedrock_prompt_caching_enabled,
106
111
  &block
107
112
  )
108
113
 
@@ -153,15 +158,18 @@ class Raif::Conversation < Raif::ApplicationRecord
153
158
  tool_invocations = entry.raif_model_tool_invocations.to_a
154
159
 
155
160
  if tool_invocations.any?
156
- # First tool call includes the assistant's message (if any)
161
+ # First tool call includes the assistant's message (if any).
162
+ # For the result payload we send the model-facing observation when the tool
163
+ # opts into observations, while keeping the raw invocation.result persisted
164
+ # for admin/UI rendering.
157
165
  first_invocation = tool_invocations.shift
158
166
  messages << first_invocation.as_tool_call_message(assistant_message: entry.model_response_message.presence)
159
- messages << first_invocation.as_tool_call_result_message
167
+ messages << first_invocation.as_tool_call_result_message(result: tool_result_for_llm(first_invocation))
160
168
 
161
169
  # Remaining tool calls (if multiple)
162
170
  tool_invocations.each do |tool_invocation|
163
171
  messages << tool_invocation.as_tool_call_message
164
- messages << tool_invocation.as_tool_call_result_message
172
+ messages << tool_invocation.as_tool_call_result_message(result: tool_result_for_llm(tool_invocation))
165
173
  end
166
174
  elsif entry.model_response_message.present?
167
175
  # No tool calls, just a regular assistant response
@@ -176,4 +184,17 @@ class Raif::Conversation < Raif::ApplicationRecord
176
184
  available_user_tools.map(&:constantize)
177
185
  end
178
186
 
187
+ private
188
+
189
+ def tool_result_for_llm(tool_invocation)
190
+ # Some tools persist a compact structured result for display/admin purposes but
191
+ # need to send richer text/XML back to the model for the continuation turn.
192
+ return tool_invocation.result unless tool_invocation.triggers_observation_to_model?
193
+
194
+ tool = tool_invocation.tool
195
+ return tool_invocation.result unless tool.respond_to?(:observation_for_invocation)
196
+
197
+ tool.observation_for_invocation(tool_invocation).presence || tool_invocation.result
198
+ end
199
+
179
200
  end
@@ -59,8 +59,7 @@ class Raif::ConversationEntry < Raif::ApplicationRecord
59
59
  def add_user_tool_invocation_to_user_message
60
60
  return unless raif_user_tool_invocation.present?
61
61
 
62
- separator = response_format == "html" ? "<br>" : "\n\n"
63
- self.user_message = [user_message, raif_user_tool_invocation.as_user_message].join(separator)
62
+ self.user_message = [user_message, raif_user_tool_invocation.as_user_message].join("\n\n")
64
63
  end
65
64
 
66
65
  def response_format
@@ -74,7 +73,7 @@ class Raif::ConversationEntry < Raif::ApplicationRecord
74
73
  def process_entry!
75
74
  self.model_response_message = ""
76
75
 
77
- self.raif_model_completion = raif_conversation.prompt_model_for_entry_response(entry: self) do |model_completion, _delta, _sse_event|
76
+ model_completion = raif_conversation.prompt_model_for_entry_response(entry: self) do |model_completion, _delta, _sse_event|
78
77
  self.raw_response = model_completion.raw_response
79
78
  self.model_response_message = raif_conversation.process_model_response_message(
80
79
  message: model_completion.parsed_response(force_reparse: true),
@@ -90,6 +89,10 @@ class Raif::ConversationEntry < Raif::ApplicationRecord
90
89
  broadcast_replace_to raif_conversation
91
90
  end
92
91
 
92
+ # Failed prompt attempts can still persist a model completion for debugging.
93
+ # Avoid clearing the has_one association with nil, which would delete that row.
94
+ self.raif_model_completion = model_completion if model_completion.present?
95
+
93
96
  if raif_model_completion.present? && (raif_model_completion.parsed_response.present? || raif_model_completion.response_tool_calls.present?)
94
97
  extract_message_and_invoke_tools!
95
98
  create_entry_for_observation! if triggers_observation_to_model?
@@ -29,6 +29,15 @@ private
29
29
  end
30
30
 
31
31
  def bedrock_client
32
- @bedrock_client ||= Aws::BedrockRuntime::Client.new(region: Raif.config.aws_bedrock_region)
32
+ @bedrock_client ||= begin
33
+ client_options = {
34
+ region: Raif.config.aws_bedrock_region
35
+ }
36
+
37
+ client_options[:http_read_timeout] = Raif.config.request_read_timeout if Raif.config.request_read_timeout
38
+ client_options[:http_open_timeout] = Raif.config.request_open_timeout if Raif.config.request_open_timeout
39
+
40
+ Aws::BedrockRuntime::Client.new(client_options)
41
+ end
33
42
  end
34
43
  end
@@ -0,0 +1,37 @@
# frozen_string_literal: true

# Embedding model adapter that posts to the `embedContent` endpoint of
# generativelanguage.googleapis.com (v1beta).
class Raif::EmbeddingModels::Google < Raif::EmbeddingModel
  # Requests an embedding for a single string.
  #
  # @param input [String] text to embed
  # @param dimensions [Integer, nil] sent as `outputDimensionality` when present
  # @return the value found at `embedding.values` in the JSON response body
  # @raise [ArgumentError] when `input` is not a String
  def generate_embedding!(input, dimensions: nil)
    unless input.is_a?(String)
      raise ArgumentError, "Raif::EmbeddingModels::Google#generate_embedding! input must be a string"
    end

    endpoint = "models/#{api_name}:embedContent"
    response = connection.post(endpoint) do |request|
      request.body = build_request_parameters(input, dimensions:)
    end

    response.body.dig("embedding", "values")
  end

private

  # Request body: the text wrapped in a single content part, plus the optional
  # output dimensionality.
  def build_request_parameters(input, dimensions: nil)
    body = { content: { parts: [{ text: input }] } }
    return body unless dimensions.present?

    body.merge(outputDimensionality: dimensions)
  end

  # Memoized Faraday connection: JSON request/response handling, the API key
  # sent in the `x-goog-api-key` header, and errors raised via the
  # `raise_error` response middleware.
  def connection
    return @connection if @connection

    @connection = Faraday.new(
      url: "https://generativelanguage.googleapis.com/v1beta",
      request: Raif.default_request_options
    ) do |builder|
      builder.headers["x-goog-api-key"] = Raif.config.google_api_key
      builder.request :json
      builder.response :json
      builder.response :raise_error
    end
  end
end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ # == Schema Information
4
+ #
5
+ # Table name: raif_tasks
6
+ #
7
+ # id :bigint not null, primary key
8
+ # available_model_tools :jsonb not null
9
+ # completed_at :datetime
10
+ # creator_type :string
11
+ # failed_at :datetime
12
+ # llm_model_key :string not null
13
+ # prompt :text
14
+ # prompt_studio_run :boolean default(FALSE), not null
15
+ # raw_response :text
16
+ # requested_language_key :string
17
+ # response_format :integer default("text"), not null
18
+ # run_with :jsonb
19
+ # source_type :string
20
+ # started_at :datetime
21
+ # system_prompt :text
22
+ # type :string not null
23
+ # created_at :datetime not null
24
+ # updated_at :datetime not null
25
+ # creator_id :bigint
26
+ # source_id :bigint
27
+ #
28
+ # Indexes
29
+ #
30
+ # index_raif_tasks_on_completed_at (completed_at)
31
+ # index_raif_tasks_on_created_at (created_at)
32
+ # index_raif_tasks_on_creator (creator_type,creator_id)
33
+ # index_raif_tasks_on_failed_at (failed_at)
34
+ # index_raif_tasks_on_source (source_type,source_id)
35
+ # index_raif_tasks_on_started_at (started_at)
36
+ # index_raif_tasks_on_type (type)
37
+ # index_raif_tasks_on_type_and_completed_at (type,completed_at)
38
+ # index_raif_tasks_on_type_and_failed_at (type,failed_at)
39
+ # index_raif_tasks_on_type_and_started_at (type,started_at)
40
+ #
41
+ module Raif
42
+ module Evals
43
+ class LlmJudge < Raif::Task
44
+ # Set default temperature for consistent judging
45
+ llm_temperature 0.0
46
+
47
+ # Default to JSON response format for structured output
48
+ llm_response_format :json
49
+
50
+ run_with :content_to_judge # the content to judge
51
+ run_with :additional_context # additional context to be provided to the judge
52
+
53
+ def default_llm_model_key
54
+ Raif.config.evals_default_llm_judge_model_key || super
55
+ end
56
+
57
+ def judgment_reasoning
58
+ parsed_response["reasoning"] if completed?
59
+ end
60
+
61
+ def judgment_confidence
62
+ parsed_response["confidence"] if completed?
63
+ end
64
+
65
+ def low_confidence?
66
+ judgment_confidence && judgment_confidence < 0.5
67
+ end
68
+ end
69
+ end
70
+ end