raif 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. checksums.yaml +4 -4
  2. data/README.md +2 -2
  3. data/app/assets/builds/raif_admin.css +40 -2
  4. data/app/assets/builds/raif_admin_sprockets.js +2709 -0
  5. data/app/assets/javascript/raif/admin/copy_to_clipboard_controller.js +132 -0
  6. data/app/assets/javascript/raif/admin/cost_estimate_controller.js +80 -0
  7. data/app/assets/javascript/raif/admin/judge_config_controller.js +23 -0
  8. data/app/assets/javascript/raif/admin/select_all_checkboxes_controller.js +33 -0
  9. data/app/assets/javascript/raif/admin/sortable_table_controller.js +51 -0
  10. data/app/assets/javascript/raif/admin/table_search_controller.js +15 -0
  11. data/app/assets/javascript/raif/admin/tom_select_controller.js +33 -0
  12. data/app/assets/javascript/raif_admin.js +23 -0
  13. data/app/assets/javascript/raif_admin_sprockets.js +24 -0
  14. data/app/assets/stylesheets/raif_admin.scss +50 -1
  15. data/app/controllers/raif/admin/agents_controller.rb +27 -1
  16. data/app/controllers/raif/admin/configs_controller.rb +1 -0
  17. data/app/controllers/raif/admin/llms_controller.rb +27 -0
  18. data/app/controllers/raif/admin/model_completions_controller.rb +6 -0
  19. data/app/controllers/raif/admin/prompt_studio/agents_controller.rb +25 -0
  20. data/app/controllers/raif/admin/prompt_studio/base_controller.rb +32 -0
  21. data/app/controllers/raif/admin/prompt_studio/batch_runs_controller.rb +102 -0
  22. data/app/controllers/raif/admin/prompt_studio/conversations_controller.rb +25 -0
  23. data/app/controllers/raif/admin/prompt_studio/tasks_controller.rb +64 -0
  24. data/app/controllers/raif/admin/tasks_controller.rb +5 -0
  25. data/app/helpers/raif/application_helper.rb +40 -0
  26. data/app/jobs/raif/prompt_studio_batch_run_item_job.rb +11 -0
  27. data/app/jobs/raif/prompt_studio_batch_run_job.rb +15 -0
  28. data/app/jobs/raif/prompt_studio_task_run_job.rb +36 -0
  29. data/app/models/raif/agent.rb +36 -5
  30. data/app/models/raif/agents/native_tool_calling_agent.rb +101 -19
  31. data/app/models/raif/concerns/has_prompt_templates.rb +88 -0
  32. data/app/models/raif/concerns/has_runtime_duration.rb +41 -0
  33. data/app/models/raif/concerns/json_schema_definition.rb +16 -3
  34. data/app/models/raif/concerns/llm_prompt_caching.rb +20 -0
  35. data/app/models/raif/concerns/llms/anthropic/message_formatting.rb +6 -0
  36. data/app/models/raif/concerns/llms/anthropic/tool_formatting.rb +5 -1
  37. data/app/models/raif/concerns/llms/bedrock/message_formatting.rb +7 -0
  38. data/app/models/raif/concerns/llms/bedrock/tool_formatting.rb +4 -0
  39. data/app/models/raif/concerns/llms/google/message_formatting.rb +5 -2
  40. data/app/models/raif/concerns/llms/google/tool_formatting.rb +4 -0
  41. data/app/models/raif/concerns/llms/message_formatting.rb +30 -0
  42. data/app/models/raif/concerns/llms/open_ai_completions/response_tool_calls.rb +1 -1
  43. data/app/models/raif/concerns/llms/open_ai_completions/tool_formatting.rb +4 -0
  44. data/app/models/raif/concerns/llms/open_ai_responses/tool_formatting.rb +4 -0
  45. data/app/models/raif/concerns/provider_managed_tool_calls.rb +162 -0
  46. data/app/models/raif/conversation.rb +24 -3
  47. data/app/models/raif/conversation_entry.rb +6 -3
  48. data/app/models/raif/embedding_models/bedrock.rb +10 -1
  49. data/app/models/raif/embedding_models/google.rb +37 -0
  50. data/app/models/raif/evals/llm_judge.rb +70 -0
  51. data/{lib → app/models}/raif/evals/llm_judges/binary.rb +38 -0
  52. data/{lib → app/models}/raif/evals/llm_judges/comparative.rb +38 -0
  53. data/{lib → app/models}/raif/evals/llm_judges/scored.rb +38 -0
  54. data/{lib → app/models}/raif/evals/llm_judges/summarization.rb +38 -0
  55. data/app/models/raif/llm.rb +82 -7
  56. data/app/models/raif/llms/anthropic.rb +26 -4
  57. data/app/models/raif/llms/bedrock.rb +59 -5
  58. data/app/models/raif/llms/google.rb +28 -2
  59. data/app/models/raif/llms/open_ai_base.rb +4 -0
  60. data/app/models/raif/llms/open_ai_completions.rb +9 -2
  61. data/app/models/raif/llms/open_ai_responses.rb +9 -2
  62. data/app/models/raif/llms/open_router.rb +10 -3
  63. data/app/models/raif/model_completion.rb +75 -34
  64. data/app/models/raif/model_tool.rb +45 -3
  65. data/app/models/raif/model_tool_invocation.rb +31 -1
  66. data/app/models/raif/prompt_studio_batch_run.rb +155 -0
  67. data/app/models/raif/prompt_studio_batch_run_item.rb +220 -0
  68. data/app/models/raif/streaming_responses/bedrock.rb +60 -1
  69. data/app/models/raif/task.rb +30 -6
  70. data/app/views/layouts/raif/admin.html.erb +31 -1
  71. data/app/views/raif/admin/agents/_agent.html.erb +1 -0
  72. data/app/views/raif/admin/agents/index.html.erb +48 -0
  73. data/app/views/raif/admin/agents/show.html.erb +4 -0
  74. data/app/views/raif/admin/llms/index.html.erb +110 -0
  75. data/app/views/raif/admin/model_completions/_model_completion.html.erb +3 -7
  76. data/app/views/raif/admin/model_completions/index.html.erb +14 -1
  77. data/app/views/raif/admin/model_completions/show.html.erb +164 -55
  78. data/app/views/raif/admin/model_tool_invocations/index.html.erb +1 -1
  79. data/app/views/raif/admin/model_tool_invocations/show.html.erb +18 -0
  80. data/app/views/raif/admin/prompt_studio/agents/index.html.erb +56 -0
  81. data/app/views/raif/admin/prompt_studio/agents/show.html.erb +57 -0
  82. data/app/views/raif/admin/prompt_studio/batch_runs/_batch_run_item.html.erb +54 -0
  83. data/app/views/raif/admin/prompt_studio/batch_runs/_judge_config_fields.html.erb +76 -0
  84. data/app/views/raif/admin/prompt_studio/batch_runs/_judge_detail_modal.html.erb +27 -0
  85. data/app/views/raif/admin/prompt_studio/batch_runs/_modal.html.erb +35 -0
  86. data/app/views/raif/admin/prompt_studio/batch_runs/_progress.html.erb +78 -0
  87. data/app/views/raif/admin/prompt_studio/batch_runs/show.html.erb +49 -0
  88. data/app/views/raif/admin/prompt_studio/conversations/index.html.erb +48 -0
  89. data/app/views/raif/admin/prompt_studio/conversations/show.html.erb +36 -0
  90. data/app/views/raif/admin/prompt_studio/shared/_nav_tabs.html.erb +17 -0
  91. data/app/views/raif/admin/prompt_studio/shared/_prompt_comparison.html.erb +87 -0
  92. data/app/views/raif/admin/prompt_studio/shared/_type_filter.html.erb +54 -0
  93. data/app/views/raif/admin/prompt_studio/tasks/_task_result.html.erb +145 -0
  94. data/app/views/raif/admin/prompt_studio/tasks/_task_row.html.erb +12 -0
  95. data/app/views/raif/admin/prompt_studio/tasks/_task_type_filter.html.erb +58 -0
  96. data/app/views/raif/admin/prompt_studio/tasks/_tasks_table.html.erb +22 -0
  97. data/app/views/raif/admin/prompt_studio/tasks/index.html.erb +35 -0
  98. data/app/views/raif/admin/prompt_studio/tasks/show.html.erb +19 -0
  99. data/app/views/raif/admin/tasks/_task.html.erb +1 -0
  100. data/app/views/raif/admin/tasks/index.html.erb +17 -5
  101. data/app/views/raif/admin/tasks/show.html.erb +20 -0
  102. data/app/views/raif/conversation_entries/_message.html.erb +10 -6
  103. data/config/importmap.rb +8 -0
  104. data/config/locales/admin.en.yml +128 -0
  105. data/config/locales/en.yml +36 -2
  106. data/config/routes.rb +8 -0
  107. data/db/migrate/20260307000000_add_prompt_studio_run_to_raif_tasks.rb +7 -0
  108. data/db/migrate/20260308000000_create_raif_prompt_studio_batch_runs.rb +27 -0
  109. data/db/migrate/20260308000001_create_raif_prompt_studio_batch_run_items.rb +24 -0
  110. data/db/migrate/20260407000000_add_cache_token_columns_to_raif_model_completions.rb +8 -0
  111. data/lib/generators/raif/agent/agent_generator.rb +18 -0
  112. data/lib/generators/raif/agent/templates/agent.rb.tt +7 -5
  113. data/lib/generators/raif/agent/templates/system_prompt.erb.tt +3 -0
  114. data/lib/generators/raif/conversation/conversation_generator.rb +19 -1
  115. data/lib/generators/raif/conversation/templates/system_prompt.erb.tt +4 -0
  116. data/lib/generators/raif/install/templates/initializer.rb +68 -27
  117. data/lib/generators/raif/task/task_generator.rb +18 -0
  118. data/lib/generators/raif/task/templates/prompt.erb.tt +4 -0
  119. data/lib/generators/raif/task/templates/task.rb.tt +9 -8
  120. data/lib/raif/configuration.rb +10 -0
  121. data/lib/raif/embedding_model_registry.rb +8 -0
  122. data/lib/raif/engine.rb +16 -1
  123. data/lib/raif/errors/blank_response_error.rb +8 -0
  124. data/lib/raif/errors/prompt_template_error.rb +15 -0
  125. data/lib/raif/errors.rb +2 -0
  126. data/lib/raif/evals.rb +0 -6
  127. data/lib/raif/llm_registry.rb +230 -9
  128. data/lib/raif/prompt_studio_comparison_builder.rb +138 -0
  129. data/lib/raif/token_estimator.rb +28 -0
  130. data/lib/raif/version.rb +1 -1
  131. data/lib/raif.rb +2 -0
  132. data/spec/support/rspec_helpers.rb +7 -1
  133. data/spec/support/test_task.rb +9 -0
  134. data/spec/support/test_template_task.rb +41 -0
  135. metadata +65 -7
  136. data/lib/raif/evals/llm_judge.rb +0 -32
  137. data/{lib → app/models}/raif/evals/scoring_rubric.rb +0 -0
@@ -0,0 +1,35 @@
1
+ <div class="d-flex justify-content-between align-items-center my-4">
2
+ <h1 class="mb-0"><%= t("raif.admin.prompt_studio.common.prompt_studio") %></h1>
3
+ </div>
4
+
5
+ <%= render "raif/admin/prompt_studio/shared/nav_tabs", active_tab: :tasks %>
6
+
7
+ <% if @show_batch_runs %>
8
+ <div data-controller="raif--select-all-checkboxes raif--cost-estimate" data-raif--cost-estimate-pricing-value="<%= llm_pricing_json %>" data-action="raif--select-all-checkboxes:toggled->raif--cost-estimate#calculate">
9
+ <%= render "raif/admin/prompt_studio/tasks/task_type_filter",
10
+ task_types: @task_types,
11
+ selected_type: @selected_type,
12
+ llm_model_keys: @llm_model_keys,
13
+ show_batch_runs: true %>
14
+
15
+ <%= form_tag raif.admin_prompt_studio_batch_runs_path, method: :post, id: "batch-run-form" do %>
16
+ <%= hidden_field_tag :task_type, @selected_type %>
17
+ <%= render "raif/admin/prompt_studio/tasks/tasks_table", tasks: @tasks, pagy: @pagy, show_checkbox: true %>
18
+ <%= render "raif/admin/prompt_studio/batch_runs/modal" %>
19
+ <% end %>
20
+ </div>
21
+ <% else %>
22
+ <%= render "raif/admin/prompt_studio/tasks/task_type_filter",
23
+ task_types: @task_types,
24
+ selected_type: @selected_type,
25
+ llm_model_keys: @llm_model_keys,
26
+ show_batch_runs: false %>
27
+
28
+ <% if @selected_type.present? && @tasks.present? %>
29
+ <%= render "raif/admin/prompt_studio/tasks/tasks_table", tasks: @tasks, pagy: @pagy, show_checkbox: false %>
30
+ <% elsif @selected_type.present? %>
31
+ <div class="alert alert-info">
32
+ <%= t("raif.admin.prompt_studio.common.no_instances") %>
33
+ </div>
34
+ <% end %>
35
+ <% end %>
@@ -0,0 +1,19 @@
1
+ <% if @task.status == :in_progress %>
2
+ <%= turbo_stream_from @task %>
3
+ <% end %>
4
+
5
+ <div class="d-flex justify-content-between align-items-center my-4">
6
+ <h1 class="mb-0"><%= t("raif.admin.prompt_studio.tasks.show.page_title", id: @task.id) %></h1>
7
+ <%= link_to t("raif.admin.prompt_studio.common.back"),
8
+ raif.admin_prompt_studio_tasks_path(task_type: @task.type),
9
+ class: "btn btn-outline-secondary" %>
10
+ </div>
11
+
12
+ <% if flash[:alert].present? %>
13
+ <div class="alert alert-danger alert-dismissible fade show" role="alert">
14
+ <%= flash[:alert] %>
15
+ <button type="button" class="btn-close" data-bs-dismiss="alert" aria-label="Close"></button>
16
+ </div>
17
+ <% end %>
18
+
19
+ <%= render "raif/admin/prompt_studio/tasks/task_result", task: @task, comparison: @comparison, original_task: @original_task %>
@@ -16,5 +16,6 @@
16
16
  <span class="badge bg-secondary"><%= t("raif.admin.common.pending") %></span>
17
17
  <% end %>
18
18
  </td>
19
+ <td><small class="text-muted"><%= task.runtime_duration %></small></td>
19
20
  <td><small class="text-muted"><%= truncate(task.prompt, length: 100) %></small></td>
20
21
  </tr>
@@ -9,15 +9,15 @@
9
9
  <div class="col-12">
10
10
  <%= form_tag raif.admin_tasks_path, method: :get, class: "mb-4" do %>
11
11
  <div class="row align-items-end">
12
- <div class="col-md-4">
12
+ <div class="col-md-3">
13
13
  <div class="form-group">
14
14
  <label for="task_types"><%= t("raif.admin.common.type") %></label>
15
15
  <%= select_tag :task_types,
16
- options_for_select([["All", "all"]] + @task_types.map{|type| [type, type] }, @selected_type),
17
- { class: "form-select" } %>
16
+ options_for_select([[t("raif.admin.common.all"), "all"]] + @task_types.map{|type| [type, type] }, @selected_type),
17
+ { class: "form-select", data: { controller: "raif--tom-select" } } %>
18
18
  </div>
19
19
  </div>
20
- <div class="col-md-4">
20
+ <div class="col-md-3">
21
21
  <div class="form-group">
22
22
  <label for="task_statuses"><%= t("raif.admin.common.status") %></label>
23
23
  <%= select_tag :task_statuses,
@@ -31,7 +31,18 @@
31
31
  ],
32
32
  @selected_statuses
33
33
  ),
34
- { class: "form-select" } %>
34
+ { class: "form-select", data: { controller: "raif--tom-select" } } %>
35
+ </div>
36
+ </div>
37
+ <div class="col-md-3">
38
+ <div class="form-group">
39
+ <label for="llm_model_key"><%= t("raif.admin.common.model") %></label>
40
+ <%= select_tag :llm_model_key,
41
+ options_for_select(
42
+ [[t("raif.admin.common.all"), ""]] + @llm_model_keys.map { |key| [key, key] },
43
+ @selected_llm_model_key
44
+ ),
45
+ { class: "form-select", data: { controller: "raif--tom-select" } } %>
35
46
  </div>
36
47
  </div>
37
48
  <div class="col-md-2">
@@ -51,6 +62,7 @@
51
62
  <th><%= t("raif.admin.common.creator") %></th>
52
63
  <th><%= t("raif.admin.common.model") %></th>
53
64
  <th><%= t("raif.admin.common.status") %></th>
65
+ <th><%= t("raif.admin.common.duration") %></th>
54
66
  <th><%= t("raif.admin.common.prompt") %></th>
55
67
  </tr>
56
68
  </thead>
@@ -68,12 +68,24 @@
68
68
  <% end %>
69
69
  </div>
70
70
  </div>
71
+ <div class="row mb-3">
72
+ <div class="col-md-3"><strong><%= t("raif.admin.common.duration") %>:</strong></div>
73
+ <div class="col-md-9"><%= @task.runtime_duration %></div>
74
+ </div>
71
75
  <% if @task.requested_language_key.present? %>
72
76
  <div class="row mb-3">
73
77
  <div class="col-md-3"><strong><%= t("raif.admin.common.requested_language") %>:</strong></div>
74
78
  <div class="col-md-9"><%= @task.requested_language_key %></div>
75
79
  </div>
76
80
  <% end %>
81
+ <% if @task.run_with.present? %>
82
+ <div class="row mb-3">
83
+ <div class="col-md-3"><strong><%= t("raif.admin.common.run_with") %>:</strong></div>
84
+ <div class="col-md-9">
85
+ <pre class="pre-wrap mb-0"><%= JSON.pretty_generate(@task.run_with) %></pre>
86
+ </div>
87
+ </div>
88
+ <% end %>
77
89
  </div>
78
90
  </div>
79
91
 
@@ -136,6 +148,14 @@
136
148
  <% end %>
137
149
  </div>
138
150
  </div>
151
+ <div class="row mb-3">
152
+ <div class="col-md-3"><strong><%= t("raif.admin.common.cache_read_input_tokens") %>:</strong></div>
153
+ <div class="col-md-9"><%= @task.raif_model_completion.cache_read_input_tokens ? number_with_delimiter(@task.raif_model_completion.cache_read_input_tokens) : "-" %></div>
154
+ </div>
155
+ <div class="row mb-3">
156
+ <div class="col-md-3"><strong><%= t("raif.admin.common.cache_creation_input_tokens") %>:</strong></div>
157
+ <div class="col-md-9"><%= @task.raif_model_completion.cache_creation_input_tokens ? number_with_delimiter(@task.raif_model_completion.cache_creation_input_tokens) : "-" %></div>
158
+ </div>
139
159
  </div>
140
160
  </div>
141
161
  <% end %>
@@ -6,13 +6,17 @@
6
6
 
7
7
  <% if content.present? %>
8
8
  <div class="mb-1 rounded-2 p-3 <%= message_type == :user ? "bg-primary text-white" : "border" %>">
9
- <% case local_assigns[:conversation_entry]&.response_format %>
10
- <% when "text" %>
11
- <%= simple_format content %>
12
- <% when "html" %>
13
- <%= sanitize content %>
9
+ <% if message_type == :user %>
10
+ <%= simple_format h(content), {}, sanitize: false %>
14
11
  <% else %>
15
- <%= content %>
12
+ <% case local_assigns[:conversation_entry]&.response_format %>
13
+ <% when "text" %>
14
+ <%= simple_format content %>
15
+ <% when "html" %>
16
+ <%= sanitize content %>
17
+ <% else %>
18
+ <%= content %>
19
+ <% end %>
16
20
  <% end %>
17
21
 
18
22
  <% if message_type == :model_response && local_assigns[:conversation_entry]&.citations.present? %>
data/config/importmap.rb CHANGED
@@ -1,6 +1,14 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  pin "raif", to: "raif.js"
4
+ pin "raif_admin", to: "raif_admin.js"
4
5
 
5
6
  pin "raif/controllers/conversations_controller", to: "raif/controllers/conversations_controller.js"
7
+ pin "raif/admin/judge_config_controller", to: "raif/admin/judge_config_controller.js"
8
+ pin "raif/admin/select_all_checkboxes_controller", to: "raif/admin/select_all_checkboxes_controller.js"
9
+ pin "raif/admin/cost_estimate_controller", to: "raif/admin/cost_estimate_controller.js"
10
+ pin "raif/admin/tom_select_controller", to: "raif/admin/tom_select_controller.js"
11
+ pin "raif/admin/table_search_controller", to: "raif/admin/table_search_controller.js"
12
+ pin "raif/admin/sortable_table_controller", to: "raif/admin/sortable_table_controller.js"
13
+ pin "raif/admin/copy_to_clipboard_controller", to: "raif/admin/copy_to_clipboard_controller.js"
6
14
  pin "raif/stream_actions/raif_scroll_to_bottom", to: "raif/stream_actions/raif_scroll_to_bottom.js"
@@ -13,6 +13,8 @@ en:
13
13
  arguments_schema: Arguments Schema
14
14
  at: at
15
15
  available_tools: Available Tools
16
+ cache_creation_input_tokens: Cache Creation Tokens
17
+ cache_read_input_tokens: Cache Read Tokens
16
18
  citations: Citations
17
19
  completed: Completed
18
20
  completed_at: Completed At
@@ -24,8 +26,10 @@ en:
24
26
  count: Count
25
27
  created_at: Created At
26
28
  creator: Creator
29
+ default: default
27
30
  description: Description
28
31
  details: Details
32
+ duration: Duration
29
33
  entries_count: Entries Count
30
34
  entry: Entry
31
35
  est_cost: est. cost
@@ -38,12 +42,14 @@ en:
38
42
  final_answer: Final Answer
39
43
  id: ID
40
44
  in_progress: In Progress
45
+ inferred_from_citations: Inferred from response citations.
41
46
  initial_task: Initial Task
42
47
  input: Input
43
48
  input_token_cost: Input Token Cost
44
49
  invalid_json: Invalid JSON
45
50
  iterations: Iterations
46
51
  language: Language
52
+ llms: LLMs
47
53
  messages: Messages
48
54
  model: Model
49
55
  model_completion: Model Completion
@@ -68,16 +74,21 @@ en:
68
74
  prettified: Prettified
69
75
  prompt: Prompt
70
76
  prompt_tokens: Prompt Tokens
77
+ provider_managed_tool_calls: Provider-Managed Tool Calls
78
+ provider_tool_call_id: Provider Tool Call ID
71
79
  raw: Raw
72
80
  rendered: Rendered
73
81
  requested_language: Requested Language
74
82
  response: Response
83
+ response_array: Response Array
75
84
  response_format: Response Format
76
85
  result: Result
77
86
  retry_count: Retry Count
87
+ run_with: Run With
78
88
  running: Running
79
89
  since: Since
80
90
  source: Source
91
+ sources: Sources
81
92
  started_at: Started At
82
93
  stats: Stats
83
94
  status: Status
@@ -117,6 +128,18 @@ en:
117
128
  layouts:
118
129
  admin:
119
130
  title: Raif Admin
131
+ llms:
132
+ index:
133
+ api_name: API Name
134
+ cost_note: Costs shown are per 1,000,000 tokens.
135
+ default_badge: default
136
+ input_cost: Input Cost (per 1M tokens)
137
+ name: Name
138
+ no_llms: No LLMs are currently registered.
139
+ output_cost: Output Cost (per 1M tokens)
140
+ provider: Provider
141
+ search_placeholder: Search by name, provider, or API name...
142
+ title: Registered LLMs
120
143
  model_completions:
121
144
  show:
122
145
  back_to_model_completions: Back to Model Completions
@@ -124,7 +147,112 @@ en:
124
147
  model_tool_invocations:
125
148
  show:
126
149
  back_to_model_tool_invocations: Back to Model Tool Invocations
150
+ observation_disclaimer: This observation is regenerated from current data and code. It may differ from what was originally sent to the model.
151
+ observation_sent_to_model: Observation Sent to Model
152
+ observation_unavailable: 'Could not generate observation: %{error}'
127
153
  title: 'Model Tool Invocation #%{id}'
154
+ prompt_studio:
155
+ agents:
156
+ show:
157
+ agent_details: Agent Details
158
+ page_title: 'Prompt Studio - Agent #%{id}'
159
+ batch_runs:
160
+ create:
161
+ confirm: This will create a batch run and send requests to the selected LLM. Continue?
162
+ error: 'Batch run failed: %{message}'
163
+ new_batch_run: New Batch Run
164
+ no_tasks_selected: Please select at least one task.
165
+ submit: Run Selected Batch
166
+ tasks_selected: tasks selected
167
+ title: Batch Run
168
+ judge:
169
+ new_response: New Response
170
+ original_response: Original Response
171
+ tie: Tie
172
+ winner: 'Winner: %{name}'
173
+ judge_config:
174
+ comparison_criteria_label: Comparison Criteria
175
+ comparison_criteria_placeholder: Which response better addresses the prompt
176
+ criteria_label: Criteria
177
+ criteria_placeholder: Response is accurate and complete
178
+ description: Optionally evaluate each result with an LLM judge.
179
+ include_original_prompt_as_context_description: Provides the original task prompt to the judge as additional context for evaluation.
180
+ include_original_prompt_as_context_label: Include original prompt as context
181
+ judge_model_label: Judge Model
182
+ rubric_accuracy: Accuracy
183
+ rubric_clarity: Clarity
184
+ rubric_helpfulness: Helpfulness
185
+ rubric_label: Scoring Rubric
186
+ strict_mode_label: Strict Mode
187
+ summarization_description: Uses the original task prompt as the source content and the new response as the summary to evaluate.
188
+ title: LLM Judge Configuration
189
+ type_binary: Binary (Pass/Fail)
190
+ type_comparative: Comparative (A vs B)
191
+ type_label: Judge Type
192
+ type_none: None
193
+ type_scored: Scored (Rubric)
194
+ type_summarization: Summarization
195
+ show:
196
+ average_score: Average Score
197
+ judge_reasoning: Judge Reasoning
198
+ judge_result: Judge Result
199
+ new_response: New Response
200
+ original_response: Original Response
201
+ page_title: 'Batch Run #%{id}'
202
+ pass_rate: Pass Rate
203
+ progress: Progress
204
+ results: Results
205
+ source_task: Source Task
206
+ summary: Summary
207
+ status:
208
+ completed: Completed
209
+ failed: Failed
210
+ in_progress: In Progress
211
+ judging: Judging
212
+ pending: Pending
213
+ running: Running
214
+ common:
215
+ back: Back
216
+ created_after: Created After
217
+ created_before: Created Before
218
+ current_prompt: Current Prompt
219
+ current_system_prompt: Current System Prompt
220
+ estimated_cost: "~%{cost} (estimate)"
221
+ estimated_tokens: "~%{count} tokens (estimate)"
222
+ no_instances: No instances found.
223
+ original_prompt: Original Prompt
224
+ original_system_prompt: Original System Prompt
225
+ prompt_changed: Prompt Changed
226
+ prompt_studio: Prompt Studio
227
+ prompt_tokens: "%{count} tokens"
228
+ prompt_unchanged: Prompt Unchanged
229
+ run_in_prompt_studio: Run in Prompt Studio
230
+ runs_disabled: Prompt Studio runs are not enabled in this environment.
231
+ select_type: Select a type to browse instances
232
+ system_prompt_changed: System Prompt Changed
233
+ system_prompt_unchanged: System Prompt Unchanged
234
+ type: Type
235
+ warning_stale_reference: 'Warning: Some run_with references could not be loaded. The current prompt may differ from what would be generated with the original data.'
236
+ conversations:
237
+ show:
238
+ conversation_details: Conversation Details
239
+ page_title: 'Prompt Studio - Conversation #%{id}'
240
+ tasks:
241
+ rerun:
242
+ confirm: This will create a new task and send a request to the selected LLM. Continue?
243
+ description: Re-run this task with the current prompt templates and a selected model. This will create a new task record.
244
+ error: 'Re-run failed: %{message}'
245
+ invalid_model: Please select a valid model.
246
+ running: Running...
247
+ submit: Re-run Task
248
+ title: Re-run with Current Prompts
249
+ show:
250
+ new_response: New Response
251
+ original_response: Original Response
252
+ page_title: 'Prompt Studio - Task #%{id}'
253
+ response_comparison: Response Comparison
254
+ task_details: Task Details
255
+ view_response: View Response
128
256
  stats:
129
257
  model_tool_invocations:
130
258
  back_to_stats: Back to Stats
@@ -25,6 +25,7 @@ en:
25
25
  title: Past Conversations
26
26
  embedding_model_names:
27
27
  bedrock_titan_embed_text_v2: AWS Bedrock Titan Text Embeddings v2
28
+ google_gemini_embedding_2: Google Gemini Embedding 2
28
29
  open_ai_text_embedding_3_large: OpenAI Text Embedding 3 Large
29
30
  open_ai_text_embedding_3_small: OpenAI Text Embedding 3 Small
30
31
  open_ai_text_embedding_ada_002: OpenAI Text Embedding Ada 002
@@ -58,7 +59,11 @@ en:
58
59
  anthropic_claude_3_opus: Anthropic Claude 3 Opus
59
60
  anthropic_claude_4_1_opus: Anthropic Claude 4.1 Opus
60
61
  anthropic_claude_4_5_haiku: Anthropic Claude 4.5 Haiku
62
+ anthropic_claude_4_5_opus: Anthropic Claude 4.5 Opus
61
63
  anthropic_claude_4_5_sonnet: Anthropic Claude 4.5 Sonnet
64
+ anthropic_claude_4_6_opus: Anthropic Claude 4.6 Opus
65
+ anthropic_claude_4_6_sonnet: Anthropic Claude 4.6 Sonnet
66
+ anthropic_claude_4_7_opus: Anthropic Claude 4.7 Opus
62
67
  anthropic_claude_4_opus: Anthropic Claude 4 Opus
63
68
  anthropic_claude_4_sonnet: Anthropic Claude 4 Sonnet
64
69
  bedrock_amazon_nova_lite: Amazon Nova Lite (via AWS Bedrock)
@@ -70,13 +75,23 @@ en:
70
75
  bedrock_claude_3_opus: Anthropic Claude 3 Opus (via AWS Bedrock)
71
76
  bedrock_claude_4_1_opus: Claude 4.1 Opus (via AWS Bedrock)
72
77
  bedrock_claude_4_5_haiku: Anthropic Claude 4.5 Haiku (via AWS Bedrock)
78
+ bedrock_claude_4_5_opus: Anthropic Claude 4.5 Opus (via AWS Bedrock)
73
79
  bedrock_claude_4_5_sonnet: Anthropic Claude 4.5 Sonnet (via AWS Bedrock)
80
+ bedrock_claude_4_6_opus: Anthropic Claude 4.6 Opus (via AWS Bedrock)
81
+ bedrock_claude_4_6_sonnet: Anthropic Claude 4.6 Sonnet (via AWS Bedrock)
82
+ bedrock_claude_4_7_opus: Anthropic Claude 4.7 Opus (via AWS Bedrock)
74
83
  bedrock_claude_4_opus: Anthropic Claude 4 Opus (via AWS Bedrock)
75
84
  bedrock_claude_4_sonnet: Anthropic Claude 4 Sonnet (via AWS Bedrock)
85
+ bedrock_deepseek_r1: DeepSeek R1 (via AWS Bedrock)
86
+ bedrock_deepseek_v3_2: DeepSeek v3.2 (via AWS Bedrock)
87
+ bedrock_gpt_oss_120b: OpenAI GPT-OSS 120B (via AWS Bedrock)
88
+ bedrock_gpt_oss_20b: OpenAI GPT-OSS 20B (via AWS Bedrock)
76
89
  google_gemini_2_5_flash: Google Gemini 2.5 Flash
77
90
  google_gemini_2_5_pro: Google Gemini 2.5 Pro
78
- google_gemini_3_0_flash: Google Gemini 3.0 Flash
79
- google_gemini_3_0_pro: Google Gemini 3.0 Pro
91
+ google_gemini_3_0_flash: Google Gemini 3 Flash
92
+ google_gemini_3_0_pro: Google Gemini 3 Pro
93
+ google_gemini_3_1_flash_lite: Google Gemini 3.1 Flash-Lite
94
+ google_gemini_3_1_pro: Google Gemini 3.1 Pro
80
95
  open_ai_gpt_3_5_turbo: OpenAI GPT-3.5 Turbo
81
96
  open_ai_gpt_4_1: OpenAI GPT-4.1
82
97
  open_ai_gpt_4_1_mini: OpenAI GPT-4.1 Mini
@@ -86,6 +101,10 @@ en:
86
101
  open_ai_gpt_5: OpenAI GPT-5
87
102
  open_ai_gpt_5_1: OpenAI GPT-5.1
88
103
  open_ai_gpt_5_2: OpenAI GPT-5.2
104
+ open_ai_gpt_5_3: OpenAI GPT-5.3
105
+ open_ai_gpt_5_4: OpenAI GPT-5.4
106
+ open_ai_gpt_5_4_mini: OpenAI GPT-5.4 Mini
107
+ open_ai_gpt_5_4_nano: OpenAI GPT-5.4 Nano
89
108
  open_ai_gpt_5_mini: OpenAI GPT-5 Mini
90
109
  open_ai_gpt_5_nano: OpenAI GPT-5 Nano
91
110
  open_ai_o1: OpenAI o1
@@ -102,8 +121,15 @@ en:
102
121
  open_ai_responses_gpt_5: OpenAI GPT-5 (Responses API)
103
122
  open_ai_responses_gpt_5_1: OpenAI GPT-5.1 (Responses API)
104
123
  open_ai_responses_gpt_5_2: OpenAI GPT-5.2 (Responses API)
124
+ open_ai_responses_gpt_5_2_pro: OpenAI GPT-5.2 Pro (Responses API)
125
+ open_ai_responses_gpt_5_3: OpenAI GPT-5.3 (Responses API)
126
+ open_ai_responses_gpt_5_4: OpenAI GPT-5.4 (Responses API)
127
+ open_ai_responses_gpt_5_4_mini: OpenAI GPT-5.4 Mini (Responses API)
128
+ open_ai_responses_gpt_5_4_nano: OpenAI GPT-5.4 Nano (Responses API)
129
+ open_ai_responses_gpt_5_4_pro: OpenAI GPT-5.4 Pro (Responses API)
105
130
  open_ai_responses_gpt_5_mini: OpenAI GPT-5 Mini (Responses API)
106
131
  open_ai_responses_gpt_5_nano: OpenAI GPT-5 Nano (Responses API)
132
+ open_ai_responses_gpt_5_pro: OpenAI GPT-5 Pro (Responses API)
107
133
  open_ai_responses_o1: OpenAI o1 (Responses API)
108
134
  open_ai_responses_o1_mini: OpenAI o1 Mini (Responses API)
109
135
  open_ai_responses_o1_pro: OpenAI o1 Pro (Responses API)
@@ -114,18 +140,26 @@ en:
114
140
  open_router_claude_3_7_sonnet: Anthropic Claude 3.7 Sonnet (via OpenRouter)
115
141
  open_router_deepseek_chat_v3: DeepSeek Chat v3 (via OpenRouter)
116
142
  open_router_deepseek_v3_1: DeepSeek v3.1 (via OpenRouter)
143
+ open_router_deepseek_v3_2: DeepSeek v3.2 (via OpenRouter)
117
144
  open_router_gemini_2_0_flash: Google Gemini 2.0 Flash (via OpenRouter)
118
145
  open_router_gemini_2_5_flash: Gemini 2.5 Flash (via OpenRouter)
119
146
  open_router_gemini_2_5_pro: Gemini 2.5 Pro (via OpenRouter)
147
+ open_router_gemini_3_1_flash_lite_preview: Gemini 3.1 Flash-Lite Preview (via OpenRouter)
148
+ open_router_gemini_3_1_pro_preview: Gemini 3.1 Pro Preview (via OpenRouter)
120
149
  open_router_gemini_3_pro_preview: Gemini 3 Pro Preview (via OpenRouter)
150
+ open_router_google_gemma_4_31b_it: Google Gemma 4 31B IT (via OpenRouter)
121
151
  open_router_grok_4: Grok 4 (via OpenRouter)
122
152
  open_router_grok_4_1_fast: Grok 4.1 Fast (via OpenRouter)
153
+ open_router_grok_4_20: Grok 4.20 (via OpenRouter)
154
+ open_router_kimi_k2_5: Kimi K2.5 (via OpenRouter)
123
155
  open_router_kimi_k2_thinking: Kimi K2 Thinking (via OpenRouter)
124
156
  open_router_llama_3_1_8b_instruct: Meta Llama 3.1 8B Instruct (via OpenRouter)
125
157
  open_router_llama_3_3_70b_instruct: Meta Llama 3.3 70B Instruct (via OpenRouter)
126
158
  open_router_llama_4_maverick: Meta Llama 4 Maverick (via OpenRouter)
127
159
  open_router_llama_4_scout: Meta Llama 4 Scout (via OpenRouter)
128
160
  open_router_minimax_m2: Minimax M2 (via OpenRouter)
161
+ open_router_minimax_m2_1: Minimax M2.1 (via OpenRouter)
162
+ open_router_minimax_m2_5: Minimax M2.5 (via OpenRouter)
129
163
  open_router_mistral_large_3_2512: Mistral Large 3 (via OpenRouter)
130
164
  open_router_mistral_small_3_2_24b: Mistral Small 3.2 24B (via OpenRouter)
131
165
  open_router_open_ai_gpt_oss_120b: OpenAI GPT-OSS 120B (via OpenRouter)
data/config/routes.rb CHANGED
@@ -25,6 +25,14 @@ Raif::Engine.routes.draw do
25
25
  resources :model_completions, only: [:index, :show]
26
26
  resources :agents, only: [:index, :show]
27
27
  resources :model_tool_invocations, only: [:index, :show]
28
+ resources :llms, only: [:index]
28
29
  resource :config, only: [:show]
30
+
31
+ namespace :prompt_studio do
32
+ resources :tasks, only: [:index, :show, :create]
33
+ resources :conversations, only: [:index, :show]
34
+ resources :agents, only: [:index, :show]
35
+ resources :batch_runs, only: [:create, :show]
36
+ end
29
37
  end
30
38
  end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ class AddPromptStudioRunToRaifTasks < ActiveRecord::Migration[7.1]
4
+ def change
5
+ add_column :raif_tasks, :prompt_studio_run, :boolean, default: false, null: false
6
+ end
7
+ end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
# Creates the +raif_prompt_studio_batch_runs+ table.
#
# Each row stores the task type and LLM model key of a batch run, optional
# judge settings (type, model key, JSON config), total/completed/failed
# counts, and started/completed/failed datetime columns.
class CreateRaifPromptStudioBatchRuns < ActiveRecord::Migration[7.1]
  def change
    # Use jsonb when the adapter name contains "postgresql"; plain json otherwise.
    json_column_type = if connection.adapter_name.downcase.include?("postgresql")
      :jsonb
    else
      :json
    end

    create_table :raif_prompt_studio_batch_runs do |t|
      t.string :task_type, null: false
      t.string :llm_model_key, null: false
      t.string :judge_type
      t.string :judge_llm_model_key
      # Declared through the public +column+ API with an explicit type instead
      # of dispatching to a type-named method via +send+.
      t.column :judge_config, json_column_type, null: false
      t.integer :total_count, default: 0, null: false
      t.integer :completed_count, default: 0, null: false
      t.integer :failed_count, default: 0, null: false
      t.datetime :started_at
      t.datetime :completed_at
      t.datetime :failed_at

      t.timestamps
    end
  end
end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
# Creates the +raif_prompt_studio_batch_run_items+ table.
#
# Every item references its batch run and a source task (both required), may
# reference a result task and a judge task, and carries a string status
# (default "pending") plus a JSON metadata column. The status column is indexed.
class CreateRaifPromptStudioBatchRunItems < ActiveRecord::Migration[7.1]
  def change
    # jsonb when the adapter name contains "postgresql"; json otherwise.
    postgresql = connection.adapter_name.downcase.include?("postgresql")
    json_column_type = postgresql ? :jsonb : :json

    # All three task references point at the raif_tasks table.
    tasks_foreign_key = { to_table: :raif_tasks }

    create_table :raif_prompt_studio_batch_run_items do |t|
      t.references :batch_run, null: false, foreign_key: { to_table: :raif_prompt_studio_batch_runs }
      t.references :source_task, null: false, foreign_key: tasks_foreign_key
      t.references :result_task, foreign_key: tasks_foreign_key
      t.references :judge_task, foreign_key: tasks_foreign_key
      t.string :status, default: "pending", null: false
      t.column :metadata, json_column_type

      t.timestamps
    end

    add_index :raif_prompt_studio_batch_run_items, :status
  end
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
# Adds two nullable integer columns to +raif_model_completions+:
# +cache_read_input_tokens+ and +cache_creation_input_tokens+.
class AddCacheTokenColumnsToRaifModelCompletions < ActiveRecord::Migration[7.1]
  def change
    %i[cache_read_input_tokens cache_creation_input_tokens].each do |column_name|
      add_column :raif_model_completions, column_name, :integer
    end
  end
end
@@ -13,6 +13,11 @@ module Raif
13
13
  default: false,
14
14
  desc: "Skip generating the corresponding eval set"
15
15
 
16
+ class_option :skip_prompt_template,
17
+ type: :boolean,
18
+ default: false,
19
+ desc: "Skip generating the system prompt template file"
20
+
16
21
  def create_application_agent
17
22
  template "application_agent.rb.tt", "app/models/raif/application_agent.rb" unless File.exist?("app/models/raif/application_agent.rb")
18
23
  end
@@ -21,6 +26,12 @@ module Raif
21
26
  template "agent.rb.tt", File.join("app/models/raif/agents", class_path, "#{file_name}.rb")
22
27
  end
23
28
 
29
# Generates the agent's system prompt template file at
# +system_prompt_template_file_path+, unless the +skip_prompt_template+
# option is set.
def create_system_prompt_template
  template "system_prompt.erb.tt", system_prompt_template_file_path unless options[:skip_prompt_template]
end
34
+
24
35
  def create_directory
25
36
  empty_directory "app/models/raif/agents" unless File.directory?("app/models/raif/agents")
26
37
  end
@@ -33,6 +44,9 @@ module Raif
33
44
 
34
45
  def show_instructions
35
46
  say "\nAgent created!"
47
+ unless options[:skip_prompt_template]
48
+ say " System prompt template: #{system_prompt_template_file_path}"
49
+ end
36
50
  say ""
37
51
  end
38
52
 
@@ -42,6 +56,10 @@ module Raif
42
56
  File.join("raif_evals", "eval_sets", "agents", class_path, "#{file_name}_eval_set.rb")
43
57
  end
44
58
 
59
# Relative path of the generated system prompt template:
# app/views/raif/agents/<class_path>/<file_name>.system_prompt.erb
def system_prompt_template_file_path
  template_basename = "#{file_name}.system_prompt.erb"
  File.join("app/views/raif/agents", class_path, template_basename)
end
62
+
45
63
  end
46
64
  end
47
65
  end
@@ -9,11 +9,13 @@
9
9
  # ]
10
10
  # end
11
11
 
12
- # Enter your agent's system prompt here. Alternatively, you can change your agent's superclass
13
- # to an existing agent types (like Raif::Agents::NativeToolCallingAgent) to utilize an existing system prompt.
14
- def build_system_prompt
15
- # TODO: Implement your system prompt here
16
- end
12
+ # System prompt is defined in app/views/raif/agents/<%= class_path.any? ? class_path.join("/") + "/" : "" %><%= file_name %>.system_prompt.erb
13
+ # Alternatively, you can change your agent's superclass to an existing agent type
14
+ # (like Raif::Agents::NativeToolCallingAgent) to utilize an existing system prompt,
15
+ # or override build_system_prompt directly:
16
+ # def build_system_prompt
17
+ # "Your system prompt here"
18
+ # end
17
19
 
18
20
  # Each iteration of the agent loop will generate a new Raif::ModelCompletion record and
19
21
  # then call this method with it as an argument.
@@ -0,0 +1,3 @@
1
+ <%% # Define the system prompt for Raif::Agents::<%= class_name %> here. %>
2
+ <%% # All instance methods and run_with attributes are available in this template. %>
3
+ <%% # You can also use Rails view helpers and render partials. %>