RubyGems - raif - Versions diffs - 1.4.0 → 1.5.0 - Mend

raif 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (137) hide show

data/app/views/raif/admin/prompt_studio/tasks/index.html.erb ADDED Viewed

@@ -0,0 +1,35 @@
+<%# Prompt Studio tasks index: page heading, nav tabs with the tasks tab active, then the task type filter and tasks table. %>
+<div class="d-flex justify-content-between align-items-center my-4">
+  <h1 class="mb-0"><%= t("raif.admin.prompt_studio.common.prompt_studio") %></h1>
+</div>
+<%= render "raif/admin/prompt_studio/shared/nav_tabs", active_tab: :tasks %>
+<%# When @show_batch_runs is set: filter, checkbox-enabled table and batch run modal. The table and modal sit inside a form that POSTs to the batch runs path. %>
+<% if @show_batch_runs %>
+  <div data-controller="raif--select-all-checkboxes raif--cost-estimate" data-raif--cost-estimate-pricing-value="<%= llm_pricing_json %>" data-action="raif--select-all-checkboxes:toggled->raif--cost-estimate#calculate">
+    <%= render "raif/admin/prompt_studio/tasks/task_type_filter",
+          task_types: @task_types,
+          selected_type: @selected_type,
+          llm_model_keys: @llm_model_keys,
+          show_batch_runs: true %>
+    <%= form_tag raif.admin_prompt_studio_batch_runs_path, method: :post, id: "batch-run-form" do %>
+      <%= hidden_field_tag :task_type, @selected_type %>
+      <%= render "raif/admin/prompt_studio/tasks/tasks_table", tasks: @tasks, pagy: @pagy, show_checkbox: true %>
+      <%= render "raif/admin/prompt_studio/batch_runs/modal" %>
+    <% end %>
+  </div>
+<% else %>
+  <%# Otherwise: filter, then the table without checkboxes, or an info alert when a type is selected but @tasks is blank. Nothing is rendered below the filter when no type is selected. %>
+  <%= render "raif/admin/prompt_studio/tasks/task_type_filter",
+        task_types: @task_types,
+        selected_type: @selected_type,
+        llm_model_keys: @llm_model_keys,
+        show_batch_runs: false %>
+  <% if @selected_type.present? && @tasks.present? %>
+    <%= render "raif/admin/prompt_studio/tasks/tasks_table", tasks: @tasks, pagy: @pagy, show_checkbox: false %>
+  <% elsif @selected_type.present? %>
+    <div class="alert alert-info">
+      <%= t("raif.admin.prompt_studio.common.no_instances") %>
+    </div>
+  <% end %>
+<% end %>

data/app/views/raif/admin/prompt_studio/tasks/show.html.erb ADDED Viewed

@@ -0,0 +1,19 @@
+<%# Prompt Studio task detail page. The Turbo Stream subscription for the task is only rendered while its status is :in_progress. %>
+<% if @task.status == :in_progress %>
+  <%= turbo_stream_from @task %>
+<% end %>
+<div class="d-flex justify-content-between align-items-center my-4">
+  <h1 class="mb-0"><%= t("raif.admin.prompt_studio.tasks.show.page_title", id: @task.id) %></h1>
+  <%# Back link returns to the tasks index filtered by this task's type. %>
+  <%= link_to t("raif.admin.prompt_studio.common.back"),
+        raif.admin_prompt_studio_tasks_path(task_type: @task.type),
+        class: "btn btn-outline-secondary" %>
+</div>
+<%# Dismissible alert, rendered only when flash[:alert] is present. %>
+<% if flash[:alert].present? %>
+  <div class="alert alert-danger alert-dismissible fade show" role="alert">
+    <%= flash[:alert] %>
+    <button type="button" class="btn-close" data-bs-dismiss="alert" aria-label="Close"></button>
+  </div>
+<% end %>
+<%= render "raif/admin/prompt_studio/tasks/task_result", task: @task, comparison: @comparison, original_task: @original_task %>

data/app/views/raif/admin/tasks/_task.html.erb CHANGED Viewed

@@ -16,5 +16,6 @@
       <span class="badge bg-secondary"><%= t("raif.admin.common.pending") %></span>
     <% end %>
   </td>
+  <td><small class="text-muted"><%= task.runtime_duration %></small></td>
   <td><small class="text-muted"><%= truncate(task.prompt, length: 100) %></small></td>
 </tr>

data/app/views/raif/admin/tasks/index.html.erb CHANGED Viewed

@@ -9,15 +9,15 @@
   <div class="col-12">
     <%= form_tag raif.admin_tasks_path, method: :get, class: "mb-4" do %>
       <div class="row align-items-end">
-        <div class="col-md-4">
+        <div class="col-md-3">
           <div class="form-group">
             <label for="task_types"><%= t("raif.admin.common.type") %></label>
             <%= select_tag :task_types,
-                  options_for_select([["All", "all"]] + @task_types.map{|type| [type, type] }, @selected_type),
-                  { class: "form-select" } %>
+                  options_for_select([[t("raif.admin.common.all"), "all"]] + @task_types.map{|type| [type, type] }, @selected_type),
+                  { class: "form-select", data: { controller: "raif--tom-select" } } %>
           </div>
         </div>
-        <div class="col-md-4">
+        <div class="col-md-3">
           <div class="form-group">
             <label for="task_statuses"><%= t("raif.admin.common.status") %></label>
             <%= select_tag :task_statuses,
@@ -31,7 +31,18 @@
                     ],
                     @selected_statuses
                   ),
-                  { class: "form-select" } %>
+                  { class: "form-select", data: { controller: "raif--tom-select" } } %>
+          </div>
+        </div>
+        <div class="col-md-3">
+          <div class="form-group">
+            <label for="llm_model_key"><%= t("raif.admin.common.model") %></label>
+            <%= select_tag :llm_model_key,
+                  options_for_select(
+                    [[t("raif.admin.common.all"), ""]] + @llm_model_keys.map { |key| [key, key] },
+                    @selected_llm_model_key
+                  ),
+                  { class: "form-select", data: { controller: "raif--tom-select" } } %>
           </div>
         </div>
         <div class="col-md-2">
@@ -51,6 +62,7 @@
               <th><%= t("raif.admin.common.creator") %></th>
               <th><%= t("raif.admin.common.model") %></th>
               <th><%= t("raif.admin.common.status") %></th>
+              <th><%= t("raif.admin.common.duration") %></th>
               <th><%= t("raif.admin.common.prompt") %></th>
             </tr>
           </thead>

data/app/views/raif/admin/tasks/show.html.erb CHANGED Viewed

@@ -68,12 +68,24 @@
         <% end %>
       </div>
     </div>
+    <div class="row mb-3">
+      <div class="col-md-3"><strong><%= t("raif.admin.common.duration") %>:</strong></div>
+      <div class="col-md-9"><%= @task.runtime_duration %></div>
+    </div>
     <% if @task.requested_language_key.present? %>
       <div class="row mb-3">
         <div class="col-md-3"><strong><%= t("raif.admin.common.requested_language") %>:</strong></div>
         <div class="col-md-9"><%= @task.requested_language_key %></div>
       </div>
     <% end %>
+    <% if @task.run_with.present? %>
+      <div class="row mb-3">
+        <div class="col-md-3"><strong><%= t("raif.admin.common.run_with") %>:</strong></div>
+        <div class="col-md-9">
+          <pre class="pre-wrap mb-0"><%= JSON.pretty_generate(@task.run_with) %></pre>
+        </div>
+      </div>
+    <% end %>
   </div>
 </div>
@@ -136,6 +148,14 @@
         <% end %>
       </div>
     </div>
+    <div class="row mb-3">
+      <div class="col-md-3"><strong><%= t("raif.admin.common.cache_read_input_tokens") %>:</strong></div>
+      <div class="col-md-9"><%= @task.raif_model_completion.cache_read_input_tokens ? number_with_delimiter(@task.raif_model_completion.cache_read_input_tokens) : "-" %></div>
+    </div>
+    <div class="row mb-3">
+      <div class="col-md-3"><strong><%= t("raif.admin.common.cache_creation_input_tokens") %>:</strong></div>
+      <div class="col-md-9"><%= @task.raif_model_completion.cache_creation_input_tokens ? number_with_delimiter(@task.raif_model_completion.cache_creation_input_tokens) : "-" %></div>
+    </div>
   </div>
 </div>
 <% end %>

data/app/views/raif/conversation_entries/_message.html.erb CHANGED Viewed

@@ -6,13 +6,17 @@
     <% if content.present? %>
       <div class="mb-1 rounded-2 p-3 <%= message_type == :user ? "bg-primary text-white" : "border" %>">
-        <% case local_assigns[:conversation_entry]&.response_format %>
-        <% when "text" %>
-          <%= simple_format content %>
-        <% when "html" %>
-          <%= sanitize content %>
+        <% if message_type == :user %>
+          <%= simple_format h(content), {}, sanitize: false %>
         <% else %>
-          <%= content %>
+          <% case local_assigns[:conversation_entry]&.response_format %>
+          <% when "text" %>
+            <%= simple_format content %>
+          <% when "html" %>
+            <%= sanitize content %>
+          <% else %>
+            <%= content %>
+          <% end %>
         <% end %>
         <% if message_type == :model_response && local_assigns[:conversation_entry]&.citations.present? %>

data/config/importmap.rb CHANGED Viewed

@@ -1,6 +1,14 @@
 # frozen_string_literal: true
 pin "raif", to: "raif.js"
+pin "raif_admin", to: "raif_admin.js"
 pin "raif/controllers/conversations_controller", to: "raif/controllers/conversations_controller.js"
+pin "raif/admin/judge_config_controller", to: "raif/admin/judge_config_controller.js"
+pin "raif/admin/select_all_checkboxes_controller", to: "raif/admin/select_all_checkboxes_controller.js"
+pin "raif/admin/cost_estimate_controller", to: "raif/admin/cost_estimate_controller.js"
+pin "raif/admin/tom_select_controller", to: "raif/admin/tom_select_controller.js"
+pin "raif/admin/table_search_controller", to: "raif/admin/table_search_controller.js"
+pin "raif/admin/sortable_table_controller", to: "raif/admin/sortable_table_controller.js"
+pin "raif/admin/copy_to_clipboard_controller", to: "raif/admin/copy_to_clipboard_controller.js"
 pin "raif/stream_actions/raif_scroll_to_bottom", to: "raif/stream_actions/raif_scroll_to_bottom.js"

data/config/locales/admin.en.yml CHANGED Viewed

@@ -13,6 +13,8 @@ en:
         arguments_schema: Arguments Schema
         at: at
         available_tools: Available Tools
+        cache_creation_input_tokens: Cache Creation Tokens
+        cache_read_input_tokens: Cache Read Tokens
         citations: Citations
         completed: Completed
         completed_at: Completed At
@@ -24,8 +26,10 @@ en:
         count: Count
         created_at: Created At
         creator: Creator
+        default: default
         description: Description
         details: Details
+        duration: Duration
         entries_count: Entries Count
         entry: Entry
         est_cost: est. cost
@@ -38,12 +42,14 @@ en:
         final_answer: Final Answer
         id: ID
         in_progress: In Progress
+        inferred_from_citations: Inferred from response citations.
         initial_task: Initial Task
         input: Input
         input_token_cost: Input Token Cost
         invalid_json: Invalid JSON
         iterations: Iterations
         language: Language
+        llms: LLMs
         messages: Messages
         model: Model
         model_completion: Model Completion
@@ -68,16 +74,21 @@ en:
         prettified: Prettified
         prompt: Prompt
         prompt_tokens: Prompt Tokens
+        provider_managed_tool_calls: Provider-Managed Tool Calls
+        provider_tool_call_id: Provider Tool Call ID
         raw: Raw
         rendered: Rendered
         requested_language: Requested Language
         response: Response
+        response_array: Response Array
         response_format: Response Format
         result: Result
         retry_count: Retry Count
+        run_with: Run With
         running: Running
         since: Since
         source: Source
+        sources: Sources
         started_at: Started At
         stats: Stats
         status: Status
@@ -117,6 +128,18 @@ en:
       layouts:
         admin:
           title: Raif Admin
+      llms:
+        index:
+          api_name: API Name
+          cost_note: Costs shown are per 1,000,000 tokens.
+          default_badge: default
+          input_cost: Input Cost (per 1M tokens)
+          name: Name
+          no_llms: No LLMs are currently registered.
+          output_cost: Output Cost (per 1M tokens)
+          provider: Provider
+          search_placeholder: Search by name, provider, or API name...
+          title: Registered LLMs
       model_completions:
         show:
           back_to_model_completions: Back to Model Completions
@@ -124,7 +147,112 @@ en:
       model_tool_invocations:
         show:
           back_to_model_tool_invocations: Back to Model Tool Invocations
+          observation_disclaimer: This observation is regenerated from current data and code. It may differ from what was originally sent to the model.
+          observation_sent_to_model: Observation Sent to Model
+          observation_unavailable: 'Could not generate observation: %{error}'
           title: 'Model Tool Invocation #%{id}'
+      prompt_studio:
+        agents:
+          show:
+            agent_details: Agent Details
+            page_title: 'Prompt Studio - Agent #%{id}'
+        batch_runs:
+          create:
+            confirm: This will create a batch run and send requests to the selected LLM. Continue?
+            error: 'Batch run failed: %{message}'
+            new_batch_run: New Batch Run
+            no_tasks_selected: Please select at least one task.
+            submit: Run Selected Batch
+            tasks_selected: tasks selected
+            title: Batch Run
+          judge:
+            new_response: New Response
+            original_response: Original Response
+            tie: Tie
+            winner: 'Winner: %{name}'
+          judge_config:
+            comparison_criteria_label: Comparison Criteria
+            comparison_criteria_placeholder: Which response better addresses the prompt
+            criteria_label: Criteria
+            criteria_placeholder: Response is accurate and complete
+            description: Optionally evaluate each result with an LLM judge.
+            include_original_prompt_as_context_description: Provides the original task prompt to the judge as additional context for evaluation.
+            include_original_prompt_as_context_label: Include original prompt as context
+            judge_model_label: Judge Model
+            rubric_accuracy: Accuracy
+            rubric_clarity: Clarity
+            rubric_helpfulness: Helpfulness
+            rubric_label: Scoring Rubric
+            strict_mode_label: Strict Mode
+            summarization_description: Uses the original task prompt as the source content and the new response as the summary to evaluate.
+            title: LLM Judge Configuration
+            type_binary: Binary (Pass/Fail)
+            type_comparative: Comparative (A vs B)
+            type_label: Judge Type
+            type_none: None
+            type_scored: Scored (Rubric)
+            type_summarization: Summarization
+          show:
+            average_score: Average Score
+            judge_reasoning: Judge Reasoning
+            judge_result: Judge Result
+            new_response: New Response
+            original_response: Original Response
+            page_title: 'Batch Run #%{id}'
+            pass_rate: Pass Rate
+            progress: Progress
+            results: Results
+            source_task: Source Task
+            summary: Summary
+          status:
+            completed: Completed
+            failed: Failed
+            in_progress: In Progress
+            judging: Judging
+            pending: Pending
+            running: Running
+        common:
+          back: Back
+          created_after: Created After
+          created_before: Created Before
+          current_prompt: Current Prompt
+          current_system_prompt: Current System Prompt
+          estimated_cost: "~%{cost} (estimate)"
+          estimated_tokens: "~%{count} tokens (estimate)"
+          no_instances: No instances found.
+          original_prompt: Original Prompt
+          original_system_prompt: Original System Prompt
+          prompt_changed: Prompt Changed
+          prompt_studio: Prompt Studio
+          prompt_tokens: "%{count} tokens"
+          prompt_unchanged: Prompt Unchanged
+          run_in_prompt_studio: Run in Prompt Studio
+          runs_disabled: Prompt Studio runs are not enabled in this environment.
+          select_type: Select a type to browse instances
+          system_prompt_changed: System Prompt Changed
+          system_prompt_unchanged: System Prompt Unchanged
+          type: Type
+          warning_stale_reference: 'Warning: Some run_with references could not be loaded. The current prompt may differ from what would be generated with the original data.'
+        conversations:
+          show:
+            conversation_details: Conversation Details
+            page_title: 'Prompt Studio - Conversation #%{id}'
+        tasks:
+          rerun:
+            confirm: This will create a new task and send a request to the selected LLM. Continue?
+            description: Re-run this task with the current prompt templates and a selected model. This will create a new task record.
+            error: 'Re-run failed: %{message}'
+            invalid_model: Please select a valid model.
+            running: Running...
+            submit: Re-run Task
+            title: Re-run with Current Prompts
+          show:
+            new_response: New Response
+            original_response: Original Response
+            page_title: 'Prompt Studio - Task #%{id}'
+            response_comparison: Response Comparison
+            task_details: Task Details
+            view_response: View Response
       stats:
         model_tool_invocations:
           back_to_stats: Back to Stats

data/config/locales/en.yml CHANGED Viewed

@@ -25,6 +25,7 @@ en:
         title: Past Conversations
     embedding_model_names:
       bedrock_titan_embed_text_v2: AWS Bedrock Titan Text Embeddings v2
+      google_gemini_embedding_2: Google Gemini Embedding 2
       open_ai_text_embedding_3_large: OpenAI Text Embedding 3 Large
       open_ai_text_embedding_3_small: OpenAI Text Embedding 3 Small
       open_ai_text_embedding_ada_002: OpenAI Text Embedding Ada 002
@@ -58,7 +59,11 @@ en:
       anthropic_claude_3_opus: Anthropic Claude 3 Opus
       anthropic_claude_4_1_opus: Anthropic Claude 4.1 Opus
       anthropic_claude_4_5_haiku: Anthropic Claude 4.5 Haiku
+      anthropic_claude_4_5_opus: Anthropic Claude 4.5 Opus
       anthropic_claude_4_5_sonnet: Anthropic Claude 4.5 Sonnet
+      anthropic_claude_4_6_opus: Anthropic Claude 4.6 Opus
+      anthropic_claude_4_6_sonnet: Anthropic Claude 4.6 Sonnet
+      anthropic_claude_4_7_opus: Anthropic Claude 4.7 Opus
       anthropic_claude_4_opus: Anthropic Claude 4 Opus
       anthropic_claude_4_sonnet: Anthropic Claude 4 Sonnet
       bedrock_amazon_nova_lite: Amazon Nova Lite (via AWS Bedrock)
@@ -70,13 +75,23 @@ en:
       bedrock_claude_3_opus: Anthropic Claude 3 Opus (via AWS Bedrock)
       bedrock_claude_4_1_opus: Claude 4.1 Opus (via AWS Bedrock)
       bedrock_claude_4_5_haiku: Anthropic Claude 4.5 Haiku (via AWS Bedrock)
+      bedrock_claude_4_5_opus: Anthropic Claude 4.5 Opus (via AWS Bedrock)
       bedrock_claude_4_5_sonnet: Anthropic Claude 4.5 Sonnet (via AWS Bedrock)
+      bedrock_claude_4_6_opus: Anthropic Claude 4.6 Opus (via AWS Bedrock)
+      bedrock_claude_4_6_sonnet: Anthropic Claude 4.6 Sonnet (via AWS Bedrock)
+      bedrock_claude_4_7_opus: Anthropic Claude 4.7 Opus (via AWS Bedrock)
       bedrock_claude_4_opus: Anthropic Claude 4 Opus (via AWS Bedrock)
       bedrock_claude_4_sonnet: Anthropic Claude 4 Sonnet (via AWS Bedrock)
+      bedrock_deepseek_r1: DeepSeek R1 (via AWS Bedrock)
+      bedrock_deepseek_v3_2: DeepSeek v3.2 (via AWS Bedrock)
+      bedrock_gpt_oss_120b: OpenAI GPT-OSS 120B (via AWS Bedrock)
+      bedrock_gpt_oss_20b: OpenAI GPT-OSS 20B (via AWS Bedrock)
       google_gemini_2_5_flash: Google Gemini 2.5 Flash
       google_gemini_2_5_pro: Google Gemini 2.5 Pro
-      google_gemini_3_0_flash: Google Gemini 3.0 Flash
-      google_gemini_3_0_pro: Google Gemini 3.0 Pro
+      google_gemini_3_0_flash: Google Gemini 3 Flash
+      google_gemini_3_0_pro: Google Gemini 3 Pro
+      google_gemini_3_1_flash_lite: Google Gemini 3.1 Flash-Lite
+      google_gemini_3_1_pro: Google Gemini 3.1 Pro
       open_ai_gpt_3_5_turbo: OpenAI GPT-3.5 Turbo
       open_ai_gpt_4_1: OpenAI GPT-4.1
       open_ai_gpt_4_1_mini: OpenAI GPT-4.1 Mini
@@ -86,6 +101,10 @@ en:
       open_ai_gpt_5: OpenAI GPT-5
       open_ai_gpt_5_1: OpenAI GPT-5.1
       open_ai_gpt_5_2: OpenAI GPT-5.2
+      open_ai_gpt_5_3: OpenAI GPT-5.3
+      open_ai_gpt_5_4: OpenAI GPT-5.4
+      open_ai_gpt_5_4_mini: OpenAI GPT-5.4 Mini
+      open_ai_gpt_5_4_nano: OpenAI GPT-5.4 Nano
       open_ai_gpt_5_mini: OpenAI GPT-5 Mini
       open_ai_gpt_5_nano: OpenAI GPT-5 Nano
       open_ai_o1: OpenAI o1
@@ -102,8 +121,15 @@ en:
       open_ai_responses_gpt_5: OpenAI GPT-5 (Responses API)
       open_ai_responses_gpt_5_1: OpenAI GPT-5.1 (Responses API)
       open_ai_responses_gpt_5_2: OpenAI GPT-5.2 (Responses API)
+      open_ai_responses_gpt_5_2_pro: OpenAI GPT-5.2 Pro (Responses API)
+      open_ai_responses_gpt_5_3: OpenAI GPT-5.3 (Responses API)
+      open_ai_responses_gpt_5_4: OpenAI GPT-5.4 (Responses API)
+      open_ai_responses_gpt_5_4_mini: OpenAI GPT-5.4 Mini (Responses API)
+      open_ai_responses_gpt_5_4_nano: OpenAI GPT-5.4 Nano (Responses API)
+      open_ai_responses_gpt_5_4_pro: OpenAI GPT-5.4 Pro (Responses API)
       open_ai_responses_gpt_5_mini: OpenAI GPT-5 Mini (Responses API)
       open_ai_responses_gpt_5_nano: OpenAI GPT-5 Nano (Responses API)
+      open_ai_responses_gpt_5_pro: OpenAI GPT-5 Pro (Responses API)
       open_ai_responses_o1: OpenAI o1 (Responses API)
       open_ai_responses_o1_mini: OpenAI o1 Mini (Responses API)
       open_ai_responses_o1_pro: OpenAI o1 Pro (Responses API)
@@ -114,18 +140,26 @@ en:
       open_router_claude_3_7_sonnet: Anthropic Claude 3.7 Sonnet (via OpenRouter)
       open_router_deepseek_chat_v3: DeepSeek Chat v3 (via OpenRouter)
       open_router_deepseek_v3_1: DeepSeek v3.1 (via OpenRouter)
+      open_router_deepseek_v3_2: DeepSeek v3.2 (via OpenRouter)
       open_router_gemini_2_0_flash: Google Gemini 2.0 Flash (via OpenRouter)
       open_router_gemini_2_5_flash: Gemini 2.5 Flash (via OpenRouter)
       open_router_gemini_2_5_pro: Gemini 2.5 Pro (via OpenRouter)
+      open_router_gemini_3_1_flash_lite_preview: Gemini 3.1 Flash-Lite Preview (via OpenRouter)
+      open_router_gemini_3_1_pro_preview: Gemini 3.1 Pro Preview (via OpenRouter)
       open_router_gemini_3_pro_preview: Gemini 3 Pro Preview (via OpenRouter)
+      open_router_google_gemma_4_31b_it: Google Gemma 4 31B IT (via OpenRouter)
       open_router_grok_4: Grok 4 (via OpenRouter)
       open_router_grok_4_1_fast: Grok 4.1 Fast (via OpenRouter)
+      open_router_grok_4_20: Grok 4.20 (via OpenRouter)
+      open_router_kimi_k2_5: Kimi K2.5 (via OpenRouter)
       open_router_kimi_k2_thinking: Kimi K2 Thinking (via OpenRouter)
       open_router_llama_3_1_8b_instruct: Meta Llama 3.1 8B Instruct (via OpenRouter)
       open_router_llama_3_3_70b_instruct: Meta Llama 3.3 70B Instruct (via OpenRouter)
       open_router_llama_4_maverick: Meta Llama 4 Maverick (via OpenRouter)
       open_router_llama_4_scout: Meta Llama 4 Scout (via OpenRouter)
       open_router_minimax_m2: Minimax M2 (via OpenRouter)
+      open_router_minimax_m2_1: Minimax M2.1 (via OpenRouter)
+      open_router_minimax_m2_5: Minimax M2.5 (via OpenRouter)
       open_router_mistral_large_3_2512: Mistral Large 3 (via OpenRouter)
       open_router_mistral_small_3_2_24b: Mistral Small 3.2 24B (via OpenRouter)
       open_router_open_ai_gpt_oss_120b: OpenAI GPT-OSS 120B (via OpenRouter)

data/config/routes.rb CHANGED Viewed

@@ -25,6 +25,14 @@ Raif::Engine.routes.draw do
     resources :model_completions, only: [:index, :show]
     resources :agents, only: [:index, :show]
     resources :model_tool_invocations, only: [:index, :show]
+    resources :llms, only: [:index]
     resource :config, only: [:show]
+    namespace :prompt_studio do
+      resources :tasks, only: [:index, :show, :create]
+      resources :conversations, only: [:index, :show]
+      resources :agents, only: [:index, :show]
+      resources :batch_runs, only: [:create, :show]
+    end
   end
 end

data/db/migrate/20260307000000_add_prompt_studio_run_to_raif_tasks.rb ADDED Viewed

@@ -0,0 +1,7 @@
+# frozen_string_literal: true
+class AddPromptStudioRunToRaifTasks < ActiveRecord::Migration[7.1]
+  def change
+    add_column :raif_tasks, :prompt_studio_run, :boolean, default: false, null: false
+  end
+end

data/db/migrate/20260308000000_create_raif_prompt_studio_batch_runs.rb ADDED Viewed

@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+class CreateRaifPromptStudioBatchRuns < ActiveRecord::Migration[7.1]
+  def change
+    json_column_type = if connection.adapter_name.downcase.include?("postgresql")
+      :jsonb
+    else
+      :json
+    end
+    create_table :raif_prompt_studio_batch_runs do |t|
+      t.string :task_type, null: false
+      t.string :llm_model_key, null: false
+      t.string :judge_type
+      t.string :judge_llm_model_key
+      t.send json_column_type, :judge_config, null: false
+      t.integer :total_count, default: 0, null: false
+      t.integer :completed_count, default: 0, null: false
+      t.integer :failed_count, default: 0, null: false
+      t.datetime :started_at
+      t.datetime :completed_at
+      t.datetime :failed_at
+      t.timestamps
+    end
+  end
+end

data/db/migrate/20260308000001_create_raif_prompt_studio_batch_run_items.rb ADDED Viewed

@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+class CreateRaifPromptStudioBatchRunItems < ActiveRecord::Migration[7.1]
+  def change
+    json_column_type = if connection.adapter_name.downcase.include?("postgresql")
+      :jsonb
+    else
+      :json
+    end
+    create_table :raif_prompt_studio_batch_run_items do |t|
+      t.references :batch_run, null: false, foreign_key: { to_table: :raif_prompt_studio_batch_runs }
+      t.references :source_task, null: false, foreign_key: { to_table: :raif_tasks }
+      t.references :result_task, foreign_key: { to_table: :raif_tasks }
+      t.references :judge_task, foreign_key: { to_table: :raif_tasks }
+      t.string :status, default: "pending", null: false
+      t.column :metadata, json_column_type
+      t.timestamps
+    end
+    add_index :raif_prompt_studio_batch_run_items, :status
+  end
+end

data/db/migrate/20260407000000_add_cache_token_columns_to_raif_model_completions.rb ADDED Viewed

@@ -0,0 +1,8 @@
+# frozen_string_literal: true
+class AddCacheTokenColumnsToRaifModelCompletions < ActiveRecord::Migration[7.1]
+  def change
+    add_column :raif_model_completions, :cache_read_input_tokens, :integer
+    add_column :raif_model_completions, :cache_creation_input_tokens, :integer
+  end
+end

data/lib/generators/raif/agent/agent_generator.rb CHANGED Viewed

@@ -13,6 +13,11 @@ module Raif
         default: false,
         desc: "Skip generating the corresponding eval set"
+      class_option :skip_prompt_template,
+        type: :boolean,
+        default: false,
+        desc: "Skip generating the system prompt template file"
       def create_application_agent
         template "application_agent.rb.tt", "app/models/raif/application_agent.rb" unless File.exist?("app/models/raif/application_agent.rb")
       end
@@ -21,6 +26,12 @@ module Raif
         template "agent.rb.tt", File.join("app/models/raif/agents", class_path, "#{file_name}.rb")
       end
+      def create_system_prompt_template
+        return if options[:skip_prompt_template]
+        template "system_prompt.erb.tt", system_prompt_template_file_path
+      end
       def create_directory
         empty_directory "app/models/raif/agents" unless File.directory?("app/models/raif/agents")
       end
@@ -33,6 +44,9 @@ module Raif
       def show_instructions
         say "\nAgent created!"
+        unless options[:skip_prompt_template]
+          say "  System prompt template: #{system_prompt_template_file_path}"
+        end
         say ""
       end
@@ -42,6 +56,10 @@ module Raif
         File.join("raif_evals", "eval_sets", "agents", class_path, "#{file_name}_eval_set.rb")
       end
+      def system_prompt_template_file_path
+        File.join("app/views/raif/agents", class_path, "#{file_name}.system_prompt.erb")
+      end
     end
   end
 end

data/lib/generators/raif/agent/templates/agent.rb.tt CHANGED Viewed

@@ -9,11 +9,13 @@
     #   ]
     # end
-    # Enter your agent's system prompt here. Alternatively, you can change your agent's superclass
-    # to an existing agent types (like Raif::Agents::NativeToolCallingAgent) to utilize an existing system prompt.
-    def build_system_prompt
-      # TODO: Implement your system prompt here
-    end
+    # System prompt is defined in app/views/raif/agents/<%= class_path.any? ? class_path.join("/") + "/" : "" %><%= file_name %>.system_prompt.erb
+    # Alternatively, you can change your agent's superclass to an existing agent type
+    # (like Raif::Agents::NativeToolCallingAgent) to utilize an existing system prompt,
+    # or override build_system_prompt directly:
+    # def build_system_prompt
+    #   "Your system prompt here"
+    # end
     # Each iteration of the agent loop will generate a new Raif::ModelCompletion record and
     # then call this method with it as an argument.

data/lib/generators/raif/agent/templates/system_prompt.erb.tt ADDED Viewed

@@ -0,0 +1,3 @@
+<%%# Define the system prompt for Raif::Agents::<%= class_name %> here. %>
+<%%# All instance methods and run_with attributes are available in this template. %>
+<%%# You can also use Rails view helpers and render partials. %>