ruby_llm-evals 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +180 -8
  3. data/Rakefile +0 -2
  4. data/app/assets/stylesheets/ruby_llm/evals/application.css +15 -0
  5. data/app/assets/stylesheets/ruby_llm/evals/bulma.min.css +3 -0
  6. data/app/assets/stylesheets/ruby_llm/evals/json_editor.css +25 -0
  7. data/app/controllers/concerns/ruby_llm/evals/prompt_executions/prompt_execution_scoped.rb +19 -0
  8. data/app/controllers/ruby_llm/evals/application_controller.rb +14 -0
  9. data/app/controllers/ruby_llm/evals/prompt_executions/failures_controller.rb +15 -0
  10. data/app/controllers/ruby_llm/evals/prompt_executions/passages_controller.rb +15 -0
  11. data/app/controllers/ruby_llm/evals/prompt_executions/retries_controller.rb +16 -0
  12. data/app/controllers/ruby_llm/evals/prompts_controller.rb +87 -0
  13. data/app/controllers/ruby_llm/evals/runs_controller.rb +46 -0
  14. data/app/helpers/ruby_llm/evals/application_helper.rb +39 -0
  15. data/app/helpers/ruby_llm/evals/prompt_executions_helper.rb +6 -0
  16. data/app/helpers/ruby_llm/evals/prompts_helper.rb +37 -0
  17. data/app/helpers/ruby_llm/evals/runs_helper.rb +6 -0
  18. data/app/javascript/ruby_llm/evals/application.js +3 -0
  19. data/app/javascript/ruby_llm/evals/controllers/application.js +13 -0
  20. data/app/javascript/ruby_llm/evals/controllers/eval_type_selector_controller.js +37 -0
  21. data/app/javascript/ruby_llm/evals/controllers/file_input_controller.js +21 -0
  22. data/app/javascript/ruby_llm/evals/controllers/index.js +4 -0
  23. data/app/javascript/ruby_llm/evals/controllers/json_editor_controller.js +129 -0
  24. data/app/javascript/ruby_llm/evals/controllers/provider_model_controller.js +85 -0
  25. data/app/javascript/ruby_llm/evals/controllers/schema_selector_controller.js +31 -0
  26. data/app/jobs/ruby_llm/evals/application_job.rb +6 -0
  27. data/app/jobs/ruby_llm/evals/execute_sample_job.rb +26 -0
  28. data/app/jobs/ruby_llm/evals/perform_run_job.rb +21 -0
  29. data/app/mailers/ruby_llm/evals/application_mailer.rb +8 -0
  30. data/app/models/concerns/ruby_llm/evals/job_trackable.rb +15 -0
  31. data/app/models/ruby_llm/evals/application_record.rb +7 -0
  32. data/app/models/ruby_llm/evals/page.rb +53 -0
  33. data/app/models/ruby_llm/evals/prompt.rb +55 -0
  34. data/app/models/ruby_llm/evals/prompt_execution.rb +169 -0
  35. data/app/models/ruby_llm/evals/run.rb +45 -0
  36. data/app/models/ruby_llm/evals/sample.rb +20 -0
  37. data/app/schemas/ruby_llm/evals/judge_verdict_schema.rb +8 -0
  38. data/app/views/layouts/ruby_llm/evals/application.html.erb +29 -0
  39. data/app/views/ruby_llm/evals/application/_flashes.html.erb +9 -0
  40. data/app/views/ruby_llm/evals/application/_nav.html.erb +12 -0
  41. data/app/views/ruby_llm/evals/application/_pagination.html.erb +7 -0
  42. data/app/views/ruby_llm/evals/application/_tabs.html.erb +6 -0
  43. data/app/views/ruby_llm/evals/prompts/_filters.html.erb +15 -0
  44. data/app/views/ruby_llm/evals/prompts/_form.html.erb +104 -0
  45. data/app/views/ruby_llm/evals/prompts/_prompt.html.erb +14 -0
  46. data/app/views/ruby_llm/evals/prompts/compare.html.erb +90 -0
  47. data/app/views/ruby_llm/evals/prompts/edit.html.erb +5 -0
  48. data/app/views/ruby_llm/evals/prompts/index.html.erb +32 -0
  49. data/app/views/ruby_llm/evals/prompts/new.html.erb +5 -0
  50. data/app/views/ruby_llm/evals/prompts/show.html.erb +107 -0
  51. data/app/views/ruby_llm/evals/runs/_filters.html.erb +17 -0
  52. data/app/views/ruby_llm/evals/runs/_run.html.erb +13 -0
  53. data/app/views/ruby_llm/evals/runs/index.html.erb +30 -0
  54. data/app/views/ruby_llm/evals/runs/show.html.erb +188 -0
  55. data/app/views/ruby_llm/evals/samples/_form.html.erb +88 -0
  56. data/config/importmap.rb +13 -0
  57. data/config/locales/en.yml +7 -0
  58. data/config/routes.rb +20 -1
  59. data/db/migrate/20251022211228_create_ruby_llm_evals_prompts.rb +21 -0
  60. data/db/migrate/20251022211229_create_ruby_llm_evals_samples.rb +14 -0
  61. data/db/migrate/20251022211230_create_ruby_llm_evals_runs.rb +21 -0
  62. data/db/migrate/20251022211231_create_ruby_llm_evals_prompt_executions.rb +26 -0
  63. data/lib/activemodel/validations/json_validator.rb +14 -0
  64. data/lib/ruby_llm/evals/engine.rb +49 -1
  65. data/lib/ruby_llm/evals/version.rb +2 -2
  66. data/lib/ruby_llm/evals.rb +7 -3
  67. metadata +65 -6
  68. /data/lib/tasks/{ruby_llm/evals_tasks.rake → ruby_llm_evals_tasks.rake} +0 -0
@@ -0,0 +1,90 @@
1
+ <% content_for :title, "#{@prompt.name} - Runs Comparison" %>
2
+
3
+ <h1 class="title"><%= @prompt.name %> - Runs Comparison</h1>
4
+
5
+ <% if @runs.any? %>
6
+ <div style="overflow-x: auto;">
7
+ <table class="table is-bordered is-striped is-fullwidth">
8
+ <thead>
9
+ <tr>
10
+ <th>Metric</th>
11
+ <% @runs.each do |run| %>
12
+ <th>
13
+ <%= link_to "Run ##{run.id}", run_path(run) %>
14
+ <div class="is-size-7 has-text-grey">
15
+ <%= run.created_at %>
16
+ </div>
17
+ </th>
18
+ <% end %>
19
+ </tr>
20
+ </thead>
21
+ <tbody>
22
+ <tr>
23
+ <th>Provider/Model</th>
24
+ <% @runs.each do |run| %>
25
+ <td><%= run.provider %>/<%= run.model %></td>
26
+ <% end %>
27
+ </tr>
28
+
29
+ <tr>
30
+ <th>Temperature</th>
31
+ <% @runs.each do |run| %>
32
+ <td><%= run.temperature || "N/A" %></td>
33
+ <% end %>
34
+ </tr>
35
+
36
+ <tr>
37
+ <th>Instructions</th>
38
+ <% @runs.each do |run| %>
39
+ <td><%= truncate run.instructions %></td>
40
+ <% end %>
41
+ </tr>
42
+
43
+ <tr>
44
+ <th>Message</th>
45
+ <% @runs.each do |run| %>
46
+ <td><%= truncate run.message %></td>
47
+ <% end %>
48
+ </tr>
49
+
50
+ <tr>
51
+ <th>Accuracy</th>
52
+ <% @runs.each do |run| %>
53
+ <td><%= accuracy(run) %></td>
54
+ <% end %>
55
+ </tr>
56
+
57
+ <tr>
58
+ <th>Cost</th>
59
+ <% @runs.each do |run| %>
60
+ <td>$<%= run.total_cost %></td>
61
+ <% end %>
62
+ </tr>
63
+
64
+ <tr>
65
+ <th>Duration</th>
66
+ <% @runs.each do |run| %>
67
+ <td><%= duration(run) %></td>
68
+ <% end %>
69
+ </tr>
70
+
71
+ <%# Per-sample result matrix: one row per sample, one cell per run. %>
72
+ <%# Each cell passes the run's execution for that sample (nil when none matches) to status_indicator. %>
73
+ <% @samples.each do |sample| %>
74
+ <tr>
75
+ <th>
76
+ Sample #<%= sample.id %>
77
+ </th>
78
+ <% @runs.each do |run| %>
79
+ <td class="has-text-centered">
80
+ <%= status_indicator run.prompt_executions.find { |pe| pe.sample.id == sample.id } %>
81
+ </td>
82
+ <% end %>
83
+ </tr>
84
+ <% end %>
85
+ </tbody>
86
+ </table>
87
+ </div>
88
+ <% else %>
89
+ <div class="mt-6 has-text-centered is-size-4 has-text-grey">There are no runs</div>
90
+ <% end %>
@@ -0,0 +1,5 @@
1
+ <% content_for :title, "Editing prompt" %>
2
+
3
+ <h1 class="title">Editing prompt</h1>
4
+
5
+ <%= render "form", prompt: @prompt %>
@@ -0,0 +1,32 @@
1
+ <% content_for :title, "Prompts" %>
2
+
3
+ <div class="level">
4
+ <div class="level-left">
5
+ <%= render "filters", clear_path: prompts_path %>
6
+ </div>
7
+ <div class="level-right is-align-self-flex-end">
8
+ <%= link_to "New prompt", new_prompt_path, class: "button is-link" %>
9
+ </div>
10
+ </div>
11
+
12
+ <% if @prompts.records.any? %>
13
+ <table class="table is-hoverable is-fullwidth">
14
+ <thead>
15
+ <tr>
16
+ <th>Name</th>
17
+ <th>Provider/Model</th>
18
+ <th>Instructions</th>
19
+ <th>Message</th>
20
+ <th>Runs</th>
21
+ <th></th>
22
+ </tr>
23
+ </thead>
24
+ <tbody>
25
+ <%= render partial: "ruby_llm/evals/prompts/prompt", collection: @prompts.records %>
26
+ </tbody>
27
+ </table>
28
+ <% else %>
29
+ <div class="mt-6 has-text-centered is-size-4 has-text-grey">There are no prompts</div>
30
+ <% end %>
31
+
32
+ <%= render "ruby_llm/evals/application/pagination", page: @prompts %>
@@ -0,0 +1,5 @@
1
+ <% content_for :title, "New prompt" %>
2
+
3
+ <h1 class="title">New prompt</h1>
4
+
5
+ <%= render "form", prompt: @prompt %>
@@ -0,0 +1,107 @@
1
+ <% content_for :title, @prompt.name %>
2
+
3
+ <h1 class="title"><%= @prompt.name %></h1>
4
+
5
+ <table class="table">
6
+ <tbody>
7
+ <tr>
8
+ <th>Slug</th>
9
+ <td><%= @prompt.slug %></td>
10
+ </tr>
11
+ <tr>
12
+ <th>Provider</th>
13
+ <td><%= @prompt.provider %></td>
14
+ </tr>
15
+ <tr>
16
+ <th>Model</th>
17
+ <td><%= @prompt.model %></td>
18
+ </tr>
19
+ <tr>
20
+ <th>Temperature</th>
21
+ <td><%= @prompt.temperature %></td>
22
+ </tr>
23
+ <tr>
24
+ <th>Params</th>
25
+ <td><%= json @prompt.params %></td>
26
+ </tr>
27
+ <tr>
28
+ <th>Tools</th>
29
+ <td><%= @prompt.tools.try :to_sentence %></td>
30
+ </tr>
31
+ <tr>
32
+ <th>Schema</th>
33
+ <td>
34
+ <% if @prompt.schema_other.present? %>
35
+ <%= json @prompt.schema_other %>
36
+ <% elsif @prompt.schema.present? %>
37
+ <%= @prompt.schema %>
38
+ <% end %>
39
+ </td>
40
+ </tr>
41
+ <tr>
42
+ <th>Instructions</th>
43
+ <td style="white-space: pre-wrap;"><%= @prompt.instructions %></td>
44
+ </tr>
45
+ <tr>
46
+ <th>Message</th>
47
+ <td style="white-space: pre-wrap;"><%= @prompt.message %></td>
48
+ </tr>
49
+ </tbody>
50
+ </table>
51
+
52
+ <h2 class="mt-6 subtitle">Samples</h2>
53
+
54
+ <% if @prompt.samples.any? %>
55
+ <table class="table is-fullwidth">
56
+ <thead>
57
+ <tr>
58
+ <th>Sample</th>
59
+ <th>Variables</th>
60
+ <th>Eval type</th>
61
+ <th>Expected output</th>
62
+ <th>Files</th>
63
+ </tr>
64
+ </thead>
65
+ <tbody>
66
+ <% @prompt.samples.each do |sample| %>
67
+ <tr>
68
+ <td>#<%= sample.id %></td>
69
+ <td><%= json sample.variables %></td>
70
+ <td>
71
+ <%= sample.eval_type %>
72
+ <% if sample.llm_judge? %>
73
+ <div>(<%= sample.judge_provider %>/<%= sample.judge_model %>)</div>
74
+ <% end %>
75
+ </td>
76
+ <td><%= expected_output sample %></td>
77
+ <td>
78
+ <ul>
79
+ <% sample.files.each do |file| %>
80
+ <li>
81
+ <% if file.previewable? %>
82
+ <%= image_tag main_app.url_for(file.preview(resize_to_limit: [400, 400]).processed) %>
83
+ <% elsif file.variable? %>
84
+ <%= image_tag main_app.url_for(file.variant(resize_to_limit: [400, 400]).processed) %>
85
+ <% else %>
86
+ <%= link_to file.filename, main_app.url_for(file) %>
87
+ <% end %>
88
+ <div><%= link_to file.filename, main_app.rails_blob_path(file, disposition: "attachment") %> (<%= number_to_human_size(file.byte_size) %>)</div>
89
+ </li>
90
+ <% end %>
91
+ </ul>
92
+ </td>
93
+ </tr>
94
+ <% end %>
95
+ </tbody>
96
+ </table>
97
+ <% else %>
98
+ <div class="mt-6 has-text-centered is-size-4 has-text-grey">There are no samples</div>
99
+ <% end %>
100
+
101
+ <div class="mt-6 buttons">
102
+ <% if @prompt.runs.any? %>
103
+ <%= link_to "Compare runs", compare_prompt_path(@prompt), class: "button" %>
104
+ <% end %>
105
+ <%= link_to "Edit this prompt", edit_prompt_path(@prompt), class: "button" %>
106
+ <%= button_to "Destroy this prompt", @prompt, method: :delete, class: "button is-danger" %>
107
+ </div>
@@ -0,0 +1,17 @@
1
+ <div class="level">
2
+ <div class="level-left">
3
+ <%= form_with scope: :filter, url: runs_path, method: :get, data: {controller: "auto-submit"} do |form| %>
4
+ <div class="field is-grouped">
5
+ <div>
6
+ <%= form.label :ruby_llm_evals_prompt_id, "Prompt", class: "label" %>
7
+ <div class="select">
8
+ <%= form.collection_select :ruby_llm_evals_prompt_id, RubyLLM::Evals::Prompt.all, :id, :name, {selected: filter_param.dig(:filter, :ruby_llm_evals_prompt_id), include_blank: true}, data: {action: "auto-submit#submit"} %>
9
+ </div>
10
+ </div>
11
+ <div class="is-align-self-flex-end">
12
+ <%= link_to "Clear", clear_path, class: "button" %>
13
+ </div>
14
+ </div>
15
+ <% end %>
16
+ </div>
17
+ </div>
@@ -0,0 +1,13 @@
1
+ <tr>
2
+ <td><%= link_to "##{run.id}", run_path(run) %></td>
3
+ <td><%= link_to run.prompt.name, run.prompt %></td>
4
+ <td><%= run.provider %>/<%= run.model %></td>
5
+ <td><%= truncate run.instructions %></td>
6
+ <td><%= truncate run.message %></td>
7
+ <td><%= run.started_at %></td>
8
+ <td><%= run.ended_at %></td>
9
+ <td><%= duration run %></td>
10
+ <td><%= accuracy run %></td>
11
+ <td>$<%= run.cost %></td>
12
+ <td><%= button_to "Destroy", run, method: :delete, class: "button is-danger" %></td>
13
+ </tr>
@@ -0,0 +1,30 @@
1
+ <% content_for :title, "Runs" %>
2
+
3
+ <%= render "filters", clear_path: runs_path %>
4
+
5
+ <% if @runs.records.any? %>
6
+ <table class="table is-hoverable is-fullwidth">
7
+ <thead>
8
+ <tr>
9
+ <th>Run</th>
10
+ <th>Prompt</th>
11
+ <th>Provider/Model</th>
12
+ <th>Instructions</th>
13
+ <th>Message</th>
14
+ <th>Started at</th>
15
+ <th>Ended at</th>
16
+ <th>Duration</th>
17
+ <th>Accuracy</th>
18
+ <th>Cost</th>
19
+ <th></th>
20
+ </tr>
21
+ </thead>
22
+ <tbody>
23
+ <%= render partial: "ruby_llm/evals/runs/run", collection: @runs.records %>
24
+ </tbody>
25
+ </table>
26
+ <% else %>
27
+ <div class="mt-6 has-text-centered is-size-4 has-text-grey">There are no runs</div>
28
+ <% end %>
29
+
30
+ <%= render "ruby_llm/evals/application/pagination", page: @runs %>
@@ -0,0 +1,188 @@
1
+ <% content_for :title, "Run ##{@run.id}" %>
2
+
3
+ <h1 class="title">Run #<%= @run.id %></h1>
4
+
5
+ <table class="table">
6
+ <tbody>
7
+ <tr>
8
+ <th>Prompt</th>
9
+ <td><%= link_to @run.prompt.name, @run.prompt %></td>
10
+ </tr>
11
+ <tr>
12
+ <th>Provider/Model</th>
13
+ <td><%= @run.provider %>/<%= @run.model %></td>
14
+ </tr>
15
+ <tr>
16
+ <th>Temperature</th>
17
+ <td><%= @run.temperature %></td>
18
+ </tr>
19
+ <tr>
20
+ <th>Params</th>
21
+ <td><%= json @run.params %></td>
22
+ </tr>
23
+ <tr>
24
+ <th>Tools</th>
25
+ <td><%= @run.tools.try :to_sentence %></td>
26
+ </tr>
27
+ <tr>
28
+ <th>Schema</th>
29
+ <td>
30
+ <% if @run.schema_other.present? %>
31
+ <%= json @run.schema_other %>
32
+ <% elsif @run.schema.present? %>
33
+ <%= @run.schema %>
34
+ <% end %>
35
+ </td>
36
+ </tr>
37
+ <tr>
38
+ <th>Instructions</th>
39
+ <td style="white-space: pre-wrap;"><%= @run.instructions %></td>
40
+ </tr>
41
+ <tr>
42
+ <th>Message</th>
43
+ <td style="white-space: pre-wrap;"><%= @run.message %></td>
44
+ </tr>
45
+ <tr>
46
+ <th>ActiveJob ID</th>
47
+ <td><%= @run.active_job_id %></td>
48
+ </tr>
49
+ <tr>
50
+ <th>Started at</th>
51
+ <td><%= @run.started_at %></td>
52
+ </tr>
53
+ <tr>
54
+ <th>Ended at</th>
55
+ <td><%= @run.ended_at %></td>
56
+ </tr>
57
+ <tr>
58
+ <th>Duration</th>
59
+ <td><%= duration @run %></td>
60
+ </tr>
61
+ <tr>
62
+ <th>Accuracy</th>
63
+ <td><%= accuracy @run %></td>
64
+ </tr>
65
+ <tr>
66
+ <th>Cost</th>
67
+ <td>$<%= @run.cost %></td>
68
+ </tr>
69
+ <% if @run.judge_cost > 0 %>
70
+ <tr>
71
+ <th>Judge Cost</th>
72
+ <td>$<%= @run.judge_cost %></td>
73
+ </tr>
74
+ <tr>
75
+ <th>Total Cost</th>
76
+ <td>$<%= @run.total_cost %></td>
77
+ </tr>
78
+ <% end %>
79
+ </tbody>
80
+ </table>
81
+
82
+ <h2 class="mt-6 subtitle">Prompt executions</h2>
83
+
84
+ <table class="table is-fullwidth">
85
+ <thead>
86
+ <tr>
87
+ <th>Active Job ID</th>
88
+ <th>Variables</th>
89
+ <th>With</th>
90
+ <th>Output message</th>
91
+ <th>Eval type</th>
92
+ <th>Expected output</th>
93
+ <th>Tokens (input/output)</th>
94
+ <th>Duration</th>
95
+ <th>Cost</th>
96
+ <th>Passed?</th>
97
+ <th></th>
98
+ </tr>
99
+ </thead>
100
+ <tbody>
101
+ <% @run.prompt_executions.each do |prompt_execution| %>
102
+ <tr>
103
+ <td>
104
+ <%= prompt_execution.active_job_id %>
105
+ <% if prompt_execution.error_message.present? %>
106
+ <span title="<%= prompt_execution.error_message %>">⚠️</span>
107
+ <% end %>
108
+ </td>
109
+ <td><%= json prompt_execution.variables %></td>
110
+ <td>
111
+ <ul>
112
+ <% prompt_execution.files.each do |file| %>
113
+ <li>
114
+ <% if file.previewable? %>
115
+ <%= image_tag main_app.url_for(file.preview(resize_to_limit: [400, 400]).processed) %>
116
+ <% elsif file.variable? %>
117
+ <%= image_tag main_app.url_for(file.variant(resize_to_limit: [400, 400]).processed) %>
118
+ <% else %>
119
+ <%= link_to file.filename, main_app.url_for(file) %>
120
+ <% end %>
121
+ <div><%= link_to file.filename, main_app.rails_blob_path(file, disposition: "attachment") %> (<%= number_to_human_size(file.byte_size) %>)</div>
122
+ </li>
123
+ <% end %>
124
+ </ul>
125
+ </td>
126
+ <td>
127
+ <% if @run.schema_other.present? || @run.schema.present? %>
128
+ <% if prompt_execution.message.present? %>
129
+ <%= json JSON.parse(prompt_execution.message) %>
130
+ <% end %>
131
+ <% else %>
132
+ <div style="white-space: pre-wrap;"><%= prompt_execution.message %></div>
133
+ <% end %>
134
+ </td>
135
+ <td>
136
+ <%= prompt_execution.eval_type %>
137
+ <% if prompt_execution.llm_judge? %>
138
+ <div>(<%= prompt_execution.judge_provider %>/<%= prompt_execution.judge_model %>)</div>
139
+ <% end %>
140
+ </td>
141
+ <td>
142
+ <% if prompt_execution.llm_judge? %>
143
+ <div><strong>Criteria:</strong> <%= prompt_execution.expected_output %></div>
144
+ <% if prompt_execution.judge_message&.[]("explanation").present? %>
145
+ <div><strong>Explanation:</strong> <%= prompt_execution.judge_message["explanation"] %></div>
146
+ <% end %>
147
+ <% else %>
148
+ <%= expected_output prompt_execution %>
149
+ <% end %>
150
+ </td>
151
+ <td>
152
+ <div><%= prompt_execution.input %>/<%= prompt_execution.output %></div>
153
+ <% if prompt_execution.llm_judge? %>
154
+ <div><%= prompt_execution.judge_input %>/<%= prompt_execution.judge_output %> (judge)</div>
155
+ <% end %>
156
+ </td>
157
+ <td><%= duration prompt_execution %></td>
158
+ <td>
159
+ <div>$<%= prompt_execution.cost %></div>
160
+ <% if prompt_execution.llm_judge? %>
161
+ <div>$<%= prompt_execution.judge_cost %> (judge)</div>
162
+ <% end %>
163
+ </td>
164
+ <td><%= status_indicator(prompt_execution) %></td>
165
+ <td>
166
+ <div class="buttons">
167
+ <% if prompt_execution.error_message.present? %><%# Retry is offered only for executions that recorded an error; helper name follows the nested singular `resource :retry` route. %>
168
+ <%= button_to "Retry", prompt_execution_retry_path(prompt_execution), method: :post, class: "button is-warning is-light" %>
169
+ <% end %>
170
+ <% if prompt_execution.sample.human_judge? %>
171
+ <% if prompt_execution.passed.nil? %>
172
+ <%= button_to "Pass", prompt_execution_passage_path(prompt_execution), method: :post, class: "button is-success is-light" %>
173
+ <%= button_to "Fail", prompt_execution_failure_path(prompt_execution), method: :post, class: "button is-danger is-light" %>
174
+ <% else %>
175
+ <% target_path = prompt_execution.passed? ? prompt_execution_failure_path(prompt_execution) : prompt_execution_passage_path(prompt_execution) %>
176
+ <%= button_to "Toggle", target_path, method: :post, class: "button" %>
177
+ <% end %>
178
+ <% end %>
179
+ </div>
180
+ </td>
181
+ </tr>
182
+ <% end %>
183
+ </tbody>
184
+ </table>
185
+
186
+ <div class="mt-6 buttons">
187
+ <%= button_to "Destroy this run", @run, method: :delete, class: "button is-danger" %>
188
+ </div>
@@ -0,0 +1,88 @@
1
+ <div class="box nested-form-wrapper" data-new-record="<%= form.object.new_record? %>" data-controller="eval-type-selector">
2
+ <div class="columns">
3
+ <div class="column field">
4
+ <%= form.label :variables, class: "label" %>
5
+ <div data-controller="json-editor" data-json-editor-height-value="150px">
6
+ <div data-json-editor-target="editor" class="json-editor"></div>
7
+ <%= form.text_area :variables, value: form.object.variables&.to_json, class: "textarea", data: { json_editor_target: "textarea" } %>
8
+ <p class="help is-danger is-hidden" data-json-editor-target="error"></p>
9
+ </div>
10
+ <p class="help is-danger"><%= form.object.errors.full_messages_for(:variables).join(", ") %></p>
11
+ </div>
12
+
13
+ <div class="column">
14
+ <div class="field">
15
+ <%= form.label :eval_type, class: "label" %>
16
+ <div class="select is-fullwidth">
17
+ <%= form.select :eval_type, RubyLLM::Evals::Sample.eval_types, {}, data: { action: "change->eval-type-selector#toggle", eval_type_selector_target: "select" } %>
18
+ </div>
19
+ <p class="help is-danger"><%= form.object.errors.full_messages_for(:eval_type).join(", ") %></p>
20
+ </div>
21
+
22
+ <div data-eval-type-selector-target="judgeFields" data-controller="provider-model" data-provider-model-models-by-provider-value="<%= models_by_provider_data %>" style="<%= 'display: none;' unless form.object.llm_judge? %>">
23
+ <div class="field">
24
+ <%= form.label :judge_provider, class: "label" %>
25
+ <div class="select is-fullwidth">
26
+ <%= form.select :judge_provider, options_for_select(RubyLLM.providers.map { [_1.name, _1.slug] }, form.object.judge_provider), { include_blank: true }, data: { provider_model_target: "provider", action: "change->provider-model#provider-changed" } %>
27
+ </div>
28
+ <p class="help is-danger"><%= form.object.errors.full_messages_for(:judge_provider).join(", ") %></p>
29
+ </div>
30
+
31
+ <div class="field">
32
+ <%= form.label :judge_model, class: "label" %>
33
+ <div class="select is-fullwidth is-hidden" data-provider-model-target="modelSelectWrapper">
34
+ <%= form.select :judge_model, [], {}, { class: "input", name: nil, data: { provider_model_target: "modelSelect", action: "change->provider-model#syncToInput" } } %>
35
+ </div>
36
+ <%= form.text_field :judge_model, class: "input", data: { provider_model_target: "modelInput" } %>
37
+ <p class="help is-danger"><%= form.object.errors.full_messages_for(:judge_model).join(", ") %></p>
38
+ </div>
39
+ </div>
40
+ </div>
41
+
42
+ <div class="column field" data-eval-type-selector-target="expectedOutput" style="<%= 'display: none;' if form.object.human_judge? %>">
43
+ <%= form.label :expected_output, class: "label" %>
44
+ <%= form.text_area :expected_output, class: "textarea" %>
45
+ <p class="help is-danger"><%= form.object.errors.full_messages_for(:expected_output).join(", ") %></p>
46
+ </div>
47
+
48
+ <div class="column field">
49
+ <%= form.label :files, "Attach files", class: "label" %>
50
+
51
+ <% if form.object.persisted? && form.object.files.attached? %>
52
+ <% form.object.files.each do |file| %>
53
+ <%= form.hidden_field :files, multiple: true, value: file.signed_id %>
54
+ <% end %>
55
+ <% end %>
56
+
57
+ <div class="file has-name is-fullwidth" data-controller="file-input">
58
+ <label class="file-label">
59
+ <%= form.file_field :files, multiple: true, class: "file-input", data: { action: "change->file-input#update", file_input_target: "input" } %>
60
+ <span class="file-cta">
61
+ <span class="file-label">Choose files...</span>
62
+ </span>
63
+ <span class="file-name" data-file-input-target="name">No files selected</span>
64
+ </label>
65
+ </div>
66
+
67
+ <% if form.object.persisted? && form.object.files.any? %>
68
+ <div>
69
+ <p>Current attachments:</p>
70
+ <ul>
71
+ <% form.object.files.each do |file| %>
72
+ <li><%# NOTE(review): the "Remove" link below sends a Turbo DELETE to the blob download URL (rails_blob_path); Active Storage's blob routes appear to be GET-only, so this likely fails to remove anything — confirm and point it at a real destroy endpoint. %>
73
+ <span><%= file.filename %> (<%= number_to_human_size(file.byte_size) %>)</span>
74
+ <%= link_to "Remove", main_app.rails_blob_path(file, disposition: "attachment"), data: { turbo_method: :delete, turbo_confirm: "Are you sure?" } %>
75
+ </li>
76
+ <% end %>
77
+ </ul>
78
+ </div>
79
+ <% end %>
80
+ </div>
81
+ </div>
82
+
83
+ <div class="field">
84
+ <button type="button" class="button is-danger is-outlined" data-action="nested-form#remove">Remove sample</button>
85
+ </div>
86
+
87
+ <%= form.hidden_field :_destroy %>
88
+ </div>
@@ -0,0 +1,13 @@
1
+ pin "@hotwired/turbo-rails", to: "turbo.min.js", preload: true
2
+ pin "@hotwired/stimulus", to: "stimulus.min.js", preload: true
3
+ pin "@hotwired/stimulus-loading", to: "stimulus-loading.js", preload: true
4
+ pin "@stimulus-components/auto-submit", to: "https://ga.jspm.io/npm:@stimulus-components/auto-submit@6.0.0/dist/stimulus-auto-submit.mjs"
5
+ pin "@stimulus-components/rails-nested-form", to: "https://ga.jspm.io/npm:@stimulus-components/rails-nested-form@5.0.0/dist/stimulus-rails-nested-form.mjs", preload: true
6
+
7
+ pin "ace-builds", to: "https://cdn.jsdelivr.net/npm/ace-builds@1.35.5/src-min-noconflict/ace.min.js", preload: true
8
+ pin "ace-mode-json", to: "https://cdn.jsdelivr.net/npm/ace-builds@1.35.5/src-min-noconflict/mode-json.min.js", preload: true
9
+ pin "ace-theme-github", to: "https://cdn.jsdelivr.net/npm/ace-builds@1.35.5/src-min-noconflict/theme-github.min.js", preload: true
10
+ pin "ace-theme-github-dark", to: "https://cdn.jsdelivr.net/npm/ace-builds@1.35.5/src-min-noconflict/theme-github_dark.min.js", preload: true
11
+
12
+ pin "application", to: "ruby_llm/evals/application.js", preload: true
13
+ pin_all_from RubyLLM::Evals::Engine.root.join("app/javascript/ruby_llm/evals/controllers"), under: "controllers", to: "ruby_llm/evals/controllers"
@@ -0,0 +1,7 @@
1
+ en:
2
+ activemodel:
3
+ errors:
4
+ messages:
5
+ invalid_json: "is invalid JSON"
6
+ must_be_json_object: "must be a JSON object"
7
+ must_be_json_array: "must be a JSON array"
data/config/routes.rb CHANGED
@@ -1,2 +1,21 @@
1
- Rails.application.routes.draw do
1
+ RubyLLM::Evals::Engine.routes.draw do
2
+ resources :runs, only: %i[destroy index show]
3
+
4
+ resources :prompt_executions, only: [] do
5
+ scope module: :prompt_executions do
6
+ resource :failure, only: %i[create]
7
+ resource :passage, only: %i[create]
8
+ resource :retry, only: %i[create]
9
+ end
10
+ end
11
+
12
+ resources :prompts do
13
+ resources :runs, only: %i[create]
14
+
15
+ member do
16
+ get :compare
17
+ end
18
+ end
19
+
20
+ root to: "prompts#index"
2
21
  end
@@ -0,0 +1,21 @@
1
+ class CreateRubyLLMEvalsPrompts < ActiveRecord::Migration[7.0]
2
+ def change
3
+ create_table :ruby_llm_evals_prompts do |t|
4
+ t.string :name, null: false
5
+ t.string :slug, null: false
6
+ t.string :provider, null: false
7
+ t.string :model, null: false
8
+ t.float :temperature
9
+ t.json :params
10
+ t.json :tools
11
+ t.string :schema
12
+ t.json :schema_other
13
+ t.text :instructions
14
+ t.text :message
15
+
16
+ t.timestamps
17
+ end
18
+ add_index :ruby_llm_evals_prompts, :name, unique: true
19
+ add_index :ruby_llm_evals_prompts, :slug, unique: true
20
+ end
21
+ end
@@ -0,0 +1,14 @@
1
+ class CreateRubyLLMEvalsSamples < ActiveRecord::Migration[7.0]
2
+ def change
3
+ create_table :ruby_llm_evals_samples do |t|
4
+ t.references :ruby_llm_evals_prompt, null: false, foreign_key: true
5
+ t.string :eval_type, null: false
6
+ t.text :expected_output
7
+ t.string :judge_model
8
+ t.string :judge_provider
9
+ t.json :variables
10
+
11
+ t.timestamps
12
+ end
13
+ end
14
+ end