leva 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. checksums.yaml +4 -4
  2. data/README.md +52 -16
  3. data/app/controllers/leva/dataset_records_controller.rb +21 -0
  4. data/app/controllers/leva/datasets_controller.rb +9 -2
  5. data/app/controllers/leva/experiments_controller.rb +34 -9
  6. data/app/controllers/leva/runner_results_controller.rb +8 -0
  7. data/app/controllers/leva/workbench_controller.rb +85 -12
  8. data/app/helpers/leva/application_helper.rb +39 -0
  9. data/app/javascript/controllers/prompt_form_controller.js +45 -0
  10. data/app/javascript/controllers/prompt_selector_controller.js +31 -0
  11. data/app/jobs/leva/experiment_job.rb +9 -4
  12. data/app/jobs/leva/run_eval_job.rb +40 -0
  13. data/app/models/concerns/leva/recordable.rb +37 -0
  14. data/app/models/leva/dataset.rb +15 -6
  15. data/app/models/leva/dataset_record.rb +40 -2
  16. data/app/models/leva/evaluation_result.rb +15 -7
  17. data/app/models/leva/experiment.rb +24 -12
  18. data/app/models/leva/prompt.rb +14 -1
  19. data/app/models/leva/runner_result.rb +54 -0
  20. data/app/views/layouts/leva/application.html.erb +24 -13
  21. data/app/views/leva/dataset_records/index.html.erb +49 -0
  22. data/app/views/leva/dataset_records/show.html.erb +30 -0
  23. data/app/views/leva/datasets/_dataset.html.erb +18 -0
  24. data/app/views/leva/datasets/_form.html.erb +24 -0
  25. data/app/views/leva/datasets/edit.html.erb +5 -0
  26. data/app/views/leva/datasets/index.html.erb +51 -38
  27. data/app/views/leva/datasets/new.html.erb +5 -0
  28. data/app/views/leva/datasets/show.html.erb +160 -8
  29. data/app/views/leva/experiments/_experiment.html.erb +42 -0
  30. data/app/views/leva/experiments/_form.html.erb +49 -0
  31. data/app/views/leva/experiments/edit.html.erb +5 -0
  32. data/app/views/leva/experiments/index.html.erb +53 -37
  33. data/app/views/leva/experiments/new.html.erb +5 -0
  34. data/app/views/leva/experiments/show.html.erb +115 -19
  35. data/app/views/leva/runner_results/show.html.erb +64 -0
  36. data/app/views/leva/workbench/_evaluation_area.html.erb +5 -0
  37. data/app/views/leva/workbench/_prompt_content.html.erb +216 -0
  38. data/app/views/leva/workbench/_prompt_form.html.erb +89 -0
  39. data/app/views/leva/workbench/_prompt_sidebar.html.erb +21 -0
  40. data/app/views/leva/workbench/_results_section.html.erb +159 -0
  41. data/app/views/leva/workbench/_top_bar.html.erb +10 -0
  42. data/app/views/leva/workbench/edit.html.erb +20 -0
  43. data/app/views/leva/workbench/index.html.erb +5 -91
  44. data/app/views/leva/workbench/new.html.erb +79 -36
  45. data/config/routes.rb +15 -6
  46. data/db/migrate/20240813172916_create_leva_datasets.rb +1 -0
  47. data/db/migrate/20240813173035_create_leva_experiments.rb +1 -0
  48. data/db/migrate/20240816201419_create_leva_runner_results.rb +11 -0
  49. data/db/migrate/20240816201433_update_leva_evaluation_results.rb +8 -0
  50. data/db/migrate/20240821163608_make_experiment_optional_for_runner_results.rb +6 -0
  51. data/db/migrate/20240821181934_add_prompt_to_leva_runner_results.rb +5 -0
  52. data/db/migrate/20240821183153_add_runner_and_evaluator_to_leva_experiments.rb +6 -0
  53. data/db/migrate/20240821191713_add_actual_result_to_leva_dataset_records.rb +5 -0
  54. data/db/migrate/20240822143201_remove_actual_result_from_leva_runner_results.rb +5 -0
  55. data/lib/generators/leva/templates/eval.rb.erb +6 -7
  56. data/lib/leva/version.rb +1 -1
  57. data/lib/leva.rb +62 -45
  58. metadata +48 -5
  59. data/app/evals/test_sentiment_accuracy_eval.rb +0 -6
  60. data/app/runners/test_sentiment_run.rb +0 -13
  61. data/lib/leva/base_eval.rb +0 -75
@@ -1,13 +1,165 @@
1
1
  <% content_for :title, @dataset.name %>
2
- <div class="px-4 sm:px-6 lg:px-8">
3
- <div class="sm:flex sm:items-center">
4
- <div class="sm:flex-auto">
5
- <h1 class="text-2xl font-semibold text-gray-900"><%= @dataset.name %></h1>
6
- <p class="mt-2 text-sm text-gray-700"><%= @dataset.description %></p>
2
+ <div class="container mx-auto px-4 py-8 bg-gray-950 text-white">
3
+ <div class="mb-8">
4
+ <div class="flex justify-between items-center">
5
+ <h1 class="text-3xl font-bold text-indigo-400 mb-2"><%= @dataset.name %></h1>
6
+ <div class="flex items-center space-x-4">
7
+ <%= link_to edit_dataset_path(@dataset), class: 'btn btn-secondary flex items-center' do %>
8
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 mr-2" viewBox="0 0 20 20" fill="currentColor">
9
+ <path d="M13.586 3.586a2 2 0 112.828 2.828l-.793.793-2.828-2.828.793-.793zM11.379 5.793L3 14.172V17h2.828l8.38-8.379-2.83-2.828z" />
10
+ </svg>
11
+ Edit Dataset
12
+ <% end %>
13
+ <% if @dataset.dataset_records.empty? %>
14
+ <%= button_to dataset_path(@dataset), method: :delete, class: 'btn btn-danger flex items-center', data: { confirm: 'Are you sure you want to delete this dataset?' } do %>
15
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 mr-2" viewBox="0 0 20 20" fill="currentColor">
16
+ <path fill-rule="evenodd" d="M9 2a1 1 0 00-.894.553L7.382 4H4a1 1 0 000 2v10a2 2 0 002 2h8a2 2 0 002-2V6a1 1 0 100-2h-3.382l-.724-1.447A1 1 0 0011 2H9zM7 8a1 1 0 012 0v6a1 1 0 11-2 0V8zm5-1a1 1 0 00-1 1v6a1 1 0 102 0V8a1 1 0 00-1-1z" clip-rule="evenodd" />
17
+ </svg>
18
+ Delete Dataset
19
+ <% end %>
20
+ <% else %>
21
+ <button class="btn btn-danger flex items-center opacity-50 cursor-not-allowed" disabled title="Cannot delete dataset with existing records">
22
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 mr-2" viewBox="0 0 20 20" fill="currentColor">
23
+ <path fill-rule="evenodd" d="M9 2a1 1 0 00-.894.553L7.382 4H4a1 1 0 000 2v10a2 2 0 002 2h8a2 2 0 002-2V6a1 1 0 100-2h-3.382l-.724-1.447A1 1 0 0011 2H9zM7 8a1 1 0 012 0v6a1 1 0 11-2 0V8zm5-1a1 1 0 00-1 1v6a1 1 0 102 0V8a1 1 0 00-1-1z" clip-rule="evenodd" />
24
+ </svg>
25
+ Delete Dataset
26
+ </button>
27
+ <% end %>
28
+ </div>
7
29
  </div>
8
- <div class="mt-4 sm:mt-0 sm:ml-16 sm:flex-none">
9
- <%= link_to 'Edit Dataset', edit_dataset_path(@dataset), class: 'btn btn-primary' %>
30
+ <p class="text-gray-400"><%= @dataset.description %></p>
31
+ </div>
32
+ <div class="mb-8">
33
+ <div class="flex justify-between items-center mb-4">
34
+ <h2 class="text-2xl font-semibold text-indigo-300">Dataset Records</h2>
35
+ <div class="flex space-x-2">
36
+ <%= link_to dataset_dataset_records_path(@dataset), class: "btn btn-secondary flex items-center" do %>
37
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 mr-2" viewBox="0 0 20 20" fill="currentColor">
38
+ <path d="M10 12a2 2 0 100-4 2 2 0 000 4z" />
39
+ <path fill-rule="evenodd" d="M.458 10C1.732 5.943 5.522 3 10 3s8.268 2.943 9.542 7c-1.274 4.057-5.064 7-9.542 7S1.732 14.057.458 10zM14 10a4 4 0 11-8 0 4 4 0 018 0z" clip-rule="evenodd" />
40
+ </svg>
41
+ View All Records
42
+ <% end %>
43
+ <%= link_to '#', class: "btn btn-primary flex items-center" do %>
44
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 mr-2" viewBox="0 0 20 20" fill="currentColor">
45
+ <path fill-rule="evenodd" d="M10 3a1 1 0 011 1v5h5a1 1 0 110 2h-5v5a1 1 0 11-2 0v-5H4a1 1 0 110-2h5V4a1 1 0 011-1z" clip-rule="evenodd" />
46
+ </svg>
47
+ Add Record
48
+ <% end %>
49
+ </div>
50
+ </div>
51
+ <% if @dataset.dataset_records.any? %>
52
+ <div class="bg-gray-800 rounded-lg shadow-lg overflow-hidden">
53
+ <table class="min-w-full divide-y divide-gray-700">
54
+ <thead class="bg-gray-700">
55
+ <tr>
56
+ <% @dataset.dataset_records.first.index_attributes.keys.each do |key| %>
57
+ <th scope="col" class="px-6 py-3 text-left text-xs font-medium text-gray-300 uppercase tracking-wider">
58
+ <%= key.to_s.humanize %>
59
+ </th>
60
+ <% end %>
61
+ <th scope="col" class="relative px-6 py-3">
62
+ <span class="sr-only">Actions</span>
63
+ </th>
64
+ </tr>
65
+ </thead>
66
+ <tbody class="bg-gray-800 divide-y divide-gray-700">
67
+ <% @dataset.dataset_records.first(10).each do |record| %>
68
+ <tr class="hover:bg-gray-700 transition-colors duration-200">
69
+ <% record.index_attributes.values.each do |value| %>
70
+ <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-300">
71
+ <%= value %>
72
+ </td>
73
+ <% end %>
74
+ <td class="px-6 py-4 whitespace-nowrap text-right text-sm font-medium">
75
+ <%= link_to 'View', dataset_dataset_record_path(@dataset, record), class: 'text-indigo-400 hover:text-indigo-300 transition-colors duration-200' %>
76
+ </td>
77
+ </tr>
78
+ <% end %>
79
+ </tbody>
80
+ </table>
81
+ </div>
82
+ <% total_records = @dataset.dataset_records.count %>
83
+ <% displayed_records = [total_records, 10].min %>
84
+ <div class="mt-4 text-gray-400 text-sm">
85
+ Showing <%= displayed_records %> of <%= total_records %> records.
86
+ <% if total_records > 10 %>
87
+ <%= link_to 'View all records', dataset_dataset_records_path(@dataset), class: 'text-indigo-400 hover:text-indigo-300' %>
88
+ <% end %>
89
+ </div>
90
+ <% else %>
91
+ <div class="bg-gray-800 rounded-lg shadow-lg p-12 text-center">
92
+ <svg class="mx-auto h-12 w-12 text-indigo-400" fill="none" viewBox="0 0 24 24" stroke="currentColor" aria-hidden="true">
93
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 13h6m-3-3v6m5 5H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z" />
94
+ </svg>
95
+ <h3 class="mt-2 text-xl font-medium text-indigo-300">No records yet</h3>
96
+ <p class="mt-1 text-gray-400">Start adding records to your dataset.</p>
97
+ <div class="mt-6">
98
+ <%= link_to '#', class: "btn btn-primary inline-flex items-center" do %>
99
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 mr-2" viewBox="0 0 20 20" fill="currentColor">
100
+ <path fill-rule="evenodd" d="M10 3a1 1 0 011 1v5h5a1 1 0 110 2h-5v5a1 1 0 11-2 0v-5H4a1 1 0 110-2h5V4a1 1 0 011-1z" clip-rule="evenodd" />
101
+ </svg>
102
+ Add your first record
103
+ <% end %>
104
+ </div>
105
+ </div>
106
+ <% end %>
107
+ </div>
108
+ <div class="mb-8">
109
+ <div class="flex justify-between items-center mb-4">
110
+ <h2 class="text-2xl font-semibold text-indigo-300">Experiments</h2>
111
+ <%= link_to new_experiment_path(dataset_id: @dataset.id), class: "btn btn-primary flex items-center" do %>
112
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 mr-2" viewBox="0 0 20 20" fill="currentColor">
113
+ <path fill-rule="evenodd" d="M10 3a1 1 0 011 1v5h5a1 1 0 110 2h-5v5a1 1 0 11-2 0v-5H4a1 1 0 110-2h5V4a1 1 0 011-1z" clip-rule="evenodd" />
114
+ </svg>
115
+ New Experiment
116
+ <% end %>
10
117
  </div>
118
+ <% if @dataset.experiments.any? %>
119
+ <div class="bg-gray-800 rounded-lg shadow-lg overflow-hidden">
120
+ <table class="min-w-full divide-y divide-gray-700">
121
+ <thead class="bg-gray-700">
122
+ <tr>
123
+ <th scope="col" class="px-6 py-3 text-left text-xs font-medium text-gray-300 uppercase tracking-wider">
124
+ Name
125
+ </th>
126
+ <th scope="col" class="px-6 py-3 text-left text-xs font-medium text-gray-300 uppercase tracking-wider">
127
+ Status
128
+ </th>
129
+ <th scope="col" class="px-6 py-3 text-left text-xs font-medium text-gray-300 uppercase tracking-wider">
130
+ Total Results
131
+ </th>
132
+ <% Leva::EvaluationResult.distinct.pluck(:evaluator_class).each do |evaluator_class| %>
133
+ <th scope="col" class="px-6 py-3 text-left text-xs font-medium text-gray-300 uppercase tracking-wider">
134
+ <%= evaluator_class.demodulize %>
135
+ </th>
136
+ <% end %>
137
+ <th scope="col" class="relative px-6 py-3">
138
+ <span class="sr-only">Actions</span>
139
+ </th>
140
+ </tr>
141
+ </thead>
142
+ <tbody class="bg-gray-800 divide-y divide-gray-700">
143
+ <%= render partial: 'leva/experiments/experiment', collection: @dataset.experiments %>
144
+ </tbody>
145
+ </table>
146
+ </div>
147
+ <% else %>
148
+ <div class="bg-gray-800 rounded-lg shadow-lg p-12 text-center">
149
+ <svg class="mx-auto h-12 w-12 text-indigo-400" fill="none" viewBox="0 0 24 24" stroke="currentColor" aria-hidden="true">
150
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19.428 15.428a2 2 0 00-1.022-.547l-2.387-.477a6 6 0 00-3.86.517l-.318.158a6 6 0 01-3.86.517L6.05 15.21a2 2 0 00-1.806.547M8 4h8l-1 1v5.172a2 2 0 00.586 1.414l5 5c1.26 1.26.367 3.414-1.415 3.414H4.828c-1.782 0-2.674-2.154-1.414-3.414l5-5A2 2 0 009 10.172V5L8 4z" />
151
+ </svg>
152
+ <h3 class="mt-2 text-xl font-medium text-indigo-300">No experiments yet</h3>
153
+ <p class="mt-1 text-gray-400">Create an experiment to start evaluating your dataset.</p>
154
+ <div class="mt-6">
155
+ <%= link_to new_experiment_path(dataset_id: @dataset.id), class: "btn btn-primary inline-flex items-center" do %>
156
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 mr-2" viewBox="0 0 20 20" fill="currentColor">
157
+ <path fill-rule="evenodd" d="M10 3a1 1 0 011 1v5h5a1 1 0 110 2h-5v5a1 1 0 11-2 0v-5H4a1 1 0 110-2h5V4a1 1 0 011-1z" clip-rule="evenodd" />
158
+ </svg>
159
+ Create your first experiment
160
+ <% end %>
161
+ </div>
162
+ </div>
163
+ <% end %>
11
164
  </div>
12
- <!-- Add more dataset details here -->
13
165
  </div>
@@ -0,0 +1,42 @@
1
+ <tr class="hover:bg-gray-700 transition-colors duration-200">
2
+ <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-300">
3
+ <%= link_to experiment.name, experiment_path(experiment), class: "text-indigo-400 hover:text-indigo-300 transition-colors duration-200" %>
4
+ </td>
5
+ <td class="px-6 py-4 whitespace-nowrap text-sm">
6
+ <% status_color = case experiment.status
7
+ when 'pending' then 'bg-yellow-100 text-yellow-800'
8
+ when 'running' then 'bg-blue-100 text-blue-800'
9
+ when 'completed' then 'bg-green-100 text-green-800'
10
+ when 'failed' then 'bg-red-100 text-red-800'
11
+ else 'bg-gray-100 text-gray-800'
12
+ end %>
13
+ <span class="px-2 inline-flex text-xs leading-5 font-semibold rounded-full <%= status_color %>">
14
+ <%= experiment.status&.capitalize || 'N/A' %>
15
+ </span>
16
+ </td>
17
+ <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-300">
18
+ <%= experiment.runner_results.count %>
19
+ </td>
20
+ <% Leva::EvaluationResult.distinct.pluck(:evaluator_class).each do |evaluator_class| %>
21
+ <td class="px-6 py-4 whitespace-nowrap text-sm">
22
+ <% results = experiment.evaluation_results.where(evaluator_class: evaluator_class) %>
23
+ <% if results.any? %>
24
+ <% avg_score = (results.sum(&:score) / results.size.to_f).round(2) %>
25
+ <% color_class = case avg_score
26
+ when 0...0.2 then 'text-red-500'
27
+ when 0.2...0.4 then 'text-orange-500'
28
+ when 0.4...0.6 then 'text-yellow-500'
29
+ when 0.6...0.8 then 'text-lime-500'
30
+ when 0.8...1.0 then 'text-green-400'
31
+ else 'text-green-300'
32
+ end %>
33
+ <span class="<%= color_class %> font-semibold"><%= sprintf('%.2f', avg_score) %></span>
34
+ <% else %>
35
+ <span class="text-gray-400">N/A</span>
36
+ <% end %>
37
+ </td>
38
+ <% end %>
39
+ <td class="px-6 py-4 whitespace-nowrap text-right text-sm font-medium">
40
+ <%= link_to 'View Results', experiment_path(experiment), class: "text-indigo-400 hover:text-indigo-300 transition-colors duration-200" %>
41
+ </td>
42
+ </tr>
@@ -0,0 +1,49 @@
1
+ <%= form_with(model: @experiment, url: @experiment.new_record? ? experiments_path : experiment_path(@experiment), local: true, class: "bg-gray-800 rounded-lg shadow-lg p-6") do |form| %>
2
+ <% if @experiment.errors.any? %>
3
+ <div class="bg-red-900 border border-red-700 text-red-100 px-4 py-3 rounded-lg mb-4" role="alert">
4
+ <strong class="font-bold">Error:</strong>
5
+ <ul class="list-disc list-inside">
6
+ <% @experiment.errors.full_messages.each do |message| %>
7
+ <li><%= message %></li>
8
+ <% end %>
9
+ </ul>
10
+ </div>
11
+ <% end %>
12
+ <div class="mb-4">
13
+ <%= form.label :name, class: "block text-sm font-semibold mb-2 text-indigo-300" %>
14
+ <%= form.text_field :name, autofocus: true, class: "w-full bg-gray-700 text-white p-3 rounded-lg focus:ring-2 focus:ring-indigo-500 focus:outline-none" %>
15
+ </div>
16
+ <div class="mb-4">
17
+ <%= form.label :description, class: "block text-sm font-semibold mb-2 text-indigo-300" %>
18
+ <%= form.text_area :description, rows: 4, class: "w-full bg-gray-700 text-white p-3 rounded-lg focus:ring-2 focus:ring-indigo-500 focus:outline-none" %>
19
+ </div>
20
+ <div class="mb-4">
21
+ <%= form.label :dataset_id, class: "block text-sm font-semibold mb-2 text-indigo-300" %>
22
+ <%= form.collection_select :dataset_id, Leva::Dataset.all, :id, :name, {}, class: "w-full bg-gray-700 text-white p-3 rounded-lg focus:ring-2 focus:ring-indigo-500 focus:outline-none" %>
23
+ </div>
24
+ <div class="mb-4">
25
+ <%= form.label :prompt_id, class: "block text-sm font-semibold mb-2 text-indigo-300" %>
26
+ <%= form.collection_select :prompt_id, Leva::Prompt.all, :id, :name, {}, class: "w-full bg-gray-700 text-white p-3 rounded-lg focus:ring-2 focus:ring-indigo-500 focus:outline-none" %>
27
+ </div>
28
+ <div class="mb-4">
29
+ <%= form.label :runner_class, class: "block text-sm font-semibold mb-2 text-indigo-300" %>
30
+ <%= form.select :runner_class,
31
+ options_for_select(@runners.map { |r| [r.name.demodulize, r.name] }, @runners.first.name),
32
+ {},
33
+ class: "w-full bg-gray-700 text-white p-3 rounded-lg focus:ring-2 focus:ring-indigo-500 focus:outline-none" %>
34
+ </div>
35
+ <div class="mb-4">
36
+ <%= form.label :evaluator_classes, class: "block text-sm font-semibold mb-2 text-indigo-300" %>
37
+ <div class="space-y-2">
38
+ <%= form.collection_check_boxes :evaluator_classes, @evaluators, :name, ->(e) { e.name.demodulize } do |b| %>
39
+ <div class="flex items-center">
40
+ <%= b.check_box(class: "mr-2 bg-gray-700 text-indigo-600 focus:ring-indigo-500", checked: !@experiment.persisted?) %>
41
+ <%= b.label(class: "text-sm text-white") %>
42
+ </div>
43
+ <% end %>
44
+ </div>
45
+ </div>
46
+ <div class="flex items-center justify-end">
47
+ <%= form.submit @experiment.persisted? ? "Update Experiment" : "Create Experiment", class: "px-3 py-2 rounded-md text-sm font-medium bg-indigo-600 text-white shadow-lg hover:bg-indigo-700 transition-colors duration-150 ease-in-out" %>
48
+ </div>
49
+ <% end %>
@@ -0,0 +1,5 @@
1
+ <% content_for :title, "Edit #{@experiment.name}" %>
2
+ <div class="container mx-auto px-4 py-8 bg-gray-950 text-white">
3
+ <h1 class="text-3xl font-bold text-indigo-400 mb-6">Edit Experiment</h1>
4
+ <%= render 'form', experiment: @experiment %>
5
+ </div>
@@ -1,44 +1,60 @@
1
1
  <% content_for :title, 'Experiments' %>
2
- <div class="px-4 sm:px-6 lg:px-8">
3
- <div class="sm:flex sm:items-center">
4
- <div class="sm:flex-auto">
5
- <h1 class="text-2xl font-semibold text-gray-900">Experiments</h1>
6
- <p class="mt-2 text-sm text-gray-700">A list of all experiments in your account.</p>
7
- </div>
8
- <div class="mt-4 sm:mt-0 sm:ml-16 sm:flex-none">
9
- <%= link_to 'New Experiment', new_experiment_path, class: 'btn btn-primary' %>
10
- </div>
2
+ <div class="container mx-auto px-4 py-8 bg-gray-950 text-white">
3
+ <div class="flex flex-col sm:flex-row justify-between items-center mb-6">
4
+ <h1 class="text-3xl font-bold text-indigo-400 mb-4 sm:mb-0">Experiments</h1>
5
+ <%= link_to new_experiment_path, class: "btn btn-primary flex items-center" do %>
6
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 mr-2" viewBox="0 0 20 20" fill="currentColor">
7
+ <path fill-rule="evenodd" d="M10 3a1 1 0 011 1v5h5a1 1 0 110 2h-5v5a1 1 0 11-2 0v-5H4a1 1 0 110-2h5V4a1 1 0 011-1z" clip-rule="evenodd" />
8
+ </svg>
9
+ Create New Experiment
10
+ <% end %>
11
11
  </div>
12
- <div class="mt-8 flex flex-col">
13
- <div class="-my-2 -mx-4 overflow-x-auto sm:-mx-6 lg:-mx-8">
14
- <div class="inline-block min-w-full py-2 align-middle md:px-6 lg:px-8">
15
- <div class="overflow-hidden shadow ring-1 ring-black ring-opacity-5 md:rounded-lg">
16
- <table class="min-w-full divide-y divide-gray-300">
17
- <thead class="bg-gray-50">
18
- <tr>
19
- <th scope="col" class="py-3.5 pl-4 pr-3 text-left text-sm font-semibold text-gray-900 sm:pl-6">Name</th>
20
- <th scope="col" class="px-3 py-3.5 text-left text-sm font-semibold text-gray-900">Status</th>
21
- <th scope="col" class="px-3 py-3.5 text-left text-sm font-semibold text-gray-900">Dataset</th>
22
- <th scope="col" class="relative py-3.5 pl-3 pr-4 sm:pr-6">
23
- <span class="sr-only">Actions</span>
12
+ <% if @experiments.any? %>
13
+ <div class="bg-gray-800 rounded-lg shadow-lg overflow-x-auto">
14
+ <div class="min-w-max">
15
+ <table class="w-full divide-y divide-gray-700">
16
+ <thead class="bg-gray-700">
17
+ <tr>
18
+ <th scope="col" class="px-6 py-3 text-left text-xs font-medium text-gray-300 uppercase tracking-wider whitespace-nowrap">
19
+ Name
20
+ </th>
21
+ <th scope="col" class="px-6 py-3 text-left text-xs font-medium text-gray-300 uppercase tracking-wider whitespace-nowrap">
22
+ Status
23
+ </th>
24
+ <th scope="col" class="px-6 py-3 text-left text-xs font-medium text-gray-300 uppercase tracking-wider whitespace-nowrap">
25
+ Total Results
26
+ </th>
27
+ <% Leva::EvaluationResult.distinct.pluck(:evaluator_class).each do |evaluator_class| %>
28
+ <th scope="col" class="px-6 py-3 text-left text-xs font-medium text-gray-300 uppercase tracking-wider whitespace-nowrap">
29
+ <%= evaluator_class %>
24
30
  </th>
25
- </tr>
26
- </thead>
27
- <tbody class="divide-y divide-gray-200 bg-white">
28
- <% @experiments.each do |experiment| %>
29
- <tr>
30
- <td class="whitespace-nowrap py-4 pl-4 pr-3 text-sm font-medium text-gray-900 sm:pl-6"><%= experiment.name %></td>
31
- <td class="whitespace-nowrap px-3 py-4 text-sm text-gray-500"><%= experiment.status %></td>
32
- <td class="whitespace-nowrap px-3 py-4 text-sm text-gray-500"><%= experiment.dataset.name %></td>
33
- <td class="relative whitespace-nowrap py-4 pl-3 pr-4 text-right text-sm font-medium sm:pr-6">
34
- <%= link_to 'View', experiment_path(experiment), class: 'text-indigo-600 hover:text-indigo-900' %>
35
- </td>
36
- </tr>
37
31
  <% end %>
38
- </tbody>
39
- </table>
40
- </div>
32
+ <th scope="col" class="relative px-6 py-3">
33
+ <span class="sr-only">Actions</span>
34
+ </th>
35
+ </tr>
36
+ </thead>
37
+ <tbody class="bg-gray-800 divide-y divide-gray-700">
38
+ <%= render @experiments %>
39
+ </tbody>
40
+ </table>
41
41
  </div>
42
42
  </div>
43
- </div>
43
+ <% else %>
44
+ <div class="bg-gray-800 rounded-lg shadow-lg p-12 text-center">
45
+ <svg class="mx-auto h-12 w-12 text-indigo-400" fill="none" viewBox="0 0 24 24" stroke="currentColor" aria-hidden="true">
46
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 11H5m14 0a2 2 0 012 2v6a2 2 0 01-2 2H5a2 2 0 01-2-2v-6a2 2 0 012-2m14 0V9a2 2 0 00-2-2M5 11V9a2 2 0 012-2m0 0V5a2 2 0 012-2h6a2 2 0 012 2v2M7 7h10" />
47
+ </svg>
48
+ <h3 class="mt-2 text-xl font-medium text-indigo-300">No experiments yet</h3>
49
+ <p class="mt-1 text-gray-400">Get started by creating a new experiment.</p>
50
+ <div class="mt-6">
51
+ <%= link_to new_experiment_path, class: "btn btn-primary inline-flex items-center" do %>
52
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 mr-2" viewBox="0 0 20 20" fill="currentColor">
53
+ <path fill-rule="evenodd" d="M10 3a1 1 0 011 1v5h5a1 1 0 110 2h-5v5a1 1 0 11-2 0v-5H4a1 1 0 110-2h5V4a1 1 0 011-1z" clip-rule="evenodd" />
54
+ </svg>
55
+ Create your first experiment
56
+ <% end %>
57
+ </div>
58
+ </div>
59
+ <% end %>
44
60
  </div>
@@ -0,0 +1,5 @@
1
+ <% content_for :title, 'New Experiment' %>
2
+ <div class="container mx-auto px-4 py-8 bg-gray-950 text-white">
3
+ <h1 class="text-3xl font-bold text-indigo-400 mb-6">New Experiment</h1>
4
+ <%= render 'form', experiment: @experiment %>
5
+ </div>
@@ -1,24 +1,120 @@
1
1
  <% content_for :title, @experiment.name %>
2
- <div class="px-4 sm:px-6 lg:px-8">
3
- <div class="sm:flex sm:items-center">
4
- <div class="sm:flex-auto">
5
- <h1 class="text-2xl font-semibold text-gray-900"><%= @experiment.name %></h1>
6
- <p class="mt-2 text-sm text-gray-700">Status: <%= @experiment.status %></p>
2
+ <% content_for :head do %>
3
+ <% if @experiment.status == 'pending' || @experiment.status == 'running' %>
4
+ <meta http-equiv="refresh" content="5">
5
+ <% end %>
6
+ <% end %>
7
+ <div class="container mx-auto px-4 py-8 bg-gray-950 text-white">
8
+ <div class="mb-8">
9
+ <div class="flex justify-between items-center">
10
+ <h1 class="text-3xl font-bold text-indigo-400 mb-2"><%= @experiment.name %></h1>
11
+ <div class="flex items-center space-x-4">
12
+ <% if @experiment.status != 'completed' %>
13
+ <%= link_to edit_experiment_path(@experiment), class: 'btn btn-secondary flex items-center' do %>
14
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 mr-2" viewBox="0 0 20 20" fill="currentColor">
15
+ <path d="M13.586 3.586a2 2 0 112.828 2.828l-.793.793-2.828-2.828.793-.793zM11.379 5.793L3 14.172V17h2.828l8.38-8.379-2.83-2.828z" />
16
+ </svg>
17
+ Edit Experiment
18
+ <% end %>
19
+ <% end %>
20
+ <%= button_to rerun_experiment_path(@experiment), method: :post, class: 'btn btn-primary flex items-center', data: { confirm: 'Are you sure you want to rerun this experiment? This will delete all existing results.' } do %>
21
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 mr-2" viewBox="0 0 20 20" fill="currentColor">
22
+ <path fill-rule="evenodd" d="M4 2a1 1 0 011 1v2.101a7.002 7.002 0 0111.601 2.566 1 1 0 11-1.885.666A5.002 5.002 0 005.999 7H9a1 1 0 010 2H4a1 1 0 01-1-1V3a1 1 0 011-1zm.008 9.057a1 1 0 011.276.61A5.002 5.002 0 0014.001 13H11a1 1 0 110-2h5a1 1 0 011 1v5a1 1 0 11-2 0v-2.101a7.002 7.002 0 01-11.601-2.566 1 1 0 01.61-1.276z" clip-rule="evenodd" />
23
+ </svg>
24
+ Rerun Experiment
25
+ <% end %>
26
+ </div>
7
27
  </div>
28
+ <p class="text-gray-400"><%= @experiment.description %></p>
29
+ <p class="text-indigo-300 mt-2">Status: <%= @experiment.status&.capitalize || 'N/A' %></p>
8
30
  </div>
9
- <div class="mt-8 bg-white shadow overflow-hidden sm:rounded-lg">
10
- <div class="px-4 py-5 sm:px-6">
11
- <h3 class="text-lg leading-6 font-medium text-gray-900">Experiment Details</h3>
12
- </div>
13
- <div class="border-t border-gray-200 px-4 py-5 sm:p-0">
14
- <dl class="sm:divide-y sm:divide-gray-200">
15
- <div class="py-4 sm:py-5 sm:grid sm:grid-cols-3 sm:gap-4 sm:px-6">
16
- <dt class="text-sm font-medium text-gray-500">Dataset</dt>
17
- <dd class="mt-1 text-sm text-gray-900 sm:mt-0 sm:col-span-2"><%= @experiment.dataset.name %></dd>
18
- </div>
19
- <!-- Add more experiment details here -->
20
- </dl>
21
- </div>
31
+ <div class="bg-gray-800 rounded-lg shadow-lg p-6 mb-8">
32
+ <h2 class="text-3xl font-semibold text-indigo-300 mb-6">Evaluation Summary</h2>
33
+ <% if @experiment.evaluation_results.any? %>
34
+ <div class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-6">
35
+ <% @experiment.evaluation_results.group_by(&:evaluator_class).each do |evaluator_class, results| %>
36
+ <div class="bg-gray-700 rounded-lg p-6">
37
+ <h3 class="text-xl font-semibold text-indigo-200 mb-4"><%= evaluator_class %></h3>
38
+ <% avg_score = (results.sum(&:score) / results.size.to_f).round(2) %>
39
+ <% color_class = case avg_score
40
+ when 0...0.2 then 'text-red-500'
41
+ when 0.2...0.4 then 'text-orange-500'
42
+ when 0.4...0.6 then 'text-yellow-500'
43
+ when 0.6...0.8 then 'text-lime-500'
44
+ when 0.8...1.0 then 'text-green-400'
45
+ else 'text-green-300'
46
+ end %>
47
+ <p class="text-2xl font-bold <%= color_class %> mb-2"><%= sprintf('%.2f', avg_score) %></p>
48
+ <p class="text-gray-300">Number of Evaluations: <%= results.size %></p>
49
+ </div>
50
+ <% end %>
51
+ </div>
52
+ <% else %>
53
+ <p class="text-gray-400 text-xl">No evaluation results available yet.</p>
54
+ <% end %>
55
+ </div>
56
+ <div class="bg-gray-800 rounded-lg shadow-lg p-6 mb-8">
57
+ <h2 class="text-2xl font-semibold text-indigo-300 mb-4">Experiment Details</h2>
58
+ <p class="text-gray-400">Dataset: <%= link_to @experiment.dataset.name, dataset_path(@experiment.dataset), class: 'text-indigo-400 hover:underline' %></p>
59
+ <p class="text-gray-400">Prompt: <%= @experiment.prompt ? @experiment.prompt.name : 'Not specified' %></p>
60
+ <!-- Add more experiment details as needed -->
61
+ </div>
62
+ <div class="bg-gray-800 rounded-lg shadow-lg p-6">
63
+ <h2 class="text-2xl font-semibold text-indigo-300 mb-4">Runner Results</h2>
64
+ <% if @experiment.runner_results.any? %>
65
+ <div class="overflow-x-auto">
66
+ <table class="min-w-full divide-y divide-gray-700">
67
+ <thead>
68
+ <tr>
69
+ <th class="px-6 py-3 text-left text-xs font-medium text-indigo-300 uppercase tracking-wider">Dataset Record</th>
70
+ <th class="px-6 py-3 text-left text-xs font-medium text-indigo-300 uppercase tracking-wider">Prompt</th>
71
+ <th class="px-6 py-3 text-left text-xs font-medium text-indigo-300 uppercase tracking-wider">Prediction</th>
72
+ <th class="px-6 py-3 text-left text-xs font-medium text-indigo-300 uppercase tracking-wider">Ground Truth</th>
73
+ <% @experiment.evaluation_results.group_by(&:evaluator_class).keys.each do |evaluator_class| %>
74
+ <th class="px-6 py-3 text-left text-xs font-medium text-indigo-300 uppercase tracking-wider"><%= evaluator_class %></th>
75
+ <% end %>
76
+ <th class="px-6 py-3 text-left text-xs font-medium text-indigo-300 uppercase tracking-wider">Created At</th>
77
+ <th class="px-6 py-3 text-left text-xs font-medium text-indigo-300 uppercase tracking-wider">Actions</th>
78
+ </tr>
79
+ </thead>
80
+ <tbody class="bg-gray-700 divide-y divide-gray-600">
81
+ <% @experiment.runner_results.each do |runner_result| %>
82
+ <tr>
83
+ <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-300"><%= runner_result.dataset_record.display_name %></td>
84
+ <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-300"><%= runner_result.prompt.name %> (v<%= runner_result.prompt_version %>)</td>
85
+ <td class="px-6 py-4 text-sm text-gray-300"><%= truncate(runner_result.prediction, length: 30) %></td>
86
+ <td class="px-6 py-4 text-sm text-gray-300"><%= truncate(runner_result.ground_truth, length: 30) %></td>
87
+ <% @experiment.evaluation_results.group_by(&:evaluator_class).keys.each do |evaluator_class| %>
88
+ <% eval_result = runner_result.evaluation_results.find_by(evaluator_class: evaluator_class) %>
89
+ <td class="px-6 py-4 whitespace-nowrap text-sm">
90
+ <% if eval_result %>
91
+ <% score = eval_result.score %>
92
+ <% color_class = case score
93
+ when 0...0.2 then 'text-red-500'
94
+ when 0.2...0.4 then 'text-orange-500'
95
+ when 0.4...0.6 then 'text-yellow-500'
96
+ when 0.6...0.8 then 'text-lime-500'
97
+ when 0.8...1.0 then 'text-green-400'
98
+ else 'text-green-300'
99
+ end %>
100
+ <span class="<%= color_class %> font-semibold"><%= sprintf('%.2f', score) %></span>
101
+ <% else %>
102
+ <span class="text-gray-400">N/A</span>
103
+ <% end %>
104
+ </td>
105
+ <% end %>
106
+ <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-300"><%= runner_result.created_at.strftime("%Y-%m-%d %H:%M:%S") %></td>
107
+ <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-300">
108
+ <%= link_to 'View Details', experiment_runner_result_path(@experiment, runner_result), class: 'text-indigo-400 hover:underline mr-2' %>
109
+ <%= link_to 'Experiment', workbench_index_path(prompt_id: runner_result.prompt_id, dataset_record_id: runner_result.dataset_record_id, runner: @experiment.runner_class), class: 'text-indigo-400 hover:underline' %>
110
+ </td>
111
+ </tr>
112
+ <% end %>
113
+ </tbody>
114
+ </table>
115
+ </div>
116
+ <% else %>
117
+ <p class="text-gray-400">No runner results available yet.</p>
118
+ <% end %>
22
119
  </div>
23
- <!-- Add experiment results or other relevant information here -->
24
120
  </div>
@@ -0,0 +1,64 @@
1
+ <% content_for :title, "Runner Result Details" %>
2
+ <div class="container mx-auto px-4 py-8 bg-gray-950 text-white">
3
+ <div class="mb-8">
4
+ <h1 class="text-3xl font-bold text-indigo-400 mb-2">Runner Result Details</h1>
5
+ <%= link_to "Back to Experiment", experiment_path(@experiment), class: "text-indigo-400 hover:underline" %>
6
+ </div>
7
+ <div class="grid grid-cols-1 md:grid-cols-2 gap-6 mb-8">
8
+ <div class="bg-gray-800 rounded-lg shadow-lg p-6">
9
+ <h2 class="text-2xl font-semibold text-indigo-300 mb-4">Details</h2>
10
+ <p class="text-gray-400">
11
+ <strong class="text-indigo-300">Dataset Record:</strong>
12
+ <%= link_to @runner_result.dataset_record.display_name, dataset_dataset_record_path(@runner_result.dataset_record.dataset, @runner_result.dataset_record), class: "text-indigo-400 hover:underline" %>
13
+ </p>
14
+ <p class="text-gray-400">
15
+ <strong class="text-indigo-300">Prompt:</strong>
16
+ <%= link_to "#{@runner_result.prompt.name} (v#{@runner_result.prompt_version})", prompt_path(@runner_result.prompt), class: "text-indigo-400 hover:underline" %>
17
+ </p>
18
+ <p class="text-gray-400"><strong class="text-indigo-300">Created At:</strong> <%= @runner_result.created_at.strftime("%Y-%m-%d %H:%M:%S") %></p>
19
+ <%= link_to 'Run in Workbench', workbench_index_path(prompt_id: @runner_result.prompt_id, dataset_record_id: @runner_result.dataset_record_id, runner: @experiment.runner_class), class: 'mt-4 inline-block px-4 py-2 bg-indigo-600 text-white rounded hover:bg-indigo-700 transition-colors duration-200' %>
20
+ </div>
21
+ <div class="bg-gray-800 rounded-lg shadow-lg p-6">
22
+ <h2 class="text-2xl font-semibold text-indigo-300 mb-4">Evaluation Results</h2>
23
+ <% if @runner_result.evaluation_results.any? %>
24
+ <div class="space-y-4">
25
+ <% @runner_result.evaluation_results.each do |eval_result| %>
26
+ <div class="bg-gray-700 rounded-lg p-4">
27
+ <h3 class="text-lg font-semibold text-indigo-200 mb-2"><%= eval_result.evaluator_class %></h3>
28
+ <% score = eval_result.score %>
29
+ <% color_class = case score
30
+ when 0...0.2 then 'text-red-500'
31
+ when 0.2...0.4 then 'text-orange-500'
32
+ when 0.4...0.6 then 'text-yellow-500'
33
+ when 0.6...0.8 then 'text-lime-500'
34
+ when 0.8...1.0 then 'text-green-400'
35
+ else 'text-green-300'
36
+ end %>
37
+ <p class="text-xl font-bold <%= color_class %>"><%= sprintf('%.2f', score) %></p>
38
+ </div>
39
+ <% end %>
40
+ </div>
41
+ <% else %>
42
+ <p class="text-gray-400">No evaluation results available.</p>
43
+ <% end %>
44
+ </div>
45
+ </div>
46
+ <div class="bg-gray-800 rounded-lg shadow-lg p-6 mb-8">
47
+ <h2 class="text-2xl font-semibold text-indigo-300 mb-4">Predictions and Ground Truth</h2>
48
+ <div class="grid grid-cols-1 md:grid-cols-2 gap-6">
49
+ <div>
50
+ <h3 class="text-xl font-semibold text-indigo-200 mb-2">Predictions</h3>
51
+ <% @runner_result.parsed_predictions.each_with_index do |prediction, index| %>
52
+ <div class="mb-4">
53
+ <h4 class="text-lg font-semibold text-indigo-100 mb-2">Prediction <%= index + 1 %></h4>
54
+ <pre class="bg-gray-700 p-4 rounded-lg mt-2 text-sm text-gray-300 whitespace-pre-wrap"><%= prediction %></pre>
55
+ </div>
56
+ <% end %>
57
+ </div>
58
+ <div>
59
+ <h3 class="text-xl font-semibold text-indigo-200 mb-2">Ground Truth</h3>
60
+ <pre class="bg-gray-700 p-4 rounded-lg mt-2 text-sm text-gray-300 whitespace-pre-wrap"><%= @runner_result.ground_truth %></pre>
61
+ </div>
62
+ </div>
63
+ </div>
64
+ </div>
@@ -0,0 +1,5 @@
1
+ <div class="bg-gray-800 rounded-lg shadow-lg p-6">
2
+ <h3 class="text-xl font-semibold mb-4 text-indigo-300">Evaluation Results</h3>
3
+ <!-- Add evaluation results display here -->
4
+ <p class="text-gray-400">No evaluation results available yet. Run an evaluation to see results.</p>
5
+ </div>