leva 0.2.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +54 -0
  3. data/app/assets/stylesheets/leva/application.css +9 -0
  4. data/app/controllers/leva/dataset_optimizations_controller.rb +64 -0
  5. data/app/controllers/leva/experiments_controller.rb +14 -6
  6. data/app/controllers/leva/workbench_controller.rb +26 -10
  7. data/app/helpers/leva/application_helper.rb +32 -16
  8. data/app/models/leva/dataset.rb +1 -0
  9. data/app/models/leva/experiment.rb +1 -0
  10. data/app/models/leva/optimization_run.rb +137 -0
  11. data/app/models/leva/prompt.rb +10 -0
  12. data/app/services/leva/class_loader.rb +37 -0
  13. data/app/services/leva/dataset_converter.rb +64 -0
  14. data/app/services/leva/optimizers/base.rb +183 -0
  15. data/app/services/leva/optimizers/bootstrap.rb +92 -0
  16. data/app/services/leva/optimizers/gepa_optimizer.rb +59 -0
  17. data/app/services/leva/optimizers/miprov2_optimizer.rb +52 -0
  18. data/app/services/leva/prompt_optimizer.rb +305 -0
  19. data/app/services/leva/signature_generator.rb +129 -0
  20. data/app/views/leva/datasets/show.html.erb +3 -0
  21. data/app/views/leva/experiments/_experiment.html.erb +9 -10
  22. data/app/views/leva/experiments/_form.html.erb +10 -0
  23. data/app/views/leva/experiments/index.html.erb +2 -1
  24. data/app/views/leva/experiments/show.html.erb +20 -21
  25. data/app/views/leva/optimization_runs/show.html.erb +698 -0
  26. data/app/views/leva/runner_results/show.html.erb +18 -48
  27. data/app/views/leva/workbench/_results_section.html.erb +1 -9
  28. data/db/migrate/20241204000001_create_leva_optimization_runs.rb +25 -0
  29. data/lib/generators/leva/templates/eval.rb.erb +4 -2
  30. data/lib/leva/errors.rb +18 -0
  31. data/lib/leva/version.rb +1 -1
  32. data/lib/leva.rb +1 -0
  33. metadata +16 -3
@@ -95,13 +95,6 @@
95
95
  <%
96
96
  score = eval_result.score
97
97
  score_pct = (score * 100).round
98
- score_class = case score
99
- when 0...0.2 then 'score-bad'
100
- when 0.2...0.4 then 'score-poor'
101
- when 0.4...0.6 then 'score-fair'
102
- when 0.6...0.8 then 'score-good'
103
- else 'score-excellent'
104
- end
105
98
  score_label = case score
106
99
  when 0...0.2 then 'Bad'
107
100
  when 0.2...0.4 then 'Poor'
@@ -116,12 +109,12 @@
116
109
  short_name = short_name.presence || eval_result.evaluator_class.demodulize.gsub(/Eval(uator)?$/, '')
117
110
  %>
118
111
  <div class="eval-summary-card" title="<%= eval_result.evaluator_class %>">
119
- <span class="eval-summary-score <%= score_class %>"><%= score_pct %><span class="eval-summary-pct">%</span></span>
112
+ <span class="eval-summary-score <%= score_class(score) %>"><%= score_pct %><span class="eval-summary-pct">%</span></span>
120
113
  <span class="eval-summary-name"><%= short_name %></span>
121
114
  <div class="eval-summary-bar">
122
- <div class="eval-summary-bar-fill <%= score_class %>" style="width: <%= score_pct %>%"></div>
115
+ <div class="eval-summary-bar-fill <%= score_class(score) %>" style="width: <%= score_pct %>%"></div>
123
116
  </div>
124
- <span class="eval-summary-label <%= score_class %>"><%= score_label %></span>
117
+ <span class="eval-summary-label <%= score_class(score) %>"><%= score_label %></span>
125
118
  </div>
126
119
  <% end %>
127
120
  </div>
@@ -217,49 +210,26 @@
217
210
  </div>
218
211
  </div>
219
212
 
220
- <%# Raw Output - Enhanced Collapsible Section %>
221
- <% if @runner_result.respond_to?(:raw_output) && @runner_result.raw_output.present? %>
213
+ <%# Raw LLM Response - Enhanced Collapsible Section %>
214
+ <% if @runner_result.prediction.present? && @runner_result.prediction != prediction_text %>
222
215
  <%
223
- raw_output = @runner_result.raw_output
216
+ raw_output = @runner_result.prediction
224
217
  line_count = raw_output.lines.count
225
218
  char_count = raw_output.length
226
219
  %>
227
220
  <div class="result-section">
228
- <details class="raw-output-collapsible">
229
- <summary class="raw-output-header">
230
- <div class="raw-output-header-left">
231
- <svg class="icon-sm raw-output-icon" viewBox="0 0 20 20" fill="currentColor">
232
- <path fill-rule="evenodd" d="M12.316 3.051a1 1 0 01.633 1.265l-4 12a1 1 0 11-1.898-.632l4-12a1 1 0 011.265-.633zM5.707 6.293a1 1 0 010 1.414L3.414 10l2.293 2.293a1 1 0 11-1.414 1.414l-3-3a1 1 0 010-1.414l3-3a1 1 0 011.414 0zm8.586 0a1 1 0 011.414 0l3 3a1 1 0 010 1.414l-3 3a1 1 0 11-1.414-1.414L16.586 10l-2.293-2.293a1 1 0 010-1.414z" clip-rule="evenodd" />
233
- </svg>
234
- <span class="raw-output-title">Raw Output</span>
235
- <span class="raw-output-stats">
236
- <span class="raw-output-stat"><%= line_count %> line<%= line_count == 1 ? '' : 's' %></span>
237
- <span class="raw-output-stat-sep"></span>
238
- <span class="raw-output-stat"><%= number_to_human_size(char_count) %></span>
239
- </span>
240
- </div>
241
- <div class="raw-output-header-right">
242
- <span class="raw-output-expand-hint">
243
- <span class="expand-text">Show</span>
244
- <span class="collapse-text">Hide</span>
245
- </span>
246
- <svg class="raw-output-chevron" viewBox="0 0 20 20" fill="currentColor">
247
- <path fill-rule="evenodd" d="M5.293 7.293a1 1 0 011.414 0L10 10.586l3.293-3.293a1 1 0 111.414 1.414l-4 4a1 1 0 01-1.414 0l-4-4a1 1 0 010-1.414z" clip-rule="evenodd" />
248
- </svg>
249
- </div>
250
- </summary>
251
- <div class="raw-output-body">
252
- <div class="raw-output-toolbar">
253
- <span class="raw-output-lang-hint">
254
- <svg class="icon-sm" viewBox="0 0 20 20" fill="currentColor">
255
- <path fill-rule="evenodd" d="M18 10a8 8 0 11-16 0 8 8 0 0116 0zm-7-4a1 1 0 11-2 0 1 1 0 012 0zM9 9a1 1 0 000 2v3a1 1 0 001 1h1a1 1 0 100-2v-3a1 1 0 00-1-1H9z" clip-rule="evenodd" />
256
- </svg>
257
- Response from LLM
258
- </span>
259
- </div>
260
- <pre class="raw-output-code"><code><%= raw_output %></code></pre>
261
- </div>
262
- </details>
221
+ <div class="result-section-header">
222
+ <h2 class="result-section-title">
223
+ <svg class="icon-sm" viewBox="0 0 20 20" fill="currentColor">
224
+ <path fill-rule="evenodd" d="M12.316 3.051a1 1 0 01.633 1.265l-4 12a1 1 0 11-1.898-.632l4-12a1 1 0 011.265-.633zM5.707 6.293a1 1 0 010 1.414L3.414 10l2.293 2.293a1 1 0 11-1.414 1.414l-3-3a1 1 0 010-1.414l3-3a1 1 0 011.414 0zm8.586 0a1 1 0 011.414 0l3 3a1 1 0 010 1.414l-3 3a1 1 0 11-1.414-1.414L16.586 10l-2.293-2.293a1 1 0 010-1.414z" clip-rule="evenodd" />
225
+ </svg>
226
+ Raw LLM Response<% if @experiment.metadata&.dig("model").present? %> <span class="text-muted font-mono text-sm">(<%= @experiment.metadata["model"] %>)</span><% end %>
227
+ </h2>
228
+ <span class="result-section-count"><%= line_count %> line<%= line_count == 1 ? '' : 's' %>, <%= number_to_human_size(char_count) %></span>
229
+ </div>
230
+ <div class="result-panel">
231
+ <pre class="result-code"><code><%= raw_output %></code></pre>
232
+ </div>
263
233
  </div>
264
234
  <% end %>
265
235
 
@@ -131,14 +131,6 @@
131
131
  <% evaluation_result = @dataset_record&.evaluation_results&.for_evaluator(evaluator_class)&.last %>
132
132
  <% score = evaluation_result&.score %>
133
133
  <%
134
- score_class = case score
135
- when 0...0.2 then 'score-bad'
136
- when 0.2...0.4 then 'score-poor'
137
- when 0.4...0.6 then 'score-fair'
138
- when 0.6...0.8 then 'score-good'
139
- when 0.8..1.0 then 'score-excellent'
140
- else ''
141
- end
142
134
  bg_style = case score
143
135
  when 0...0.2 then 'background: rgba(207, 111, 98, 0.08);'
144
136
  when 0.2...0.4 then 'background: rgba(232, 161, 88, 0.08);'
@@ -153,7 +145,7 @@
153
145
  <span class="eval-name"><%= evaluator_class.name.demodulize.gsub(/Evaluator$/, '').gsub(/([a-z])([A-Z])/, '\1 \2') %></span>
154
146
  <div class="flex items-center gap-2">
155
147
  <% if score %>
156
- <span class="eval-score <%= score_class %>"><%= sprintf('%.0f', score * 100) %></span>
148
+ <span class="eval-score <%= score_class(score) %>"><%= sprintf('%.0f', score * 100) %></span>
157
149
  <span class="text-xs text-muted" style="font-size: 10px;">%</span>
158
150
  <% else %>
159
151
  <span class="eval-score eval-score--empty">
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ class CreateLevaOptimizationRuns < ActiveRecord::Migration[7.2]
4
+ def change
5
+ create_table :leva_optimization_runs do |t|
6
+ t.references :dataset, null: false, foreign_key: { to_table: :leva_datasets }
7
+ t.references :prompt, foreign_key: { to_table: :leva_prompts }
8
+ t.string :status, default: "pending", null: false
9
+ t.string :current_step
10
+ t.integer :progress, default: 0, null: false
11
+ t.integer :examples_processed, default: 0
12
+ t.integer :total_examples
13
+ t.string :prompt_name, null: false
14
+ t.string :mode, default: "light", null: false
15
+ t.text :error_message
16
+ t.json :metadata
17
+ t.string :model
18
+ t.string :optimizer, default: "bootstrap", null: false
19
+
20
+ t.timestamps
21
+ end
22
+
23
+ add_index :leva_optimization_runs, :status
24
+ end
25
+ end
@@ -8,7 +8,9 @@ class <%= class_name %>Eval < Leva::BaseEval
8
8
  # Implement your evaluation logic here
9
9
  # You can access the ground truth using recordable.ground_truth
10
10
 
11
- # Example implementation:
12
- runner_result.parsed_predictions.first == recordable.ground_truth ? 1.0 : 0.0
11
+ # Example implementation (case-insensitive comparison):
12
+ prediction = runner_result.parsed_predictions.first.to_s.downcase
13
+ expected = recordable.ground_truth.to_s.downcase
14
+ prediction == expected ? 1.0 : 0.0
13
15
  end
14
16
  end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Leva
4
+ # Base error class for all Leva errors
5
+ class Error < StandardError; end
6
+
7
+ # Raised when a dataset has insufficient records for optimization
8
+ class InsufficientDataError < Error; end
9
+
10
+ # Raised when DSPy is not properly configured
11
+ class DspyConfigurationError < Error; end
12
+
13
+ # Raised when optimization fails
14
+ class OptimizationError < Error; end
15
+
16
+ # Raised when a runner encounters an error during execution
17
+ class RunnerError < Error; end
18
+ end
data/lib/leva/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Leva
2
- VERSION = "0.2.1"
2
+ VERSION = "0.3.1"
3
3
  end
data/lib/leva.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  require "leva/version"
2
2
  require "leva/engine"
3
+ require "leva/errors"
3
4
  require "liquid"
4
5
 
5
6
  module Leva
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: leva
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kieran Klaassen
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-11-23 00:00:00.000000000 Z
11
+ date: 2025-12-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails
@@ -53,6 +53,7 @@ files:
53
53
  - app/assets/config/leva_manifest.js
54
54
  - app/assets/stylesheets/leva/application.css
55
55
  - app/controllers/leva/application_controller.rb
56
+ - app/controllers/leva/dataset_optimizations_controller.rb
56
57
  - app/controllers/leva/dataset_records_controller.rb
57
58
  - app/controllers/leva/datasets_controller.rb
58
59
  - app/controllers/leva/design_system_controller.rb
@@ -72,8 +73,17 @@ files:
72
73
  - app/models/leva/dataset_record.rb
73
74
  - app/models/leva/evaluation_result.rb
74
75
  - app/models/leva/experiment.rb
76
+ - app/models/leva/optimization_run.rb
75
77
  - app/models/leva/prompt.rb
76
78
  - app/models/leva/runner_result.rb
79
+ - app/services/leva/class_loader.rb
80
+ - app/services/leva/dataset_converter.rb
81
+ - app/services/leva/optimizers/base.rb
82
+ - app/services/leva/optimizers/bootstrap.rb
83
+ - app/services/leva/optimizers/gepa_optimizer.rb
84
+ - app/services/leva/optimizers/miprov2_optimizer.rb
85
+ - app/services/leva/prompt_optimizer.rb
86
+ - app/services/leva/signature_generator.rb
77
87
  - app/views/layouts/leva/application.html.erb
78
88
  - app/views/leva/dataset_records/index.html.erb
79
89
  - app/views/leva/dataset_records/show.html.erb
@@ -90,6 +100,7 @@ files:
90
100
  - app/views/leva/experiments/index.html.erb
91
101
  - app/views/leva/experiments/new.html.erb
92
102
  - app/views/leva/experiments/show.html.erb
103
+ - app/views/leva/optimization_runs/show.html.erb
93
104
  - app/views/leva/runner_results/show.html.erb
94
105
  - app/views/leva/workbench/_evaluation_area.html.erb
95
106
  - app/views/leva/workbench/_prompt_content.html.erb
@@ -107,12 +118,14 @@ files:
107
118
  - db/migrate/20240813173035_create_leva_experiments.rb
108
119
  - db/migrate/20240813173040_create_leva_runner_results.rb
109
120
  - db/migrate/20240813173050_create_leva_evaluation_results.rb
121
+ - db/migrate/20241204000001_create_leva_optimization_runs.rb
110
122
  - lib/generators/leva/eval_generator.rb
111
123
  - lib/generators/leva/runner_generator.rb
112
124
  - lib/generators/leva/templates/eval.rb.erb
113
125
  - lib/generators/leva/templates/runner.rb.erb
114
126
  - lib/leva.rb
115
127
  - lib/leva/engine.rb
128
+ - lib/leva/errors.rb
116
129
  - lib/leva/version.rb
117
130
  - lib/tasks/leva_tasks.rake
118
131
  homepage: https://github.com/kieranklaassen/leva
@@ -137,7 +150,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
137
150
  - !ruby/object:Gem::Version
138
151
  version: '0'
139
152
  requirements: []
140
- rubygems_version: 3.4.10
153
+ rubygems_version: 3.5.3
141
154
  signing_key:
142
155
  specification_version: 4
143
156
  summary: Flexible Evaluation Framework for Language Models in Rails