leva 0.2.1 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +54 -0
- data/app/assets/stylesheets/leva/application.css +9 -0
- data/app/controllers/leva/dataset_optimizations_controller.rb +64 -0
- data/app/controllers/leva/experiments_controller.rb +14 -6
- data/app/controllers/leva/workbench_controller.rb +26 -10
- data/app/helpers/leva/application_helper.rb +32 -16
- data/app/models/leva/dataset.rb +1 -0
- data/app/models/leva/experiment.rb +1 -0
- data/app/models/leva/optimization_run.rb +137 -0
- data/app/models/leva/prompt.rb +10 -0
- data/app/services/leva/class_loader.rb +37 -0
- data/app/services/leva/dataset_converter.rb +64 -0
- data/app/services/leva/optimizers/base.rb +183 -0
- data/app/services/leva/optimizers/bootstrap.rb +92 -0
- data/app/services/leva/optimizers/gepa_optimizer.rb +59 -0
- data/app/services/leva/optimizers/miprov2_optimizer.rb +52 -0
- data/app/services/leva/prompt_optimizer.rb +305 -0
- data/app/services/leva/signature_generator.rb +129 -0
- data/app/views/leva/datasets/show.html.erb +3 -0
- data/app/views/leva/experiments/_experiment.html.erb +9 -10
- data/app/views/leva/experiments/_form.html.erb +10 -0
- data/app/views/leva/experiments/index.html.erb +2 -1
- data/app/views/leva/experiments/show.html.erb +20 -21
- data/app/views/leva/optimization_runs/show.html.erb +698 -0
- data/app/views/leva/runner_results/show.html.erb +18 -48
- data/app/views/leva/workbench/_results_section.html.erb +1 -9
- data/db/migrate/20241204000001_create_leva_optimization_runs.rb +25 -0
- data/lib/generators/leva/templates/eval.rb.erb +4 -2
- data/lib/leva/errors.rb +18 -0
- data/lib/leva/version.rb +1 -1
- data/lib/leva.rb +1 -0
- metadata +16 -3
|
@@ -95,13 +95,6 @@
|
|
|
95
95
|
<%
|
|
96
96
|
score = eval_result.score
|
|
97
97
|
score_pct = (score * 100).round
|
|
98
|
-
score_class = case score
|
|
99
|
-
when 0...0.2 then 'score-bad'
|
|
100
|
-
when 0.2...0.4 then 'score-poor'
|
|
101
|
-
when 0.4...0.6 then 'score-fair'
|
|
102
|
-
when 0.6...0.8 then 'score-good'
|
|
103
|
-
else 'score-excellent'
|
|
104
|
-
end
|
|
105
98
|
score_label = case score
|
|
106
99
|
when 0...0.2 then 'Bad'
|
|
107
100
|
when 0.2...0.4 then 'Poor'
|
|
@@ -116,12 +109,12 @@
|
|
|
116
109
|
short_name = short_name.presence || eval_result.evaluator_class.demodulize.gsub(/Eval(uator)?$/, '')
|
|
117
110
|
%>
|
|
118
111
|
<div class="eval-summary-card" title="<%= eval_result.evaluator_class %>">
|
|
119
|
-
<span class="eval-summary-score <%= score_class %>"><%= score_pct %><span class="eval-summary-pct">%</span></span>
|
|
112
|
+
<span class="eval-summary-score <%= score_class(score) %>"><%= score_pct %><span class="eval-summary-pct">%</span></span>
|
|
120
113
|
<span class="eval-summary-name"><%= short_name %></span>
|
|
121
114
|
<div class="eval-summary-bar">
|
|
122
|
-
<div class="eval-summary-bar-fill <%= score_class %>" style="width: <%= score_pct %>%"></div>
|
|
115
|
+
<div class="eval-summary-bar-fill <%= score_class(score) %>" style="width: <%= score_pct %>%"></div>
|
|
123
116
|
</div>
|
|
124
|
-
<span class="eval-summary-label <%= score_class %>"><%= score_label %></span>
|
|
117
|
+
<span class="eval-summary-label <%= score_class(score) %>"><%= score_label %></span>
|
|
125
118
|
</div>
|
|
126
119
|
<% end %>
|
|
127
120
|
</div>
|
|
@@ -217,49 +210,26 @@
|
|
|
217
210
|
</div>
|
|
218
211
|
</div>
|
|
219
212
|
|
|
220
|
-
<%# Raw
|
|
221
|
-
<% if @runner_result.
|
|
213
|
+
<%# Raw LLM Response - Enhanced Collapsible Section %>
|
|
214
|
+
<% if @runner_result.prediction.present? && @runner_result.prediction != prediction_text %>
|
|
222
215
|
<%
|
|
223
|
-
raw_output = @runner_result.
|
|
216
|
+
raw_output = @runner_result.prediction
|
|
224
217
|
line_count = raw_output.lines.count
|
|
225
218
|
char_count = raw_output.length
|
|
226
219
|
%>
|
|
227
220
|
<div class="result-section">
|
|
228
|
-
<
|
|
229
|
-
<
|
|
230
|
-
<
|
|
231
|
-
<
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
</div>
|
|
241
|
-
<div class="raw-output-header-right">
|
|
242
|
-
<span class="raw-output-expand-hint">
|
|
243
|
-
<span class="expand-text">Show</span>
|
|
244
|
-
<span class="collapse-text">Hide</span>
|
|
245
|
-
</span>
|
|
246
|
-
<svg class="raw-output-chevron" viewBox="0 0 20 20" fill="currentColor">
|
|
247
|
-
<path fill-rule="evenodd" d="M5.293 7.293a1 1 0 011.414 0L10 10.586l3.293-3.293a1 1 0 111.414 1.414l-4 4a1 1 0 01-1.414 0l-4-4a1 1 0 010-1.414z" clip-rule="evenodd" />
|
|
248
|
-
</svg>
|
|
249
|
-
</div>
|
|
250
|
-
</summary>
|
|
251
|
-
<div class="raw-output-body">
|
|
252
|
-
<div class="raw-output-toolbar">
|
|
253
|
-
<span class="raw-output-lang-hint">
|
|
254
|
-
<svg class="icon-sm" viewBox="0 0 20 20" fill="currentColor">
|
|
255
|
-
<path fill-rule="evenodd" d="M18 10a8 8 0 11-16 0 8 8 0 0116 0zm-7-4a1 1 0 11-2 0 1 1 0 012 0zM9 9a1 1 0 000 2v3a1 1 0 001 1h1a1 1 0 100-2v-3a1 1 0 00-1-1H9z" clip-rule="evenodd" />
|
|
256
|
-
</svg>
|
|
257
|
-
Response from LLM
|
|
258
|
-
</span>
|
|
259
|
-
</div>
|
|
260
|
-
<pre class="raw-output-code"><code><%= raw_output %></code></pre>
|
|
261
|
-
</div>
|
|
262
|
-
</details>
|
|
221
|
+
<div class="result-section-header">
|
|
222
|
+
<h2 class="result-section-title">
|
|
223
|
+
<svg class="icon-sm" viewBox="0 0 20 20" fill="currentColor">
|
|
224
|
+
<path fill-rule="evenodd" d="M12.316 3.051a1 1 0 01.633 1.265l-4 12a1 1 0 11-1.898-.632l4-12a1 1 0 011.265-.633zM5.707 6.293a1 1 0 010 1.414L3.414 10l2.293 2.293a1 1 0 11-1.414 1.414l-3-3a1 1 0 010-1.414l3-3a1 1 0 011.414 0zm8.586 0a1 1 0 011.414 0l3 3a1 1 0 010 1.414l-3 3a1 1 0 11-1.414-1.414L16.586 10l-2.293-2.293a1 1 0 010-1.414z" clip-rule="evenodd" />
|
|
225
|
+
</svg>
|
|
226
|
+
Raw LLM Response<% if @experiment.metadata&.dig("model").present? %> <span class="text-muted font-mono text-sm">(<%= @experiment.metadata["model"] %>)</span><% end %>
|
|
227
|
+
</h2>
|
|
228
|
+
<span class="result-section-count"><%= line_count %> line<%= line_count == 1 ? '' : 's' %>, <%= number_to_human_size(char_count) %></span>
|
|
229
|
+
</div>
|
|
230
|
+
<div class="result-panel">
|
|
231
|
+
<pre class="result-code"><code><%= raw_output %></code></pre>
|
|
232
|
+
</div>
|
|
263
233
|
</div>
|
|
264
234
|
<% end %>
|
|
265
235
|
|
|
@@ -131,14 +131,6 @@
|
|
|
131
131
|
<% evaluation_result = @dataset_record&.evaluation_results&.for_evaluator(evaluator_class)&.last %>
|
|
132
132
|
<% score = evaluation_result&.score %>
|
|
133
133
|
<%
|
|
134
|
-
score_class = case score
|
|
135
|
-
when 0...0.2 then 'score-bad'
|
|
136
|
-
when 0.2...0.4 then 'score-poor'
|
|
137
|
-
when 0.4...0.6 then 'score-fair'
|
|
138
|
-
when 0.6...0.8 then 'score-good'
|
|
139
|
-
when 0.8..1.0 then 'score-excellent'
|
|
140
|
-
else ''
|
|
141
|
-
end
|
|
142
134
|
bg_style = case score
|
|
143
135
|
when 0...0.2 then 'background: rgba(207, 111, 98, 0.08);'
|
|
144
136
|
when 0.2...0.4 then 'background: rgba(232, 161, 88, 0.08);'
|
|
@@ -153,7 +145,7 @@
|
|
|
153
145
|
<span class="eval-name"><%= evaluator_class.name.demodulize.gsub(/Evaluator$/, '').gsub(/([a-z])([A-Z])/, '\1 \2') %></span>
|
|
154
146
|
<div class="flex items-center gap-2">
|
|
155
147
|
<% if score %>
|
|
156
|
-
<span class="eval-score <%= score_class %>"><%= sprintf('%.0f', score * 100) %></span>
|
|
148
|
+
<span class="eval-score <%= score_class(score) %>"><%= sprintf('%.0f', score * 100) %></span>
|
|
157
149
|
<span class="text-xs text-muted" style="font-size: 10px;">%</span>
|
|
158
150
|
<% else %>
|
|
159
151
|
<span class="eval-score eval-score--empty">
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class CreateLevaOptimizationRuns < ActiveRecord::Migration[7.2]
|
|
4
|
+
def change
|
|
5
|
+
create_table :leva_optimization_runs do |t|
|
|
6
|
+
t.references :dataset, null: false, foreign_key: { to_table: :leva_datasets }
|
|
7
|
+
t.references :prompt, foreign_key: { to_table: :leva_prompts }
|
|
8
|
+
t.string :status, default: "pending", null: false
|
|
9
|
+
t.string :current_step
|
|
10
|
+
t.integer :progress, default: 0, null: false
|
|
11
|
+
t.integer :examples_processed, default: 0
|
|
12
|
+
t.integer :total_examples
|
|
13
|
+
t.string :prompt_name, null: false
|
|
14
|
+
t.string :mode, default: "light", null: false
|
|
15
|
+
t.text :error_message
|
|
16
|
+
t.json :metadata
|
|
17
|
+
t.string :model
|
|
18
|
+
t.string :optimizer, default: "bootstrap", null: false
|
|
19
|
+
|
|
20
|
+
t.timestamps
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
add_index :leva_optimization_runs, :status
|
|
24
|
+
end
|
|
25
|
+
end
|
|
@@ -8,7 +8,9 @@ class <%= class_name %>Eval < Leva::BaseEval
|
|
|
8
8
|
# Implement your evaluation logic here
|
|
9
9
|
# You can access the ground truth using recordable.ground_truth
|
|
10
10
|
|
|
11
|
-
# Example implementation:
|
|
12
|
-
runner_result.parsed_predictions.first
|
|
11
|
+
# Example implementation (case-insensitive comparison):
|
|
12
|
+
prediction = runner_result.parsed_predictions.first.to_s.downcase
|
|
13
|
+
expected = recordable.ground_truth.to_s.downcase
|
|
14
|
+
prediction == expected ? 1.0 : 0.0
|
|
13
15
|
end
|
|
14
16
|
end
|
data/lib/leva/errors.rb
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Leva
|
|
4
|
+
# Base error class for all Leva errors
|
|
5
|
+
class Error < StandardError; end
|
|
6
|
+
|
|
7
|
+
# Raised when a dataset has insufficient records for optimization
|
|
8
|
+
class InsufficientDataError < Error; end
|
|
9
|
+
|
|
10
|
+
# Raised when DSPy is not properly configured
|
|
11
|
+
class DspyConfigurationError < Error; end
|
|
12
|
+
|
|
13
|
+
# Raised when optimization fails
|
|
14
|
+
class OptimizationError < Error; end
|
|
15
|
+
|
|
16
|
+
# Raised when a runner encounters an error during execution
|
|
17
|
+
class RunnerError < Error; end
|
|
18
|
+
end
|
data/lib/leva/version.rb
CHANGED
data/lib/leva.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: leva
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.1
|
|
4
|
+
version: 0.3.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Kieran Klaassen
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2025-
|
|
11
|
+
date: 2025-12-07 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rails
|
|
@@ -53,6 +53,7 @@ files:
|
|
|
53
53
|
- app/assets/config/leva_manifest.js
|
|
54
54
|
- app/assets/stylesheets/leva/application.css
|
|
55
55
|
- app/controllers/leva/application_controller.rb
|
|
56
|
+
- app/controllers/leva/dataset_optimizations_controller.rb
|
|
56
57
|
- app/controllers/leva/dataset_records_controller.rb
|
|
57
58
|
- app/controllers/leva/datasets_controller.rb
|
|
58
59
|
- app/controllers/leva/design_system_controller.rb
|
|
@@ -72,8 +73,17 @@ files:
|
|
|
72
73
|
- app/models/leva/dataset_record.rb
|
|
73
74
|
- app/models/leva/evaluation_result.rb
|
|
74
75
|
- app/models/leva/experiment.rb
|
|
76
|
+
- app/models/leva/optimization_run.rb
|
|
75
77
|
- app/models/leva/prompt.rb
|
|
76
78
|
- app/models/leva/runner_result.rb
|
|
79
|
+
- app/services/leva/class_loader.rb
|
|
80
|
+
- app/services/leva/dataset_converter.rb
|
|
81
|
+
- app/services/leva/optimizers/base.rb
|
|
82
|
+
- app/services/leva/optimizers/bootstrap.rb
|
|
83
|
+
- app/services/leva/optimizers/gepa_optimizer.rb
|
|
84
|
+
- app/services/leva/optimizers/miprov2_optimizer.rb
|
|
85
|
+
- app/services/leva/prompt_optimizer.rb
|
|
86
|
+
- app/services/leva/signature_generator.rb
|
|
77
87
|
- app/views/layouts/leva/application.html.erb
|
|
78
88
|
- app/views/leva/dataset_records/index.html.erb
|
|
79
89
|
- app/views/leva/dataset_records/show.html.erb
|
|
@@ -90,6 +100,7 @@ files:
|
|
|
90
100
|
- app/views/leva/experiments/index.html.erb
|
|
91
101
|
- app/views/leva/experiments/new.html.erb
|
|
92
102
|
- app/views/leva/experiments/show.html.erb
|
|
103
|
+
- app/views/leva/optimization_runs/show.html.erb
|
|
93
104
|
- app/views/leva/runner_results/show.html.erb
|
|
94
105
|
- app/views/leva/workbench/_evaluation_area.html.erb
|
|
95
106
|
- app/views/leva/workbench/_prompt_content.html.erb
|
|
@@ -107,12 +118,14 @@ files:
|
|
|
107
118
|
- db/migrate/20240813173035_create_leva_experiments.rb
|
|
108
119
|
- db/migrate/20240813173040_create_leva_runner_results.rb
|
|
109
120
|
- db/migrate/20240813173050_create_leva_evaluation_results.rb
|
|
121
|
+
- db/migrate/20241204000001_create_leva_optimization_runs.rb
|
|
110
122
|
- lib/generators/leva/eval_generator.rb
|
|
111
123
|
- lib/generators/leva/runner_generator.rb
|
|
112
124
|
- lib/generators/leva/templates/eval.rb.erb
|
|
113
125
|
- lib/generators/leva/templates/runner.rb.erb
|
|
114
126
|
- lib/leva.rb
|
|
115
127
|
- lib/leva/engine.rb
|
|
128
|
+
- lib/leva/errors.rb
|
|
116
129
|
- lib/leva/version.rb
|
|
117
130
|
- lib/tasks/leva_tasks.rake
|
|
118
131
|
homepage: https://github.com/kieranklaassen/leva
|
|
@@ -137,7 +150,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
137
150
|
- !ruby/object:Gem::Version
|
|
138
151
|
version: '0'
|
|
139
152
|
requirements: []
|
|
140
|
-
rubygems_version: 3.
|
|
153
|
+
rubygems_version: 3.5.3
|
|
141
154
|
signing_key:
|
|
142
155
|
specification_version: 4
|
|
143
156
|
summary: Flexible Evaluation Framework for Language Models in Rails
|