leva 0.2.1 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +54 -0
- data/app/assets/stylesheets/leva/application.css +9 -0
- data/app/controllers/leva/dataset_optimizations_controller.rb +64 -0
- data/app/controllers/leva/experiments_controller.rb +14 -6
- data/app/controllers/leva/workbench_controller.rb +26 -10
- data/app/helpers/leva/application_helper.rb +32 -16
- data/app/models/leva/dataset.rb +1 -0
- data/app/models/leva/experiment.rb +1 -0
- data/app/models/leva/optimization_run.rb +137 -0
- data/app/models/leva/prompt.rb +10 -0
- data/app/services/leva/class_loader.rb +37 -0
- data/app/services/leva/dataset_converter.rb +64 -0
- data/app/services/leva/optimizers/base.rb +183 -0
- data/app/services/leva/optimizers/bootstrap.rb +92 -0
- data/app/services/leva/optimizers/gepa_optimizer.rb +59 -0
- data/app/services/leva/optimizers/miprov2_optimizer.rb +52 -0
- data/app/services/leva/prompt_optimizer.rb +305 -0
- data/app/services/leva/signature_generator.rb +129 -0
- data/app/views/leva/datasets/show.html.erb +3 -0
- data/app/views/leva/experiments/_experiment.html.erb +9 -10
- data/app/views/leva/experiments/_form.html.erb +10 -0
- data/app/views/leva/experiments/index.html.erb +2 -1
- data/app/views/leva/experiments/show.html.erb +20 -21
- data/app/views/leva/optimization_runs/show.html.erb +698 -0
- data/app/views/leva/runner_results/show.html.erb +18 -48
- data/app/views/leva/workbench/_results_section.html.erb +1 -9
- data/db/migrate/20241204000001_create_leva_optimization_runs.rb +25 -0
- data/lib/generators/leva/templates/eval.rb.erb +4 -2
- data/lib/leva/errors.rb +18 -0
- data/lib/leva/version.rb +1 -1
- data/lib/leva.rb +1 -0
- metadata +16 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: '03694d16308b610d8c1cc83ec070cf2c0a03273d93b4e220834ff063f8df5b0a'
|
|
4
|
+
data.tar.gz: 31fa8e5737410dbb9b5729bf43616ef037fbad1c6b8188e60649a5156c8f87c1
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f12f9ec8d00a5dcd9a8c003a598d9ec316be4bd8b8b2deb7a99680a14dcd64790b496829e7635e28f5b86dd7a5f484b9043b504bda24f7e3d0fd75b8e4eee271
|
|
7
|
+
data.tar.gz: 293f53edc39d95ed612b0ce0e0e5097f38e888990c7e8530b54da6afcf2015ae7f150f8f9bd9d2bb1171c5bf18c0c4a34180482594c376ed17341ae42bce9f09
|
data/README.md
CHANGED
|
@@ -181,6 +181,60 @@ experiment.evaluation_results.group_by(&:evaluator_class).each do |evaluator_cla
|
|
|
181
181
|
end
|
|
182
182
|
```
|
|
183
183
|
|
|
184
|
+
## Prompt Optimization (DSPy Integration)
|
|
185
|
+
|
|
186
|
+
Leva includes optional prompt optimization powered by [DSPy.rb](https://github.com/kieranklaassen/dspy.rb). This feature automatically finds optimal prompts and few-shot examples for your datasets.
|
|
187
|
+
|
|
188
|
+
**Requirements:**
|
|
189
|
+
- Ruby 3.3.0 or higher
|
|
190
|
+
- DSPy gem and optional optimizer gems
|
|
191
|
+
|
|
192
|
+
### Installation
|
|
193
|
+
|
|
194
|
+
Add the DSPy gems to your Gemfile:
|
|
195
|
+
|
|
196
|
+
```ruby
|
|
197
|
+
gem "dspy" # Core DSPy functionality (required)
|
|
198
|
+
gem "dspy-gepa" # GEPA optimizer (optional, recommended)
|
|
199
|
+
gem "dspy-miprov2" # MIPROv2 optimizer (optional)
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
### Available Optimizers
|
|
203
|
+
|
|
204
|
+
| Optimizer | Best For | Description |
|
|
205
|
+
|-----------|----------|-------------|
|
|
206
|
+
| **Bootstrap** | Quick iteration, small datasets | Fast selection of few-shot examples. No gem required. |
|
|
207
|
+
| **GEPA** | Maximum quality | State-of-the-art reflective prompt evolution. 10-14% better than MIPROv2. |
|
|
208
|
+
| **MIPROv2** | Large datasets (200+) | Bayesian optimization for instructions and examples. |
|
|
209
|
+
|
|
210
|
+
### Usage
|
|
211
|
+
|
|
212
|
+
```ruby
|
|
213
|
+
# Create an optimizer for your dataset
|
|
214
|
+
optimizer = Leva::PromptOptimizer.new(
|
|
215
|
+
dataset: dataset,
|
|
216
|
+
optimizer: :gepa, # :bootstrap, :gepa, or :miprov2
|
|
217
|
+
mode: :medium, # :light, :medium, or :heavy
|
|
218
|
+
model: "gpt-4o-mini" # Any model supported by RubyLLM
|
|
219
|
+
)
|
|
220
|
+
|
|
221
|
+
# Run optimization
|
|
222
|
+
result = optimizer.optimize
|
|
223
|
+
|
|
224
|
+
# Result contains optimized prompts
|
|
225
|
+
result[:system_prompt] # Optimized instruction
|
|
226
|
+
result[:user_prompt] # Template with Liquid variables
|
|
227
|
+
result[:metadata] # Score, examples, and optimization details
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
### Optimization Modes
|
|
231
|
+
|
|
232
|
+
| Mode | Duration | Use Case |
|
|
233
|
+
|------|----------|----------|
|
|
234
|
+
| `:light` | ~5 min | Quick experiments |
|
|
235
|
+
| `:medium` | ~15 min | Balanced quality/speed |
|
|
236
|
+
| `:heavy` | ~30 min | Production prompts |
|
|
237
|
+
|
|
184
238
|
## Configuration
|
|
185
239
|
|
|
186
240
|
Ensure you set up any required API keys or other configurations in your Rails credentials or environment variables.
|
|
@@ -947,6 +947,15 @@ strong {
|
|
|
947
947
|
color: var(--info-400);
|
|
948
948
|
}
|
|
949
949
|
|
|
950
|
+
.badge-optimized {
|
|
951
|
+
background: rgba(168, 127, 219, 0.15);
|
|
952
|
+
color: #b794f4;
|
|
953
|
+
font-size: 0.65rem;
|
|
954
|
+
padding: 2px 6px;
|
|
955
|
+
margin-left: 6px;
|
|
956
|
+
vertical-align: middle;
|
|
957
|
+
}
|
|
958
|
+
|
|
950
959
|
/* Status Dots */
|
|
951
960
|
.status-dot {
|
|
952
961
|
display: inline-block;
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Leva
  # Drives the prompt-optimization flow for one dataset: renders the setup
  # form and records/enqueues an optimization run.
  class DatasetOptimizationsController < ApplicationController
    before_action :set_dataset

    # GET /datasets/:dataset_id/optimization/new
    #
    # Prepares the values the optimization form renders: record counts,
    # whether the dataset can be optimized, and the selectable modes, models
    # and optimizers.
    #
    # @return [void]
    def new
      @record_count = @dataset.dataset_records.count

      @prompt_optimizer = PromptOptimizer.new(dataset: @dataset)
      @can_optimize = @prompt_optimizer.can_optimize?
      @records_needed = @prompt_optimizer.records_needed

      @modes = PromptOptimizer::MODES
      @models = PromptOptimizer.available_models
      @optimizers = PromptOptimizer::OPTIMIZERS
    end

    # POST /datasets/:dataset_id/optimization
    #
    # Persists a pending optimization run for the dataset, enqueues the
    # background job that processes it, and redirects to the run's page.
    #
    # @return [void]
    def create
      run_attributes = optimization_params.merge(status: :pending)
      @optimization_run = @dataset.optimization_runs.create!(run_attributes)

      PromptOptimizationJob.perform_later(optimization_run_id: @optimization_run.id)

      redirect_to optimization_run_path(@optimization_run)
    end

    private

    # Loads the dataset named by the :dataset_id route parameter.
    #
    # @return [void]
    def set_dataset
      @dataset = Dataset.find(params[:dataset_id])
    end

    # Builds the attributes for a new optimization run from the request,
    # substituting a default for every blank parameter.
    #
    # @return [Hash] prompt_name, mode, model and optimizer
    # @raise [ActionController::BadRequest] If PromptOptimizer does not know the model
    def optimization_params
      chosen_model = param_or_default(:model) { PromptOptimizer::DEFAULT_MODEL }
      raise ActionController::BadRequest, "Invalid model: #{chosen_model}" unless PromptOptimizer.find_model(chosen_model)

      {
        prompt_name: param_or_default(:prompt_name) { "Optimized: #{@dataset.name}" },
        mode: param_or_default(:mode) { "light" },
        model: chosen_model,
        optimizer: param_or_default(:optimizer) { PromptOptimizer::DEFAULT_OPTIMIZER.to_s }
      }
    end

    # Returns the request parameter when it is present, otherwise the value of
    # the block (evaluated only when needed).
    #
    # @param key [Symbol] The request parameter name
    # @return [Object]
    def param_or_default(key)
      params[key].presence || yield
    end
  end
end
|
|
@@ -2,8 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
module Leva
|
|
4
4
|
class ExperimentsController < ApplicationController
|
|
5
|
-
include ApplicationHelper
|
|
6
|
-
|
|
7
5
|
before_action :set_experiment, only: [ :show, :edit, :update ]
|
|
8
6
|
before_action :check_editable, only: [ :edit, :update ]
|
|
9
7
|
before_action :load_runners_and_evaluators, only: [ :new, :edit, :create, :update ]
|
|
@@ -11,7 +9,8 @@ module Leva
|
|
|
11
9
|
# GET /experiments
|
|
12
10
|
# @return [void]
|
|
13
11
|
def index
|
|
14
|
-
@experiments = Experiment.all
|
|
12
|
+
@experiments = Experiment.includes(:evaluation_results).all
|
|
13
|
+
@evaluator_classes = Leva::EvaluationResult.distinct.pluck(:evaluator_class)
|
|
15
14
|
end
|
|
16
15
|
|
|
17
16
|
# GET /experiments/1
|
|
@@ -83,12 +82,21 @@ module Leva
|
|
|
83
82
|
# Only allow a list of trusted parameters through.
|
|
84
83
|
# @return [ActionController::Parameters]
|
|
85
84
|
def experiment_params
|
|
86
|
-
params.require(:experiment).permit(:name, :description, :dataset_id, :prompt_id, :runner_class, evaluator_classes: [])
|
|
85
|
+
permitted = params.require(:experiment).permit(:name, :description, :dataset_id, :prompt_id, :runner_class, evaluator_classes: [], metadata: {})
|
|
86
|
+
# Ensure metadata is a hash, not ActionController::Parameters
|
|
87
|
+
if permitted[:metadata].present?
|
|
88
|
+
metadata_hash = permitted[:metadata].to_h
|
|
89
|
+
if metadata_hash.to_json.bytesize > 100_000
|
|
90
|
+
raise ActionController::BadRequest, "Metadata exceeds maximum size of 100KB"
|
|
91
|
+
end
|
|
92
|
+
permitted[:metadata] = metadata_hash
|
|
93
|
+
end
|
|
94
|
+
permitted
|
|
87
95
|
end
|
|
88
96
|
|
|
89
97
|
def load_runners_and_evaluators
|
|
90
|
-
@runners =
|
|
91
|
-
@evaluators =
|
|
98
|
+
@runners = Leva::ClassLoader.runners
|
|
99
|
+
@evaluators = Leva::ClassLoader.evaluators
|
|
92
100
|
end
|
|
93
101
|
|
|
94
102
|
def check_editable
|
|
@@ -19,14 +19,12 @@ module Leva
|
|
|
19
19
|
@selected_dataset_record = params[:dataset_record_id] || DatasetRecord.first&.id
|
|
20
20
|
|
|
21
21
|
# Get merged context if runner and dataset record are available
|
|
22
|
-
if @selected_runner && @dataset_record
|
|
23
|
-
runner_class = @selected_runner.constantize
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
@merged_context = @record_context.merge(@runner_context)
|
|
29
|
-
end
|
|
22
|
+
if @selected_runner && @dataset_record && valid_runner?(@selected_runner)
|
|
23
|
+
runner_class = @selected_runner.constantize
|
|
24
|
+
runner = runner_class.new
|
|
25
|
+
@record_context = @dataset_record.recordable.to_llm_context
|
|
26
|
+
@runner_context = runner.to_llm_context(@dataset_record.recordable)
|
|
27
|
+
@merged_context = @record_context.merge(@runner_context)
|
|
30
28
|
end
|
|
31
29
|
end
|
|
32
30
|
|
|
@@ -67,8 +65,8 @@ module Leva
|
|
|
67
65
|
def run
|
|
68
66
|
return redirect_to workbench_index_path, alert: "Please select a record and a runner" unless @dataset_record && run_params[:runner]
|
|
69
67
|
|
|
68
|
+
return redirect_to workbench_index_path, alert: "Invalid runner selected" unless valid_runner?(run_params[:runner])
|
|
70
69
|
runner_class = run_params[:runner].constantize
|
|
71
|
-
return redirect_to workbench_index_path, alert: "Invalid runner selected" unless runner_class < Leva::BaseRun
|
|
72
70
|
|
|
73
71
|
runner = runner_class.new
|
|
74
72
|
runner_result = runner.execute_and_store(nil, @dataset_record, @prompt)
|
|
@@ -90,8 +88,8 @@ module Leva
|
|
|
90
88
|
def run_evaluator
|
|
91
89
|
return redirect_to workbench_index_path, alert: "No runner result available" unless @runner_result
|
|
92
90
|
|
|
91
|
+
return redirect_to workbench_index_path, alert: "Invalid evaluator selected" unless allowed_evaluator_names.include?(params[:evaluator])
|
|
93
92
|
evaluator_class = params[:evaluator].constantize
|
|
94
|
-
return redirect_to workbench_index_path, alert: "Invalid evaluator selected" unless evaluator_class < Leva::BaseEval
|
|
95
93
|
|
|
96
94
|
evaluator = evaluator_class.new
|
|
97
95
|
evaluator.evaluate_and_store(nil, @runner_result)
|
|
@@ -120,5 +118,23 @@ module Leva
|
|
|
120
118
|
def set_runner_result
|
|
121
119
|
@runner_result = @dataset_record.runner_results.last if @dataset_record
|
|
122
120
|
end
|
|
121
|
+
|
|
122
|
+
# Names of the runner classes returned by load_runners, computed once per
# controller instance.
#
# @return [Array<String>]
def allowed_runner_names
  return @allowed_runner_names if @allowed_runner_names

  @allowed_runner_names = load_runners.map { |runner_class| runner_class.name }
end
|
|
125
|
+
|
|
126
|
+
# Names of the evaluator classes returned by load_evaluators, computed once
# per controller instance.
#
# @return [Array<String>]
def allowed_evaluator_names
  return @allowed_evaluator_names if @allowed_evaluator_names

  @allowed_evaluator_names = load_evaluators.map { |evaluator_class| evaluator_class.name }
end
|
|
129
|
+
|
|
130
|
+
# Tells whether a runner name may be instantiated by this controller.
#
# A name is accepted when it is one of allowed_runner_names, or when it
# resolves to a class that is a strict subclass of Leva::BaseRun (kept so
# that runners defined outside the scanned directory, e.g. in tests, work).
#
# NOTE(review): the fallback constantizes a caller-supplied name. Confirm
# that accepting runners outside the allow-list is still wanted in
# production.
#
# @param runner_name [String] The fully qualified runner class name
# @return [Boolean] true when the runner is acceptable, false otherwise
def valid_runner?(runner_name)
  return true if allowed_runner_names.include?(runner_name)

  klass = runner_name.constantize
  # A name can resolve to a non-class constant (a String, a Float, ...);
  # comparing such a value with `<` would raise instead of rejecting it.
  return false unless klass.is_a?(Class)

  # Class#< yields nil for unrelated classes; normalise to a strict boolean.
  (klass < Leva::BaseRun) == true
rescue NameError
  false
end
|
|
123
139
|
end
|
|
124
140
|
end
|
|
@@ -4,14 +4,44 @@ module Leva
|
|
|
4
4
|
#
|
|
5
5
|
# @return [Array<Class>] An array of evaluator classes
|
|
6
6
|
def load_evaluators
|
|
7
|
-
|
|
7
|
+
Leva::ClassLoader.evaluators
|
|
8
8
|
end
|
|
9
9
|
|
|
10
10
|
# Loads all runner classes that inherit from Leva::BaseRun
|
|
11
11
|
#
|
|
12
12
|
# @return [Array<Class>] An array of runner classes
|
|
13
13
|
def load_runners
|
|
14
|
-
|
|
14
|
+
Leva::ClassLoader.runners
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
# Returns the CSS class for a score value.
|
|
18
|
+
#
|
|
19
|
+
# @param score [Float, nil] The score value (0.0 - 1.0)
|
|
20
|
+
# @return [String] The CSS class for the score
|
|
21
|
+
# Returns the CSS class for a score value.
#
# Scores are expected to lie in 0.0..1.0. Values below 0.2 (including
# out-of-range negative values) map to "score-bad"; values of 0.8 and above
# (and anything that matches no range) map to "score-excellent".
#
# @param score [Float, nil] The score value (0.0 - 1.0)
# @return [String] The CSS class for the score, or "" when score is nil
def score_class(score)
  return "" if score.nil?

  case score
  # Open-ended lower bound so a negative score is not treated as excellent.
  when -Float::INFINITY...0.2 then "score-bad"
  when 0.2...0.4 then "score-poor"
  when 0.4...0.6 then "score-fair"
  when 0.6...0.8 then "score-good"
  else "score-excellent"
  end
end
|
|
32
|
+
|
|
33
|
+
# Returns the display name for a model.
|
|
34
|
+
#
|
|
35
|
+
# Uses RubyLLM to find the model and get its display name,
|
|
36
|
+
# falling back to extracting the name from the model ID.
|
|
37
|
+
#
|
|
38
|
+
# @param model_id [String] The model ID
|
|
39
|
+
# @return [String] The display name for the model
|
|
40
|
+
# Returns the display name for a model.
#
# Looks the ID up among Leva::PromptOptimizer.available_models (indexed by ID
# and cached on this helper instance). When the model is unknown, the text
# after the last "/" of the ID is used instead.
#
# @param model_id [String] The model ID
# @return [String] The display name, or "—" when the ID is blank
def model_display_name(model_id)
  return "—" if model_id.blank?

  @models_cache ||= Leva::PromptOptimizer.available_models.index_by(&:id)
  known_model = @models_cache[model_id]
  known_model&.name || model_id.split("/").last
end
|
|
16
46
|
|
|
17
47
|
# Loads predefined prompts from markdown files
|
|
@@ -25,19 +55,5 @@ module Leva
|
|
|
25
55
|
end
|
|
26
56
|
prompts
|
|
27
57
|
end
|
|
28
|
-
|
|
29
|
-
private
|
|
30
|
-
|
|
31
|
-
# Loads classes from a specified directory that inherit from a given base class
|
|
32
|
-
#
|
|
33
|
-
# @param directory [String] The directory path to load classes from
|
|
34
|
-
# @param base_class [Class] The base class that loaded classes should inherit from
|
|
35
|
-
# @return [Array<Class>] An array of loaded classes
|
|
36
|
-
def load_classes_from_directory(directory, base_class)
|
|
37
|
-
classes = Dir[Rails.root.join(directory, "*.rb")].map do |file|
|
|
38
|
-
File.basename(file, ".rb").camelize.constantize
|
|
39
|
-
end.select { |klass| klass < base_class }
|
|
40
|
-
classes.empty? ? [] : classes
|
|
41
|
-
end
|
|
42
58
|
end
|
|
43
59
|
end
|
data/app/models/leva/dataset.rb
CHANGED
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Leva
  # Persistent record of one prompt-optimization attempt for a dataset,
  # carrying its status, current step, progress and the resulting prompt.
  #
  # @example Create and track an optimization run
  #   run = OptimizationRun.create!(
  #     dataset: dataset,
  #     prompt_name: "My Optimized Prompt",
  #     mode: "light"
  #   )
  #   run.start!
  #   run.update_progress(step: "bootstrapping", progress: 50, examples_processed: 5)
  #   run.complete!(prompt)
  class OptimizationRun < ApplicationRecord
    self.table_name = "leva_optimization_runs"

    belongs_to :dataset
    belongs_to :prompt, optional: true

    enum :status, {
      pending: "pending",
      running: "running",
      completed: "completed",
      failed: "failed"
    }, default: :pending

    validates :prompt_name, presence: true, length: { maximum: 255 }
    validates :mode, presence: true, inclusion: { in: %w[light medium heavy] }
    validates :model, presence: true
    validates :optimizer, inclusion: { in: PromptOptimizer::OPTIMIZERS.keys.map(&:to_s) }
    validates :progress, numericality: { in: 0..100 }

    # Known step keys with the label and icon used to display them.
    STEPS = {
      "validating" => { label: "Validating dataset", icon: "check" },
      "splitting_data" => { label: "Splitting data", icon: "scissors" },
      "generating_signature" => { label: "Generating signature", icon: "code" },
      "bootstrapping" => { label: "Bootstrapping examples", icon: "zap" },
      "evaluating" => { label: "Evaluating results", icon: "bar-chart" },
      "building_result" => { label: "Building prompt", icon: "package" },
      "complete" => { label: "Complete", icon: "check-circle" }
    }.freeze

    # Moves the run into the running state at the "validating" step with
    # progress reset to 0.
    #
    # @return [void]
    def start!
      update!(status: :running, current_step: "validating", progress: 0)
    end

    # Records the current step and progress; the example counters are only
    # written when a value is supplied.
    #
    # @param step [String] Current step name
    # @param progress [Integer] Progress percentage (0-100)
    # @param examples_processed [Integer, nil] Number of examples processed
    # @param total [Integer, nil] Total examples to process
    # @return [void]
    def update_progress(step:, progress:, examples_processed: nil, total: nil)
      counters = { examples_processed: examples_processed, total_examples: total }
      supplied = counters.select { |_name, value| value }

      update!({ current_step: step, progress: progress }.merge(supplied))
    end

    # Marks the run as completed, linking the prompt it produced and setting
    # progress to 100.
    #
    # @param created_prompt [Leva::Prompt] The optimized prompt
    # @return [void]
    def complete!(created_prompt)
      update!(
        status: :completed,
        prompt: created_prompt,
        current_step: "complete",
        progress: 100
      )
    end

    # Marks the run as failed and stores a description of the failure.
    # Exceptions are stored as "ClassName: message"; anything else via to_s.
    #
    # @param error [String, Exception] The error message or exception
    # @return [void]
    def fail!(error)
      description =
        if error.is_a?(Exception)
          "#{error.class}: #{error.message}"
        else
          error.to_s
        end

      update!(status: :failed, error_message: description)
    end

    # Human-readable label for the current step: the STEPS label when the
    # step is known, otherwise the humanized step name, otherwise
    # "Initializing".
    #
    # @return [String]
    def current_step_label
      known_step = STEPS[current_step]
      return known_step[:label] if known_step

      current_step&.humanize || "Initializing"
    end

    # Time elapsed since the record was created. For completed or failed runs
    # the clock stops at updated_at; for running runs it is measured up to
    # the current time. Runs in any other state have no elapsed time.
    #
    # @return [Float, nil] Difference between the two timestamps (seconds), or nil
    def elapsed_time
      finished = completed? || failed?
      return nil unless running? || finished

      end_point = finished ? updated_at : Time.current
      end_point - created_at
    end

    # Formats elapsed time as "Ns", "Nm Ns" or "Nh Nm" (0s when there is no
    # elapsed time).
    #
    # @return [String]
    def elapsed_time_formatted
      total_seconds = elapsed_time&.to_i || 0
      return "#{total_seconds}s" if total_seconds < 60

      if total_seconds < 3600
        minutes, leftover_seconds = total_seconds.divmod(60)
        return "#{minutes}m #{leftover_seconds}s"
      end

      hours, leftover = total_seconds.divmod(3600)
      "#{hours}h #{leftover / 60}m"
    end

    # Fixed-shape hash used for JSON responses; the options argument is
    # accepted for interface compatibility and not consulted.
    #
    # @return [Hash]
    def as_json(options = {})
      {
        id: id,
        status: status,
        current_step: current_step,
        current_step_label: current_step_label,
        progress: progress,
        examples_processed: examples_processed,
        total_examples: total_examples,
        elapsed_time: elapsed_time_formatted,
        error_message: error_message,
        prompt_id: prompt_id,
        prompt_name: prompt_name
      }
    end
  end
end
|
data/app/models/leva/prompt.rb
CHANGED
|
@@ -26,5 +26,15 @@ module Leva
|
|
|
26
26
|
self.version ||= 0
|
|
27
27
|
self.version += 1
|
|
28
28
|
end
|
|
29
|
+
|
|
30
|
+
# @return [Boolean] true if this prompt was generated by an optimizer
|
|
31
|
+
# @return [Boolean] true when the metadata holds a non-blank "optimization" entry
def optimized?
  optimization_details = metadata&.dig("optimization")
  optimization_details.present?
end
|
|
34
|
+
|
|
35
|
+
# @return [String, nil] the optimizer used (e.g., "bootstrap", "mipro")
|
|
36
|
+
# @return [String, nil] the "optimizer" value stored under the metadata's
#   "optimization" entry, or nil when there is no metadata or no such entry
def optimizer_name
  return nil if metadata.nil?

  metadata.dig("optimization", "optimizer")
end
|
|
29
39
|
end
|
|
30
40
|
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Leva
  # Service for loading evaluator and runner classes from the application.
  #
  # Each *.rb file directly inside the scanned directory is mapped to a
  # constant by camelizing its basename; only constants that are classes
  # inheriting from the expected base class are returned.
  class ClassLoader
    # Loads all evaluator classes that inherit from Leva::BaseEval
    #
    # @return [Array<Class>] An array of evaluator classes from app/evals
    def self.evaluators
      load_classes_from_directory("app/evals", Leva::BaseEval)
    end

    # Loads all runner classes that inherit from Leva::BaseRun
    #
    # @return [Array<Class>] An array of runner classes from app/runners
    def self.runners
      load_classes_from_directory("app/runners", Leva::BaseRun)
    end

    # Loads classes from a specified directory that inherit from a given base class
    #
    # @param directory [String] The directory path (relative to Rails.root) to load classes from
    # @param base_class [Class] The base class that loaded classes should inherit from
    # @return [Array<Class>] The matching classes; empty when none match
    # @raise [NameError] If a file's camelized basename does not resolve to a constant
    def self.load_classes_from_directory(directory, base_class)
      Dir[Rails.root.join(directory, "*.rb")]
        .map { |file| File.basename(file, ".rb").camelize.constantize }
        # A basename may resolve to a constant that is not a class; skip it
        # rather than let the `<` comparison raise.
        .select { |klass| klass.is_a?(Class) && klass < base_class }
    end

    private_class_method :load_classes_from_directory
  end
end
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Leva
  # Converts Leva datasets to DSPy example format.
  #
  # Each dataset record that has a recordable is turned into a hash holding
  # the recordable's LLM context as input and its ground truth as the
  # expected output.
  #
  # @example Convert a dataset to DSPy examples
  #   converter = Leva::DatasetConverter.new(dataset)
  #   examples = converter.to_dspy_examples
  #
  # @example Split dataset for training
  #   converter = Leva::DatasetConverter.new(dataset)
  #   splits = converter.split(train_ratio: 0.6, val_ratio: 0.2)
  #   # => { train: [...], val: [...], test: [...] }
  class DatasetConverter
    # @param dataset [Leva::Dataset] The dataset to convert
    def initialize(dataset)
      @dataset = dataset
    end

    # Converts all dataset records to DSPy example format. Records without a
    # recordable are skipped.
    #
    # @return [Array<Hash>] Array of example hashes with :input and :expected keys
    def to_dspy_examples
      @dataset.dataset_records.includes(:recordable).filter_map do |record|
        recordable = record.recordable
        next unless recordable

        {
          input: recordable.to_llm_context,
          expected: { output: recordable.ground_truth }
        }
      end
    end

    # Splits the dataset into train, validation, and test sets.
    #
    # Examples are shuffled (reproducibly when a seed is given), then cut into
    # consecutive slices. Slice sizes are truncated to whole examples, so the
    # test set receives whatever remains. Every value is always an Array, even
    # when the ratios ask for more examples than exist.
    #
    # @param train_ratio [Float] Proportion of data for training (default: 0.6)
    # @param val_ratio [Float] Proportion of data for validation (default: 0.2)
    # @param seed [Integer, nil] Random seed for reproducibility
    # @return [Hash] Hash with :train, :val, and :test arrays
    def split(train_ratio: 0.6, val_ratio: 0.2, seed: nil)
      examples = to_dspy_examples
      examples = seed ? examples.shuffle(random: Random.new(seed)) : examples.shuffle

      train_size = (examples.size * train_ratio).to_i
      val_end = train_size + (examples.size * val_ratio).to_i

      # Array#[] returns nil when a range starts past the end of the array
      # (ratios summing to more than 1); fall back to an empty set.
      {
        train: examples[0...train_size] || [],
        val: examples[train_size...val_end] || [],
        test: examples[val_end..] || []
      }
    end

    # Returns the count of valid records in the dataset, without building the
    # example hashes.
    #
    # @return [Integer] Number of records with valid recordable objects
    def valid_record_count
      @dataset.dataset_records.includes(:recordable).to_a.count(&:recordable)
    end
  end
end
|