leva 0.2.1 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +54 -0
- data/app/assets/stylesheets/leva/application.css +9 -0
- data/app/controllers/leva/dataset_optimizations_controller.rb +64 -0
- data/app/controllers/leva/experiments_controller.rb +14 -6
- data/app/controllers/leva/workbench_controller.rb +26 -10
- data/app/helpers/leva/application_helper.rb +32 -16
- data/app/models/leva/dataset.rb +1 -0
- data/app/models/leva/experiment.rb +1 -0
- data/app/models/leva/optimization_run.rb +137 -0
- data/app/models/leva/prompt.rb +10 -0
- data/app/services/leva/class_loader.rb +37 -0
- data/app/services/leva/dataset_converter.rb +64 -0
- data/app/services/leva/optimizers/base.rb +183 -0
- data/app/services/leva/optimizers/bootstrap.rb +92 -0
- data/app/services/leva/optimizers/gepa_optimizer.rb +59 -0
- data/app/services/leva/optimizers/miprov2_optimizer.rb +52 -0
- data/app/services/leva/prompt_optimizer.rb +305 -0
- data/app/services/leva/signature_generator.rb +129 -0
- data/app/views/leva/datasets/show.html.erb +3 -0
- data/app/views/leva/experiments/_experiment.html.erb +9 -10
- data/app/views/leva/experiments/_form.html.erb +10 -0
- data/app/views/leva/experiments/index.html.erb +2 -1
- data/app/views/leva/experiments/show.html.erb +20 -21
- data/app/views/leva/optimization_runs/show.html.erb +698 -0
- data/app/views/leva/runner_results/show.html.erb +18 -48
- data/app/views/leva/workbench/_results_section.html.erb +1 -9
- data/db/migrate/20241204000001_create_leva_optimization_runs.rb +25 -0
- data/lib/generators/leva/templates/eval.rb.erb +4 -2
- data/lib/leva/errors.rb +18 -0
- data/lib/leva/version.rb +1 -1
- data/lib/leva.rb +1 -0
- metadata +16 -3

data/app/services/leva/optimizers/base.rb
@@ -0,0 +1,183 @@
+# frozen_string_literal: true
+
+module Leva
+  module Optimizers
+    # Base class for optimization strategies.
+    #
+    # Each optimizer implements a different approach to finding optimal
+    # prompt instructions and few-shot examples.
+    #
+    # @abstract Subclass and override {#compile} to implement a strategy
+    class Base
+      # @return [String] The model identifier
+      attr_reader :model
+
+      # @return [Proc] The evaluation metric
+      attr_reader :metric
+
+      # @return [Proc, nil] Progress callback
+      attr_reader :progress_callback
+
+      # @return [Symbol] The optimization mode
+      attr_reader :mode
+
+      # @param model [String] The model to use for optimization
+      # @param metric [Proc] The evaluation metric
+      # @param mode [Symbol] Optimization intensity (:light, :medium, :heavy)
+      # @param progress_callback [Proc, nil] Callback for progress updates
+      def initialize(model:, metric:, mode:, progress_callback: nil)
+        @model = model
+        @metric = metric
+        @mode = mode
+        @progress_callback = progress_callback
+        @last_progress = nil
+      end
+
+      # Runs the optimization and returns results.
+      #
+      # @param splits [Hash] The train/val/test splits
+      # @param signature [Class] The generated DSPy signature class
+      # @return [Hash] Hash with :instruction, :few_shot_examples, :score
+      # @raise [Leva::OptimizationError] If optimization fails
+      def optimize(splits, signature)
+        train_examples = splits[:train]
+        val_examples = splits[:val]
+
+        report_progress(step: step_name, progress: 30, examples_processed: 0, total: train_examples.size)
+
+        predictor = DSPy::Predict.new(signature)
+        predictor.config.lm = create_lm
+
+        result = compile(predictor, train_examples, val_examples, signature)
+
+        report_progress(step: "evaluating", progress: 85)
+
+        instruction = result[:instruction_override] || extract_instruction(result[:optimized], signature)
+        score = evaluate(result[:optimized] || predictor, val_examples)
+
+        report_progress(step: "building_result", progress: 95)
+
+        {
+          instruction: instruction,
+          few_shot_examples: result[:few_shot_examples] || [],
+          score: score
+        }
+      rescue StandardError => e
+        Rails.logger.error "[Leva::Optimizers::#{self.class.name.demodulize}] Optimization failed: #{e.message}"
+        Rails.logger.error e.backtrace.first(5).join("\n")
+        raise Leva::OptimizationError, "#{optimizer_name} optimization failed: #{e.message}"
+      end
+
+      # The name used in progress reporting.
+      # @return [String]
+      def step_name
+        raise NotImplementedError, "Subclasses must implement #step_name"
+      end
+
+      # Human-readable optimizer name.
+      # @return [String]
+      def optimizer_name
+        raise NotImplementedError, "Subclasses must implement #optimizer_name"
+      end
+
+      # @return [Symbol] The optimizer type symbol
+      def optimizer_type
+        raise NotImplementedError, "Subclasses must implement #optimizer_type"
+      end
+
+      protected
+
+      # Performs the actual optimization logic.
+      #
+      # @param predictor [DSPy::Predict] The base predictor
+      # @param train_examples [Array<Hash>] Training examples
+      # @param val_examples [Array<Hash>] Validation examples
+      # @param signature [Class] The signature class
+      # @return [Hash] Hash with :optimized (predictor), :few_shot_examples, :instruction_override (optional)
+      def compile(predictor, train_examples, val_examples, signature)
+        raise NotImplementedError, "Subclasses must implement #compile"
+      end
+
+      # Converts examples to DSPy::Example format.
+      #
+      # @param examples [Array<Hash>] Examples with :input and :expected keys
+      # @param signature [Class] The DSPy signature class
+      # @return [Array<DSPy::Example>]
+      def to_dspy_examples(examples, signature)
+        examples.map do |ex|
+          DSPy::Example.new(
+            signature_class: signature,
+            input: ex[:input],
+            expected: ex[:expected]
+          )
+        end
+      end
+
+      # Creates an LM instance for this optimizer.
+      # Prepends ruby_llm/ prefix for DSPy adapter.
+      # RubyLLM handles API keys from its configuration.
+      #
+      # @return [DSPy::LM]
+      def create_lm
+        DSPy::LM.new("ruby_llm/#{model}")
+      end
+
+      # Reports progress to the callback if provided.
+      # Throttles updates to only report when progress changes by 5% or more.
+      def report_progress(step:, progress:, examples_processed: nil, total: nil)
+        return unless progress_callback
+
+        # Skip if progress hasn't changed by at least 5%
+        return if @last_progress && (progress - @last_progress).abs < 5
+
+        @last_progress = progress
+        progress_callback.call(
+          step: step,
+          progress: progress,
+          examples_processed: examples_processed,
+          total: total
+        )
+      end
+
+      private
+
+      # Extracts instruction from an optimized predictor.
+      def extract_instruction(optimized, signature)
+        return signature.description unless optimized
+
+        if optimized.respond_to?(:instruction)
+          optimized.instruction
+        elsif optimized.respond_to?(:signature) && optimized.signature.respond_to?(:instructions)
+          optimized.signature.instructions
+        else
+          signature.description
+        end
+      end
+
+      # Evaluates predictor on validation examples.
+      # Handles both Hash examples and DSPy::Example objects.
+      def evaluate(predictor, val_examples)
+        return 0.0 if val_examples.empty?
+
+        correct = val_examples.count do |example|
+          # Handle both Hash and DSPy::Example
+          if example.is_a?(Hash)
+            input = example[:input]
+            expected_output = example.dig(:expected, :output)
+          else
+            # DSPy::Example has input_values/expected_values methods
+            input = example.input_values
+            expected_output = example.expected_values[:output]
+          end
+
+          prediction = predictor.call(**input)
+          actual = prediction.output.to_s.strip.downcase
+          expected = expected_output.to_s.strip.downcase
+          actual == expected
+        end
+
+        correct.to_f / val_examples.size
+      end
+    end
+  end
+end
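To make the abstract contract concrete, here is a minimal hypothetical subclass; `EchoOptimizer`, its `:echo` type, and its keep-the-first-three rule are illustrative only and not part of the gem. A strategy only has to name itself and return the `:optimized`/`:few_shot_examples` hash that `Base#optimize` consumes:

```ruby
# Hypothetical sketch: the smallest strategy satisfying Base's contract.
module Leva
  module Optimizers
    class EchoOptimizer < Base
      def step_name
        "echoing"
      end

      def optimizer_name
        "Echo"
      end

      def optimizer_type
        :echo
      end

      protected

      # Base#optimize reads :optimized (a predictor or nil), :few_shot_examples,
      # and an optional :instruction_override from this hash.
      def compile(predictor, train_examples, _val_examples, _signature)
        { optimized: predictor, few_shot_examples: train_examples.first(3) }
      end
    end
  end
end
```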
data/app/services/leva/optimizers/bootstrap.rb
@@ -0,0 +1,92 @@
+# frozen_string_literal: true
+
+module Leva
+  module Optimizers
+    # Bootstrap optimization strategy.
+    #
+    # Uses simple few-shot bootstrapping to find the best examples
+    # that improve model performance. Fast and has no extra dependencies.
+    #
+    # @example
+    #   optimizer = Leva::Optimizers::Bootstrap.new(
+    #     model: "gemini-2.5-flash",
+    #     metric: my_metric,
+    #     mode: :medium
+    #   )
+    #   result = optimizer.optimize(splits, signature)
+    class Bootstrap < Base
+      MODES = {
+        light: { trials: 5 },
+        medium: { trials: 15 },
+        heavy: { trials: 30 }
+      }.freeze
+
+      def step_name
+        "bootstrapping"
+      end
+
+      def optimizer_name
+        "Bootstrap"
+      end
+
+      def optimizer_type
+        :bootstrap
+      end
+
+      protected
+
+      def compile(predictor, train_examples, _val_examples, signature)
+        best_examples = bootstrap_few_shot_examples(predictor, train_examples)
+        instruction = generate_optimized_instruction(best_examples, signature)
+
+        {
+          optimized: nil,
+          few_shot_examples: best_examples,
+          instruction_override: instruction
+        }
+      end
+
+      private
+
+      # Bootstraps few-shot examples by evaluating which examples help the model most.
+      def bootstrap_few_shot_examples(predictor, examples)
+        max_examples = MODES[mode][:trials].clamp(3, 8)
+
+        scored_examples = examples.each_with_index.map do |example, index|
+          prediction = predictor.call(**example[:input])
+          actual_output = prediction.output.to_s.strip.downcase
+          expected_output = example.dig(:expected, :output).to_s.strip.downcase
+
+          score = actual_output == expected_output ? 1.0 : 0.0
+
+          # Progress: 30% + (index / total * 50%)
+          progress = 30 + ((index + 1).to_f / examples.size * 50).to_i
+          report_progress(
+            step: step_name,
+            progress: progress,
+            examples_processed: index + 1,
+            total: examples.size
+          )
+
+          { example: example, score: score }
+        end
+
+        scored_examples.sort_by { |e| -e[:score] }.take(max_examples).map { |e| e[:example] }
+      end
+
+      # Generates instruction based on output patterns.
+      def generate_optimized_instruction(examples, signature)
+        return signature.description if examples.empty?
+
+        outputs = examples.map { |e| e.dig(:expected, :output) }.compact.uniq
+
+        if outputs.size <= 5
+          # Classification task
+          "#{signature.description} Respond with one of: #{outputs.join(', ')}."
+        else
+          signature.description
+        end
+      end
+    end
+  end
+end
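A sketch of driving `Bootstrap` directly, expanding on the `@example` above. The splits, signature name, and metric are invented; only the `:train`/`:val` and `:input`/`:expected` shapes are dictated by what `Base#optimize` and `#bootstrap_few_shot_examples` read. Note the metric is required by the constructor even though Bootstrap scores candidates by inline exact match:

```ruby
# Hypothetical data; only the hash shapes come from the code above.
splits = {
  train: [
    { input: { text: "great product, would buy again" }, expected: { output: "positive" } },
    { input: { text: "arrived broken" },                 expected: { output: "negative" } }
    # ...more records in practice
  ],
  val: [
    { input: { text: "loved it" }, expected: { output: "positive" } }
  ]
}

optimizer = Leva::Optimizers::Bootstrap.new(
  model: "gemini-2.5-flash",
  metric: ->(example, prediction) { example.dig(:expected, :output) == prediction ? 1.0 : 0.0 },
  mode: :light, # 5 trials, clamped into 3..8 => keeps at most 5 examples
  # Base#report_progress calls the callback with these four keywords.
  progress_callback: ->(step:, progress:, examples_processed: nil, total: nil) {
    puts "#{step}: #{progress}% (#{examples_processed}/#{total})"
  }
)

result = optimizer.optimize(splits, ReviewSignature) # ReviewSignature: a DSPy signature class (assumed)
result[:few_shot_examples] # => the best-scoring training examples
result[:instruction]       # => description plus "Respond with one of: ..." for small label sets
result[:score]             # => exact-match accuracy on the validation split
```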
data/app/services/leva/optimizers/gepa_optimizer.rb
@@ -0,0 +1,59 @@
+# frozen_string_literal: true
+
+module Leva
+  module Optimizers
+    # GEPA (Genetic-Pareto) optimization strategy.
+    #
+    # Uses reflective prompt evolution with genetic algorithms
+    # and Pareto optimization for best quality results.
+    #
+    # Requires the dspy-gepa gem.
+    class GepaOptimizer < Base
+      def step_name
+        "gepa_optimizing"
+      end
+
+      def optimizer_name
+        "GEPA"
+      end
+
+      def optimizer_type
+        :gepa
+      end
+
+      protected
+
+      def compile(predictor, train_examples, val_examples, signature)
+        lm = create_lm
+        predictor.config.lm = lm
+
+        # Build config based on mode intensity
+        gepa_config = case mode
+                      when :light
+                        { max_metric_calls: 16, minibatch_size: 2 }
+                      when :medium
+                        { max_metric_calls: 32, minibatch_size: 2 }
+                      when :heavy
+                        { max_metric_calls: 64, minibatch_size: 4 }
+                      else
+                        { max_metric_calls: 16, minibatch_size: 2 }
+                      end
+
+        gepa = DSPy::Teleprompt::GEPA.new(
+          metric: metric,
+          reflection_lm: DSPy::ReflectionLM.new("ruby_llm/#{model}"),
+          config: gepa_config
+        )
+
+        trainset = to_dspy_examples(train_examples, signature)
+        valset = to_dspy_examples(val_examples, signature)
+
+        report_progress(step: "gepa_compiling", progress: 50)
+
+        result = gepa.compile(predictor, trainset: trainset, valset: valset)
+
+        { optimized: result.optimized_program, few_shot_examples: [] }
+      end
+    end
+  end
+end
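As the class comment says, this strategy depends on the optional dspy-gepa gem. A Gemfile sketch mirroring the install hint that `PromptOptimizer#validate_optimizer!` (further down in this diff) raises when the constant is missing:

```ruby
# Gemfile sketch: GEPA is opt-in; DSPy::Teleprompt::GEPA is only
# defined once dspy-gepa is installed alongside dspy.
gem "dspy"
gem "dspy-gepa"
```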
data/app/services/leva/optimizers/miprov2_optimizer.rb
@@ -0,0 +1,52 @@
+# frozen_string_literal: true
+
+module Leva
+  module Optimizers
+    # MIPROv2 optimization strategy.
+    #
+    # Uses Bayesian optimization with Gaussian Processes
+    # for efficient prompt search.
+    #
+    # Requires the dspy-miprov2 gem.
+    class Miprov2Optimizer < Base
+      def step_name
+        "miprov2_optimizing"
+      end
+
+      def optimizer_name
+        "MIPROv2"
+      end
+
+      def optimizer_type
+        :miprov2
+      end
+
+      protected
+
+      def compile(predictor, train_examples, val_examples, signature)
+        predictor.config.lm = create_lm
+
+        # Use AutoMode helpers for preset configurations
+        mipro = case mode
+                when :light
+                  DSPy::Teleprompt::MIPROv2::AutoMode.light(metric: metric)
+                when :medium
+                  DSPy::Teleprompt::MIPROv2::AutoMode.medium(metric: metric)
+                when :heavy
+                  DSPy::Teleprompt::MIPROv2::AutoMode.heavy(metric: metric)
+                else
+                  DSPy::Teleprompt::MIPROv2::AutoMode.light(metric: metric)
+                end
+
+        trainset = to_dspy_examples(train_examples, signature)
+        valset = to_dspy_examples(val_examples, signature)
+
+        report_progress(step: "miprov2_compiling", progress: 50)
+
+        result = mipro.compile(predictor, trainset: trainset, valset: valset)
+
+        { optimized: result.optimized_program, few_shot_examples: [] }
+      end
+    end
+  end
+end
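Like GEPA, MIPROv2 is an optional dependency. A one-line sketch of the guard an app might use before selecting it, probing the same constant `PromptOptimizer.optimizer_available?` checks below:

```ruby
# Fall back to the always-available bootstrap strategy when the
# dspy-miprov2 gem (and thus DSPy::Teleprompt::MIPROv2) is absent.
optimizer_type = defined?(DSPy::Teleprompt::MIPROv2) ? :miprov2 : :bootstrap
```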
data/app/services/leva/prompt_optimizer.rb
@@ -0,0 +1,305 @@
+# frozen_string_literal: true
+
+module Leva
+  # Optimizes prompts using DSPy.rb optimizers.
+  #
+  # This service coordinates the optimization process, delegating
+  # the actual optimization work to strategy classes.
+  #
+  # @example Optimize a prompt for a dataset
+  #   optimizer = Leva::PromptOptimizer.new(dataset: dataset, mode: :medium)
+  #   result = optimizer.optimize
+  #   # => { system_prompt: "...", user_prompt: "...", metadata: {...} }
+  #
+  # @example With GEPA optimizer
+  #   optimizer = Leva::PromptOptimizer.new(dataset: dataset, optimizer: :gepa, mode: :medium)
+  #   result = optimizer.optimize
+  class PromptOptimizer
+    # Minimum number of examples required for optimization
+    MINIMUM_EXAMPLES = 10
+
+    # Available optimizers with their strategy classes
+    OPTIMIZERS = {
+      bootstrap: {
+        name: "Bootstrap",
+        strategy_class: Leva::Optimizers::Bootstrap,
+        gem: nil,
+        description: "Fast and simple. Automatically selects optimal few-shot examples from your dataset. " \
+                     "Best for quick iteration and when you have limited data (10-50 examples). " \
+                     "Does not modify instructions, only adds demonstrations."
+      },
+      gepa: {
+        name: "GEPA",
+        strategy_class: Leva::Optimizers::GepaOptimizer,
+        gem: "dspy-gepa",
+        description: "State-of-the-art optimizer using reflective prompt evolution. Uses LLM reflection " \
+                     "to identify what works and propose improvements. Outperforms MIPROv2 by 10-14% " \
+                     "while being more sample efficient. Best choice for maximum quality."
+      },
+      miprov2: {
+        name: "MIPROv2",
+        strategy_class: Leva::Optimizers::Miprov2Optimizer,
+        gem: "dspy-miprov2",
+        description: "Uses Bayesian optimization to search for optimal instructions and few-shot examples. " \
+                     "Good for larger datasets (200+ examples). More computationally demanding but thorough. " \
+                     "Can overfit on small datasets."
+      }
+    }.freeze
+
+    # Default optimizer
+    DEFAULT_OPTIMIZER = :bootstrap
+
+    # Optimization modes with their approximate durations
+    MODES = {
+      light: { description: "Fast optimization (~5 min)", trials: 5 },
+      medium: { description: "Balanced optimization (~15 min)", trials: 15 },
+      heavy: { description: "Thorough optimization (~30 min)", trials: 30 }
+    }.freeze
+
+    # Default model if none specified (fast and cheap)
+    DEFAULT_MODEL = "gemini-2.5-flash"
+
+    # Returns available models from RubyLLM.
+    # Results are cached for 5 minutes to avoid repeated expensive calls.
+    #
+    # @return [Array<RubyLLM::Model>] All available chat models
+    def self.available_models
+      Rails.cache.fetch("leva/available_models", expires_in: 5.minutes) do
+        RubyLLM.models.chat_models
+      end
+    end
+
+    # Finds a model by ID.
+    #
+    # @param model_id [String] The model ID to find
+    # @return [RubyLLM::Model, nil] The model or nil if not found
+    def self.find_model(model_id)
+      RubyLLM.models.find(model_id)
+    rescue RubyLLM::ModelNotFoundError
+      nil
+    end
+
+    # @return [Leva::Dataset] The dataset being optimized
+    attr_reader :dataset
+
+    # @return [Symbol] The optimization mode (:light, :medium, :heavy)
+    attr_reader :mode
+
+    # @return [String] The model to use for optimization
+    attr_reader :model
+
+    # @return [Symbol] The optimizer to use (:bootstrap, :gepa, :miprov2)
+    attr_reader :optimizer
+
+    # @param dataset [Leva::Dataset] The dataset to optimize for
+    # @param metric [Proc, nil] Custom evaluation metric (default: exact string match)
+    # @param mode [Symbol] Optimization intensity (:light, :medium, :heavy)
+    # @param model [String, nil] The model to use (default: DEFAULT_MODEL)
+    # @param optimizer [Symbol, String] The optimizer to use (default: :bootstrap)
+    # @param progress_callback [Proc, nil] Callback for progress updates
+    def initialize(dataset:, metric: nil, mode: :light, model: nil, optimizer: nil, progress_callback: nil)
+      @dataset = dataset
+      @metric = metric || default_metric
+      @mode = mode.to_sym
+      @model = model.presence || DEFAULT_MODEL
+      @optimizer = (optimizer.presence || DEFAULT_OPTIMIZER).to_sym
+      @progress_callback = progress_callback
+      @last_progress = nil
+    end
+
+    # Runs the optimization process.
+    #
+    # @return [Hash] Hash containing :system_prompt, :user_prompt, and :metadata
+    # @raise [Leva::InsufficientDataError] If dataset has too few records
+    # @raise [Leva::DspyConfigurationError] If DSPy is not configured
+    def optimize
+      report_progress(step: "validating", progress: 0)
+      validate_dataset!
+      validate_dspy_configuration!
+      validate_optimizer!
+
+      report_progress(step: "splitting_data", progress: 10)
+      splits = DatasetConverter.new(@dataset).split
+
+      report_progress(step: "generating_signature", progress: 20)
+      signature = SignatureGenerator.new(@dataset).generate
+
+      # Delegate to optimizer strategy
+      strategy = build_optimizer_strategy
+      result = strategy.optimize(splits, signature)
+
+      report_progress(step: "complete", progress: 100)
+
+      build_final_result(result, splits, strategy.optimizer_type)
+    end
+
+    # Checks if the dataset is ready for optimization.
+    #
+    # @return [Boolean] True if the dataset can be optimized
+    def can_optimize?
+      @dataset.dataset_records.count >= MINIMUM_EXAMPLES
+    end
+
+    # Returns the number of additional records needed for optimization.
+    #
+    # @return [Integer] Number of records still needed (0 if ready)
+    def records_needed
+      [ MINIMUM_EXAMPLES - @dataset.dataset_records.count, 0 ].max
+    end
+
+    # Checks if a specific optimizer is available.
+    #
+    # @param optimizer_type [Symbol] The optimizer to check
+    # @return [Boolean] True if the optimizer is available
+    def self.optimizer_available?(optimizer_type)
+      optimizer_type = optimizer_type.to_sym
+      return true if optimizer_type == :bootstrap
+
+      case optimizer_type
+      when :gepa
+        !!defined?(DSPy::Teleprompt::GEPA)
+      when :miprov2
+        !!defined?(DSPy::Teleprompt::MIPROv2)
+      else
+        false
+      end
+    end
+
+    private
+
+    # Builds the optimizer strategy instance.
+    #
+    # @return [Leva::Optimizers::Base] The optimizer strategy
+    def build_optimizer_strategy
+      config = OPTIMIZERS[@optimizer]
+      config[:strategy_class].new(
+        model: @model,
+        metric: @metric,
+        mode: @mode,
+        progress_callback: @progress_callback
+      )
+    end
+
+    # Builds the final result hash from optimization.
+    #
+    # @param result [Hash] The optimizer result with :instruction, :few_shot_examples, :score
+    # @param splits [Hash] The data splits
+    # @param optimizer_type [Symbol] The optimizer that was used
+    # @return [Hash] The formatted result
+    def build_final_result(result, splits, optimizer_type)
+      sample_record = @dataset.dataset_records.first&.recordable
+      input_fields = sample_record&.to_llm_context&.keys || []
+
+      formatted_examples = result[:few_shot_examples].map do |ex|
+        { input: ex[:input], output: ex.dig(:expected, :output) }
+      end
+
+      {
+        system_prompt: result[:instruction],
+        user_prompt: build_user_prompt_template(input_fields),
+        metadata: {
+          optimization: {
+            score: result[:score],
+            mode: @mode.to_s,
+            optimizer: optimizer_type.to_s,
+            model: @model,
+            few_shot_examples: formatted_examples,
+            optimized_at: Time.current.iso8601,
+            dataset_size: @dataset.dataset_records.count,
+            train_size: splits[:train].size,
+            val_size: splits[:val].size,
+            test_size: splits[:test].size
+          }
+        }
+      }
+    end
+
+    # Reports progress to the callback if provided.
+    # Throttles updates to only report when progress changes by 5% or more.
+    #
+    # @param step [String] Current step name
+    # @param progress [Integer] Progress percentage (0-100)
+    # @param examples_processed [Integer, nil] Number of examples processed
+    # @param total [Integer, nil] Total examples to process
+    # @return [void]
+    def report_progress(step:, progress:, examples_processed: nil, total: nil)
+      return unless @progress_callback
+
+      # Skip if progress hasn't changed by at least 5%
+      return if @last_progress && (progress - @last_progress).abs < 5
+
+      @last_progress = progress
+      @progress_callback.call(
+        step: step,
+        progress: progress,
+        examples_processed: examples_processed,
+        total: total
+      )
+    end
+
+    # Validates that the dataset has enough records.
+    #
+    # @raise [Leva::InsufficientDataError] If dataset has too few records
+    def validate_dataset!
+      count = @dataset.dataset_records.count
+      return if count >= MINIMUM_EXAMPLES
+
+      raise InsufficientDataError,
+            "Dataset needs at least #{MINIMUM_EXAMPLES} records for optimization, has #{count}"
+    end
+
+    # Validates that DSPy is properly configured.
+    #
+    # @raise [Leva::DspyConfigurationError] If DSPy is not configured
+    def validate_dspy_configuration!
+      unless defined?(DSPy) && defined?(DSPy::Predict)
+        raise DspyConfigurationError, "DSPy is not installed. Add 'dspy' gem to your Gemfile."
+      end
+    end
+
+    # Validates that the selected optimizer is available.
+    #
+    # @raise [Leva::DspyConfigurationError] If optimizer is not available
+    def validate_optimizer!
+      return if @optimizer == :bootstrap
+      return if self.class.optimizer_available?(@optimizer)
+
+      gem_name = OPTIMIZERS.dig(@optimizer, :gem)
+      raise DspyConfigurationError, <<~MSG.strip
+        #{@optimizer.to_s.upcase} optimizer is not available. Install it:
+
+          gem 'dspy'
+          gem '#{gem_name}'
+
+        Or set DSPY_WITH_#{@optimizer.to_s.upcase}=1 before requiring dspy.
+      MSG
+    end
+
+    # Returns the default evaluation metric (case-insensitive exact match).
+    # Handles both Hash examples and DSPy::Example objects.
+    #
+    # @return [Proc] The default metric function
+    def default_metric
+      lambda do |example, prediction|
+        # Handle both Hash and DSPy::Example
+        expected_output = if example.is_a?(Hash)
+                            example.dig(:expected, :output)
+                          else
+                            # DSPy::Example has expected_values method to get Hash
+                            example.expected_values[:output]
+                          end
+        expected = expected_output.to_s.strip.downcase
+        actual = prediction.to_s.strip.downcase
+        expected == actual ? 1.0 : 0.0
+      end
+    end
+
+    # Builds the user prompt template with Liquid placeholders.
+    #
+    # @param input_fields [Array<Symbol>] The input field names
+    # @return [String] The user prompt template
+    def build_user_prompt_template(input_fields)
+      input_fields.map { |field| "{{ #{field} }}" }.join("\n\n")
+    end
+  end
+end
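Putting the pieces together, a hedged end-to-end sketch of the coordinator: `dataset` is assumed to be a persisted `Leva::Dataset` with enough records, the log line is invented, and the callback's keyword shape matches what `#report_progress` invokes:

```ruby
optimizer = Leva::PromptOptimizer.new(
  dataset: dataset,
  optimizer: :gepa,  # raises Leva::DspyConfigurationError if dspy-gepa is missing
  mode: :medium,     # "Balanced optimization (~15 min)"
  progress_callback: ->(step:, progress:, examples_processed: nil, total: nil) {
    Rails.logger.info "[leva] #{step}: #{progress}%"
  }
)

if optimizer.can_optimize?
  result = optimizer.optimize
  result[:system_prompt]                        # the optimized instruction
  result[:user_prompt]                          # "{{ field }}" Liquid template per input field
  result.dig(:metadata, :optimization, :score)  # accuracy on the validation split
else
  puts "Add #{optimizer.records_needed} more records first"
end
```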