leva 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +55 -1
- data/app/assets/stylesheets/leva/application.css +165 -25
- data/app/controllers/leva/dataset_optimizations_controller.rb +64 -0
- data/app/controllers/leva/experiments_controller.rb +14 -6
- data/app/controllers/leva/workbench_controller.rb +26 -10
- data/app/helpers/leva/application_helper.rb +32 -16
- data/app/models/leva/dataset.rb +1 -0
- data/app/models/leva/experiment.rb +1 -0
- data/app/models/leva/optimization_run.rb +137 -0
- data/app/models/leva/prompt.rb +10 -0
- data/app/services/leva/class_loader.rb +37 -0
- data/app/services/leva/dataset_converter.rb +64 -0
- data/app/services/leva/optimizers/base.rb +183 -0
- data/app/services/leva/optimizers/bootstrap.rb +92 -0
- data/app/services/leva/optimizers/gepa_optimizer.rb +59 -0
- data/app/services/leva/optimizers/miprov2_optimizer.rb +52 -0
- data/app/services/leva/prompt_optimizer.rb +305 -0
- data/app/services/leva/signature_generator.rb +129 -0
- data/app/views/leva/datasets/show.html.erb +3 -0
- data/app/views/leva/experiments/_experiment.html.erb +9 -10
- data/app/views/leva/experiments/_form.html.erb +10 -0
- data/app/views/leva/experiments/index.html.erb +2 -1
- data/app/views/leva/experiments/show.html.erb +20 -21
- data/app/views/leva/optimization_runs/show.html.erb +698 -0
- data/app/views/leva/runner_results/show.html.erb +18 -48
- data/app/views/leva/workbench/_results_section.html.erb +3 -11
- data/db/migrate/20241204000001_create_leva_optimization_runs.rb +25 -0
- data/lib/generators/leva/templates/eval.rb.erb +4 -2
- data/lib/leva/errors.rb +18 -0
- data/lib/leva/version.rb +1 -1
- data/lib/leva.rb +1 -0
- metadata +16 -3
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Leva
|
|
4
|
+
# Tracks the progress and status of prompt optimization runs.
|
|
5
|
+
#
|
|
6
|
+
# @example Create and track an optimization run
|
|
7
|
+
# run = OptimizationRun.create!(
|
|
8
|
+
# dataset: dataset,
|
|
9
|
+
# prompt_name: "My Optimized Prompt",
|
|
10
|
+
# mode: "light"
|
|
11
|
+
# )
|
|
12
|
+
# run.start!
|
|
13
|
+
# run.update_progress(step: "bootstrapping", progress: 50, examples_processed: 5)
|
|
14
|
+
# run.complete!(prompt)
|
|
15
|
+
class OptimizationRun < ApplicationRecord
|
|
16
|
+
self.table_name = "leva_optimization_runs"
|
|
17
|
+
|
|
18
|
+
belongs_to :dataset
|
|
19
|
+
belongs_to :prompt, optional: true
|
|
20
|
+
|
|
21
|
+
enum :status, {
|
|
22
|
+
pending: "pending",
|
|
23
|
+
running: "running",
|
|
24
|
+
completed: "completed",
|
|
25
|
+
failed: "failed"
|
|
26
|
+
}, default: :pending
|
|
27
|
+
|
|
28
|
+
validates :prompt_name, presence: true, length: { maximum: 255 }
|
|
29
|
+
validates :mode, presence: true, inclusion: { in: %w[light medium heavy] }
|
|
30
|
+
validates :model, presence: true
|
|
31
|
+
validates :optimizer, inclusion: { in: PromptOptimizer::OPTIMIZERS.keys.map(&:to_s) }
|
|
32
|
+
validates :progress, numericality: { in: 0..100 }
|
|
33
|
+
|
|
34
|
+
# Defined optimization steps for display
|
|
35
|
+
STEPS = {
|
|
36
|
+
"validating" => { label: "Validating dataset", icon: "check" },
|
|
37
|
+
"splitting_data" => { label: "Splitting data", icon: "scissors" },
|
|
38
|
+
"generating_signature" => { label: "Generating signature", icon: "code" },
|
|
39
|
+
"bootstrapping" => { label: "Bootstrapping examples", icon: "zap" },
|
|
40
|
+
"evaluating" => { label: "Evaluating results", icon: "bar-chart" },
|
|
41
|
+
"building_result" => { label: "Building prompt", icon: "package" },
|
|
42
|
+
"complete" => { label: "Complete", icon: "check-circle" }
|
|
43
|
+
}.freeze
|
|
44
|
+
|
|
45
|
+
# Moves the run into the running state and resets progress tracking:
# status becomes :running, the current step becomes "validating" and
# progress is set to 0, all persisted through a single update! call.
#
# @return [void]
def start!
  initial_state = {
    status: :running,
    current_step: "validating",
    progress: 0
  }
  update!(initial_state)
end
|
|
51
|
+
|
|
52
|
+
# Persists the current step and progress percentage, plus any example
# counters that were supplied. Counters left as nil are not written, so
# previously stored values stay untouched.
#
# @param step [String] Current step name
# @param progress [Integer] Progress percentage (0-100)
# @param examples_processed [Integer, nil] Number of examples processed
# @param total [Integer, nil] Total examples to process (stored as total_examples)
# @return [void]
def update_progress(step:, progress:, examples_processed: nil, total: nil)
  changes = { current_step: step, progress: progress }
  optional_counters = { examples_processed: examples_processed, total_examples: total }

  # Only counters that were actually provided are written.
  optional_counters.each do |column, value|
    changes[column] = value if value
  end

  update!(changes)
end
|
|
65
|
+
|
|
66
|
+
# Finalizes a successful run: links the created prompt, sets the status to
# :completed, the current step to "complete" and progress to 100.
#
# @param created_prompt [Leva::Prompt] The optimized prompt
# @return [void]
def complete!(created_prompt)
  final_state = {
    status: :completed,
    prompt: created_prompt,
    current_step: "complete",
    progress: 100
  }
  update!(final_state)
end
|
|
78
|
+
|
|
79
|
+
# Flags the run as failed and stores a description of what went wrong.
# Exceptions are recorded as "ClassName: message"; anything else is
# converted with #to_s.
#
# @param error [String, Exception] The error message or exception
# @return [void]
def fail!(error)
  failure_text =
    case error
    when Exception then "#{error.class}: #{error.message}"
    else error.to_s
    end

  update!(status: :failed, error_message: failure_text)
end
|
|
87
|
+
|
|
88
|
+
# Resolves the display label for the current step. Known steps use the
# label from STEPS; unknown steps fall back to the humanized step name,
# and a missing step yields "Initializing".
#
# @return [String]
def current_step_label
  known_label = STEPS.dig(current_step, :label)
  return known_label if known_label

  humanized = current_step&.humanize
  humanized || "Initializing"
end
|
|
94
|
+
|
|
95
|
+
# Returns the time elapsed since the record was created (created_at).
# Finished runs (completed or failed) are measured up to updated_at;
# a running run is measured up to the current time. Runs in any other
# state have no elapsed time.
#
# @return [Numeric, nil] difference between the two timestamps (presumably
#   seconds as a Float, as with plain Time subtraction), or nil
def elapsed_time
  finished = completed? || failed?
  return nil unless finished || running?

  end_point = finished ? updated_at : Time.current
  end_point - created_at
end
|
|
103
|
+
|
|
104
|
+
# Renders the elapsed time as a short display string:
# "42s" below one minute, "3m 5s" below one hour, "2h 15m" otherwise.
# A missing elapsed time is treated as zero seconds.
#
# @return [String]
def elapsed_time_formatted
  total_seconds = elapsed_time&.to_i || 0
  return "#{total_seconds}s" if total_seconds < 60

  minutes, leftover_seconds = total_seconds.divmod(60)
  return "#{minutes}m #{leftover_seconds}s" if total_seconds < 3600

  hours, leftover_minutes = minutes.divmod(60)
  "#{hours}h #{leftover_minutes}m"
end
|
|
117
|
+
|
|
118
|
+
# Builds the hash used for JSON API responses. The elapsed time is exposed
# in its formatted form and the step label is included next to the raw step.
#
# @param options [Hash] accepted for signature compatibility; not used
# @return [Hash]
def as_json(options = {})
  payload = { id: id, status: status }

  payload.update(
    current_step: current_step,
    current_step_label: current_step_label,
    progress: progress
  )
  payload.update(
    examples_processed: examples_processed,
    total_examples: total_examples,
    elapsed_time: elapsed_time_formatted
  )
  payload.update(
    error_message: error_message,
    prompt_id: prompt_id,
    prompt_name: prompt_name
  )

  payload
end
|
|
136
|
+
end
|
|
137
|
+
end
|
data/app/models/leva/prompt.rb
CHANGED
|
@@ -26,5 +26,15 @@ module Leva
|
|
|
26
26
|
self.version ||= 0
|
|
27
27
|
self.version += 1
|
|
28
28
|
end
|
|
29
|
+
|
|
30
|
+
# @return [Boolean] true when the metadata holds a non-blank "optimization"
#   entry; false when metadata is nil or the entry is missing/blank
def optimized?
  optimization_info = metadata&.dig("optimization")
  optimization_info.present?
end
|
|
34
|
+
|
|
35
|
+
# @return [String, nil] the value stored under metadata["optimization"]["optimizer"]
#   (e.g., "bootstrap"), or nil when metadata or that entry is absent
def optimizer_name
  details = metadata
  return nil if details.nil?

  details.dig("optimization", "optimizer")
end
|
|
29
39
|
end
|
|
30
40
|
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Leva
|
|
4
|
+
# Service for loading evaluator and runner classes from the application.
#
# Every "*.rb" file directly inside app/evals or app/runners (relative to
# Rails.root) is mapped to a constant by camelizing its base name; only
# constants that are classes inheriting from the respective base class are
# returned.
class ClassLoader
  # Loads all evaluator classes that inherit from Leva::BaseEval
  #
  # @return [Array<Class>] An array of evaluator classes
  def self.evaluators
    load_classes_from_directory("app/evals", Leva::BaseEval)
  end

  # Loads all runner classes that inherit from Leva::BaseRun
  #
  # @return [Array<Class>] An array of runner classes
  def self.runners
    load_classes_from_directory("app/runners", Leva::BaseRun)
  end

  # Loads classes from a specified directory that inherit from a given base class
  #
  # @param directory [String] The directory path (relative to Rails.root) to load classes from
  # @param base_class [Class] The base class that loaded classes should inherit from
  # @return [Array<Class>] Matching classes ordered by file path; empty when none match
  # @raise [NameError] if a file name does not map to a defined constant (raised by constantize)
  def self.load_classes_from_directory(directory, base_class)
    pattern = Rails.root.join(directory, "*.rb").to_s

    # Sorting keeps the result order independent of filesystem enumeration order.
    Dir.glob(pattern).sort.filter_map do |file|
      constant = File.basename(file, ".rb").camelize.constantize
      # A file may define a non-class constant (e.g. a Hash of settings);
      # comparing such a value with `<` would raise instead of filtering it out.
      constant if constant.is_a?(Class) && constant < base_class
    end
  end

  private_class_method :load_classes_from_directory
end
|
|
37
|
+
end
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Leva
|
|
4
|
+
# Converts Leva datasets to the example format consumed by the optimizers.
#
# This service transforms dataset records into plain example hashes of the
# form { input: ..., expected: { output: ... } }, and can split them into
# train/validation/test sets.
#
# @example Convert a dataset to examples
#   converter = Leva::DatasetConverter.new(dataset)
#   examples = converter.to_dspy_examples
#
# @example Split dataset for training
#   converter = Leva::DatasetConverter.new(dataset)
#   splits = converter.split(train_ratio: 0.6, val_ratio: 0.2)
#   # => { train: [...], val: [...], test: [...] }
class DatasetConverter
  # @param dataset [Leva::Dataset] The dataset to convert
  def initialize(dataset)
    @dataset = dataset
  end

  # Converts all dataset records to example hashes. Records whose
  # recordable is missing are skipped.
  #
  # @return [Array<Hash>] Array of example hashes with :input and :expected keys
  def to_dspy_examples
    @dataset.dataset_records.includes(:recordable).filter_map do |record|
      recordable = record.recordable
      next unless recordable

      {
        input: recordable.to_llm_context,
        expected: { output: recordable.ground_truth }
      }
    end
  end

  # Splits the dataset into train, validation, and test sets.
  #
  # Examples are shuffled first (deterministically when a seed is given).
  # Set sizes are the ratios applied to the example count, rounded down;
  # whatever remains after train and validation becomes the test set.
  # Every set is always an Array: ratios that ask for more examples than
  # exist simply exhaust the data and leave later sets empty.
  #
  # @param train_ratio [Float] Proportion of data for training (default: 0.6)
  # @param val_ratio [Float] Proportion of data for validation (default: 0.2)
  # @param seed [Integer, nil] Random seed for reproducibility
  # @return [Hash] Hash with :train, :val, and :test arrays
  def split(train_ratio: 0.6, val_ratio: 0.2, seed: nil)
    examples = to_dspy_examples
    examples = seed ? examples.shuffle(random: Random.new(seed)) : examples.shuffle

    # Negative ratios are treated as zero; take/drop cap oversized requests,
    # where range slicing would return nil past the end of the array.
    train_size = [(examples.size * train_ratio).to_i, 0].max
    val_size = [(examples.size * val_ratio).to_i, 0].max
    held_out = examples.drop(train_size)

    {
      train: examples.take(train_size),
      val: held_out.take(val_size),
      test: held_out.drop(val_size)
    }
  end

  # Returns the count of valid records in the dataset.
  # Note: this builds every example via #to_dspy_examples to count them.
  #
  # @return [Integer] Number of records with valid recordable objects
  def valid_record_count
    to_dspy_examples.size
  end
end
|
|
64
|
+
end
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Leva
|
|
4
|
+
module Optimizers
|
|
5
|
+
# Base class for optimization strategies.
#
# Each optimizer implements a different approach to finding optimal
# prompt instructions and few-shot examples. {#optimize} drives the shared
# workflow (progress reporting, predictor setup, scoring, error wrapping);
# subclasses provide the strategy itself.
#
# @abstract Subclass and override {#compile}, {#step_name},
#   {#optimizer_name} and {#optimizer_type} to implement a strategy
class Base
  # Minimum change (in percentage points) between two throttled progress reports.
  PROGRESS_REPORT_INTERVAL = 5

  # @return [String] The model identifier
  attr_reader :model

  # @return [Proc] The evaluation metric
  attr_reader :metric

  # @return [Proc, nil] Progress callback
  attr_reader :progress_callback

  # @return [Symbol] The optimization mode
  attr_reader :mode

  # @param model [String] The model to use for optimization
  # @param metric [Proc] The evaluation metric
  # @param mode [Symbol] Optimization intensity (:light, :medium, :heavy)
  # @param progress_callback [Proc, nil] Callback for progress updates
  def initialize(model:, metric:, mode:, progress_callback: nil)
    @model = model
    @metric = metric
    @mode = mode
    @progress_callback = progress_callback
    @last_progress = nil
    @last_step = nil
  end

  # Runs the optimization and returns results.
  #
  # Builds a DSPy::Predict for the signature, delegates to {#compile},
  # then scores the optimized predictor (or the base predictor when the
  # strategy returns none) on the validation examples.
  #
  # @param splits [Hash] The train/val/test splits (:train and :val are read)
  # @param signature [Class] The generated DSPy signature class
  # @return [Hash] Hash with :instruction, :few_shot_examples, :score
  # @raise [Leva::OptimizationError] If optimization fails with a StandardError
  def optimize(splits, signature)
    train_examples = splits[:train]
    val_examples = splits[:val]

    report_progress(step: step_name, progress: 30, examples_processed: 0, total: train_examples.size)

    predictor = DSPy::Predict.new(signature)
    predictor.config.lm = create_lm

    result = compile(predictor, train_examples, val_examples, signature)

    report_progress(step: "evaluating", progress: 85)

    # A strategy may hand back a ready-made instruction instead of an optimized predictor.
    instruction = result[:instruction_override] || extract_instruction(result[:optimized], signature)
    score = evaluate(result[:optimized] || predictor, val_examples)

    report_progress(step: "building_result", progress: 95)

    {
      instruction: instruction,
      few_shot_examples: result[:few_shot_examples] || [],
      score: score
    }
  rescue StandardError => e
    Rails.logger.error "[Leva::Optimizers::#{self.class.name.demodulize}] Optimization failed: #{e.message}"
    # backtrace is nil for exceptions that were built but never raised by Ruby itself
    Rails.logger.error Array(e.backtrace).first(5).join("\n")
    raise Leva::OptimizationError, "#{optimizer_name} optimization failed: #{e.message}"
  end

  # The name used in progress reporting.
  # @return [String]
  def step_name
    raise NotImplementedError, "Subclasses must implement #step_name"
  end

  # Human-readable optimizer name.
  # @return [String]
  def optimizer_name
    raise NotImplementedError, "Subclasses must implement #optimizer_name"
  end

  # @return [Symbol] The optimizer type symbol
  def optimizer_type
    raise NotImplementedError, "Subclasses must implement #optimizer_type"
  end

  protected

  # Performs the actual optimization logic.
  #
  # @param predictor [DSPy::Predict] The base predictor
  # @param train_examples [Array<Hash>] Training examples
  # @param val_examples [Array<Hash>] Validation examples
  # @param signature [Class] The signature class
  # @return [Hash] Hash with :optimized (predictor), :few_shot_examples, :instruction_override (optional)
  def compile(predictor, train_examples, val_examples, signature)
    raise NotImplementedError, "Subclasses must implement #compile"
  end

  # Converts examples to DSPy::Example format.
  #
  # @param examples [Array<Hash>] Examples with :input and :expected keys
  # @param signature [Class] The DSPy signature class
  # @return [Array<DSPy::Example>]
  def to_dspy_examples(examples, signature)
    examples.map do |ex|
      DSPy::Example.new(
        signature_class: signature,
        input: ex[:input],
        expected: ex[:expected]
      )
    end
  end

  # Creates an LM instance for this optimizer.
  # Prepends ruby_llm/ prefix for DSPy adapter.
  # RubyLLM handles API keys from its configuration.
  #
  # @return [DSPy::LM]
  def create_lm
    DSPy::LM.new("ruby_llm/#{model}")
  end

  # Reports progress to the callback if provided.
  #
  # Updates are throttled: a report is skipped when progress moved by less
  # than PROGRESS_REPORT_INTERVAL points since the last delivered report.
  # Two kinds of update are never throttled, because dropping them leaves
  # observers with stale state: a change of step, and the update announcing
  # that all examples have been processed.
  #
  # @param step [String] Current step name
  # @param progress [Integer] Progress percentage
  # @param examples_processed [Integer, nil] Number of examples processed
  # @param total [Integer, nil] Total examples to process
  # @return [void]
  def report_progress(step:, progress:, examples_processed: nil, total: nil)
    return unless progress_callback

    step_changed = step != @last_step
    batch_finished = examples_processed && total && examples_processed >= total
    minor_change = @last_progress && (progress - @last_progress).abs < PROGRESS_REPORT_INTERVAL
    return if minor_change && !step_changed && !batch_finished

    @last_progress = progress
    @last_step = step
    progress_callback.call(
      step: step,
      progress: progress,
      examples_processed: examples_processed,
      total: total
    )
  end

  private

  # Extracts instruction from an optimized predictor, falling back to the
  # signature description when the predictor exposes none.
  def extract_instruction(optimized, signature)
    return signature.description unless optimized

    if optimized.respond_to?(:instruction)
      optimized.instruction
    elsif optimized.respond_to?(:signature) && optimized.signature.respond_to?(:instructions)
      optimized.signature.instructions
    else
      signature.description
    end
  end

  # Evaluates predictor on validation examples.
  # Handles both Hash examples and DSPy::Example objects.
  #
  # Scoring is a case-insensitive, whitespace-trimmed exact match on the
  # `output` field; the configured metric is not consulted here.
  #
  # @return [Float] fraction of matching examples (0.0 for an empty set)
  def evaluate(predictor, val_examples)
    return 0.0 if val_examples.empty?

    correct = val_examples.count do |example|
      # Handle both Hash and DSPy::Example
      if example.is_a?(Hash)
        input = example[:input]
        expected_output = example.dig(:expected, :output)
      else
        # DSPy::Example has input_values/expected_values methods
        input = example.input_values
        expected_output = example.expected_values[:output]
      end

      prediction = predictor.call(**input)
      actual = prediction.output.to_s.strip.downcase
      expected = expected_output.to_s.strip.downcase
      actual == expected
    end

    correct.to_f / val_examples.size
  end
end
|
|
182
|
+
end
|
|
183
|
+
end
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Leva
|
|
4
|
+
module Optimizers
|
|
5
|
+
# Bootstrap optimization strategy.
|
|
6
|
+
#
|
|
7
|
+
# Uses simple few-shot bootstrapping to find the best examples
|
|
8
|
+
# that improve model performance. Fast and has no extra dependencies.
|
|
9
|
+
#
|
|
10
|
+
# @example
|
|
11
|
+
# optimizer = Leva::Optimizers::Bootstrap.new(
|
|
12
|
+
# model: "gemini-2.5-flash",
|
|
13
|
+
# metric: my_metric,
|
|
14
|
+
# mode: :medium
|
|
15
|
+
# )
|
|
16
|
+
# result = optimizer.optimize(splits, signature)
|
|
17
|
+
class Bootstrap < Base
|
|
18
|
+
MODES = {
|
|
19
|
+
light: { trials: 5 },
|
|
20
|
+
medium: { trials: 15 },
|
|
21
|
+
heavy: { trials: 30 }
|
|
22
|
+
}.freeze
|
|
23
|
+
|
|
24
|
+
# Step key this optimizer passes to progress reporting.
# @return [String]
def step_name
  'bootstrapping'
end

# Human-readable optimizer name.
# @return [String]
def optimizer_name
  'Bootstrap'
end

# @return [Symbol] The optimizer type symbol
def optimizer_type
  :bootstrap
end
|
|
35
|
+
|
|
36
|
+
protected
|
|
37
|
+
|
|
38
|
+
# Selects few-shot examples from the training set and derives an
# instruction from them. No optimized predictor is produced, so
# :optimized is nil and the instruction is passed as :instruction_override.
#
# @param predictor [DSPy::Predict] The base predictor
# @param train_examples [Array<Hash>] Training examples
# @param _val_examples [Array<Hash>] Validation examples (unused here)
# @param signature [Class] The signature class
# @return [Hash] Hash with :optimized, :few_shot_examples, :instruction_override
def compile(predictor, train_examples, _val_examples, signature)
  selected = bootstrap_few_shot_examples(predictor, train_examples)

  {
    optimized: nil,
    few_shot_examples: selected,
    instruction_override: generate_optimized_instruction(selected, signature)
  }
end
|
|
48
|
+
|
|
49
|
+
private
|
|
50
|
+
|
|
51
|
+
# Bootstraps few-shot examples by running the predictor on every example
# and keeping those it answers correctly first.
#
# An example scores 1.0 when the predicted output equals the expected
# output after trimming and downcasing, else 0.0. The number of examples
# kept is the mode's trial count clamped to 3..8; an unknown mode falls
# back to the :light settings. Ties keep their original dataset order so
# the selection is deterministic.
#
# Progress is reported per example, mapped onto the 30%..80% range.
#
# @param predictor [#call] Predictor invoked with each example's input
# @param examples [Array<Hash>] Examples with :input and :expected keys
# @return [Array<Hash>] The selected examples
def bootstrap_few_shot_examples(predictor, examples)
  mode_settings = MODES.fetch(mode) { MODES[:light] }
  max_examples = mode_settings[:trials].clamp(3, 8)
  total = examples.size

  scored_examples = examples.each_with_index.map do |example, index|
    prediction = predictor.call(**example[:input])
    actual_output = prediction.output.to_s.strip.downcase
    expected_output = example.dig(:expected, :output).to_s.strip.downcase

    score = actual_output == expected_output ? 1.0 : 0.0

    # Progress: 30% + (index / total * 50%)
    progress = 30 + ((index + 1).to_f / total * 50).to_i
    report_progress(
      step: step_name,
      progress: progress,
      examples_processed: index + 1,
      total: total
    )

    { example: example, score: score, position: index }
  end

  # sort_by is not stable, so the original position breaks ties explicitly.
  scored_examples
    .sort_by { |entry| [-entry[:score], entry[:position]] }
    .take(max_examples)
    .map { |entry| entry[:example] }
end
|
|
76
|
+
|
|
77
|
+
# Generates instruction based on output patterns.
#
# When the examples contain between one and five distinct expected
# outputs, the task is treated as classification and the labels are
# appended to the signature description. In every other case (no
# examples, no usable outputs, or more than five distinct outputs) the
# plain description is returned.
#
# @param examples [Array<Hash>] Examples with an :expected => :output entry
# @param signature [#description] The signature providing the base instruction
# @return [String]
def generate_optimized_instruction(examples, signature)
  description = signature.description
  return description if examples.empty?

  labels = examples.map { |example| example.dig(:expected, :output) }.compact.uniq
  # Without any label there is nothing to enumerate; avoid "Respond with one of: ."
  return description if labels.empty? || labels.size > 5

  # Classification task
  "#{description} Respond with one of: #{labels.join(', ')}."
end
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
end
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Leva
|
|
4
|
+
module Optimizers
|
|
5
|
+
# GEPA (Genetic-Pareto) optimization strategy.
|
|
6
|
+
#
|
|
7
|
+
# Uses reflective prompt evolution with genetic algorithms
|
|
8
|
+
# and Pareto optimization for best quality results.
|
|
9
|
+
#
|
|
10
|
+
# Requires the dspy-gepa gem.
|
|
11
|
+
class GepaOptimizer < Base
|
|
12
|
+
# Step key for this optimizer's progress reporting.
# @return [String]
def step_name
  'gepa_optimizing'
end

# Human-readable optimizer name.
# @return [String]
def optimizer_name
  'GEPA'
end

# @return [Symbol] The optimizer type symbol
def optimizer_type
  :gepa
end
|
|
23
|
+
|
|
24
|
+
protected
|
|
25
|
+
|
|
26
|
+
# Runs GEPA over the predictor.
#
# Assigns a fresh LM to the predictor, picks the metric-call budget for
# the current mode (unknown modes use the :light budget), converts both
# example sets to DSPy examples and compiles with DSPy::Teleprompt::GEPA,
# using a reflection LM built from the same model identifier.
#
# @param predictor [DSPy::Predict] The base predictor
# @param train_examples [Array<Hash>] Training examples
# @param val_examples [Array<Hash>] Validation examples
# @param signature [Class] The signature class
# @return [Hash] Hash with :optimized (the compiled program) and an empty :few_shot_examples
def compile(predictor, train_examples, val_examples, signature)
  predictor.config.lm = create_lm

  # Build config based on mode intensity
  budgets = {
    light: { max_metric_calls: 16, minibatch_size: 2 },
    medium: { max_metric_calls: 32, minibatch_size: 2 },
    heavy: { max_metric_calls: 64, minibatch_size: 4 }
  }
  budget = budgets.fetch(mode) { budgets[:light] }

  reflection_lm = DSPy::ReflectionLM.new("ruby_llm/#{model}")
  teleprompter = DSPy::Teleprompt::GEPA.new(
    metric: metric,
    reflection_lm: reflection_lm,
    config: budget
  )

  trainset, valset = [train_examples, val_examples].map do |example_set|
    to_dspy_examples(example_set, signature)
  end

  report_progress(step: "gepa_compiling", progress: 50)

  outcome = teleprompter.compile(predictor, trainset: trainset, valset: valset)

  { optimized: outcome.optimized_program, few_shot_examples: [] }
end
|
|
57
|
+
end
|
|
58
|
+
end
|
|
59
|
+
end
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Leva
|
|
4
|
+
module Optimizers
|
|
5
|
+
# MIPROv2 optimization strategy.
|
|
6
|
+
#
|
|
7
|
+
# Uses Bayesian optimization with Gaussian Processes
|
|
8
|
+
# for efficient prompt search.
|
|
9
|
+
#
|
|
10
|
+
# Requires the dspy-miprov2 gem.
|
|
11
|
+
class Miprov2Optimizer < Base
|
|
12
|
+
# Step key for this optimizer's progress reporting.
# @return [String]
def step_name
  'miprov2_optimizing'
end

# Human-readable optimizer name.
# @return [String]
def optimizer_name
  'MIPROv2'
end

# @return [Symbol] The optimizer type symbol
def optimizer_type
  :miprov2
end
|
|
23
|
+
|
|
24
|
+
protected
|
|
25
|
+
|
|
26
|
+
# Runs MIPROv2 over the predictor.
#
# Assigns a fresh LM to the predictor, builds the optimizer from the
# AutoMode preset matching the current mode (unknown modes use the light
# preset), converts both example sets to DSPy examples and compiles.
#
# @param predictor [DSPy::Predict] The base predictor
# @param train_examples [Array<Hash>] Training examples
# @param val_examples [Array<Hash>] Validation examples
# @param signature [Class] The signature class
# @return [Hash] Hash with :optimized (the compiled program) and an empty :few_shot_examples
def compile(predictor, train_examples, val_examples, signature)
  predictor.config.lm = create_lm

  # Use AutoMode helpers for preset configurations; only the three known
  # preset names are ever dispatched.
  preset = %i[light medium heavy].include?(mode) ? mode : :light
  teleprompter = DSPy::Teleprompt::MIPROv2::AutoMode.public_send(preset, metric: metric)

  trainset, valset = [train_examples, val_examples].map do |example_set|
    to_dspy_examples(example_set, signature)
  end

  report_progress(step: "miprov2_compiling", progress: 50)

  outcome = teleprompter.compile(predictor, trainset: trainset, valset: valset)

  { optimized: outcome.optimized_program, few_shot_examples: [] }
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
end
|