dspy 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +69 -382
- data/lib/dspy/chain_of_thought.rb +57 -0
- data/lib/dspy/evaluate.rb +554 -0
- data/lib/dspy/example.rb +203 -0
- data/lib/dspy/few_shot_example.rb +81 -0
- data/lib/dspy/instrumentation.rb +97 -8
- data/lib/dspy/lm/adapter_factory.rb +6 -8
- data/lib/dspy/lm.rb +5 -7
- data/lib/dspy/predict.rb +32 -34
- data/lib/dspy/prompt.rb +222 -0
- data/lib/dspy/propose/grounded_proposer.rb +560 -0
- data/lib/dspy/registry/registry_manager.rb +504 -0
- data/lib/dspy/registry/signature_registry.rb +725 -0
- data/lib/dspy/storage/program_storage.rb +442 -0
- data/lib/dspy/storage/storage_manager.rb +331 -0
- data/lib/dspy/subscribers/langfuse_subscriber.rb +669 -0
- data/lib/dspy/subscribers/logger_subscriber.rb +120 -0
- data/lib/dspy/subscribers/newrelic_subscriber.rb +686 -0
- data/lib/dspy/subscribers/otel_subscriber.rb +538 -0
- data/lib/dspy/teleprompt/data_handler.rb +107 -0
- data/lib/dspy/teleprompt/mipro_v2.rb +790 -0
- data/lib/dspy/teleprompt/simple_optimizer.rb +497 -0
- data/lib/dspy/teleprompt/teleprompter.rb +336 -0
- data/lib/dspy/teleprompt/utils.rb +380 -0
- data/lib/dspy/version.rb +5 -0
- data/lib/dspy.rb +16 -0
- metadata +29 -12
- data/lib/dspy/lm/adapters/ruby_llm_adapter.rb +0 -81
@@ -0,0 +1,790 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'digest'
|
4
|
+
require 'sorbet-runtime'
|
5
|
+
require_relative 'teleprompter'
|
6
|
+
require_relative 'utils'
|
7
|
+
require_relative '../propose/grounded_proposer'
|
8
|
+
|
9
|
+
module DSPy
|
10
|
+
module Teleprompt
|
11
|
+
# MIPROv2: Multi-prompt Instruction Proposal with Retrieval Optimization
|
12
|
+
# State-of-the-art prompt optimization combining bootstrap sampling,
|
13
|
+
# instruction generation, and Bayesian optimization
|
14
|
+
class MIPROv2 < Teleprompter
|
15
|
+
extend T::Sig
|
16
|
+
|
17
|
+
# Auto-configuration modes for different optimization needs
|
18
|
+
module AutoMode
|
19
|
+
extend T::Sig
|
20
|
+
|
21
|
+
sig { returns(MIPROv2) }
|
22
|
+
def self.light
|
23
|
+
config = MIPROv2Config.new
|
24
|
+
config.num_trials = 6
|
25
|
+
config.num_instruction_candidates = 3
|
26
|
+
config.max_bootstrapped_examples = 2
|
27
|
+
config.max_labeled_examples = 8
|
28
|
+
config.bootstrap_sets = 3
|
29
|
+
config.optimization_strategy = "greedy"
|
30
|
+
config.early_stopping_patience = 2
|
31
|
+
MIPROv2.new(config: config)
|
32
|
+
end
|
33
|
+
|
34
|
+
sig { returns(MIPROv2) }
|
35
|
+
def self.medium
|
36
|
+
config = MIPROv2Config.new
|
37
|
+
config.num_trials = 12
|
38
|
+
config.num_instruction_candidates = 5
|
39
|
+
config.max_bootstrapped_examples = 4
|
40
|
+
config.max_labeled_examples = 16
|
41
|
+
config.bootstrap_sets = 5
|
42
|
+
config.optimization_strategy = "adaptive"
|
43
|
+
config.early_stopping_patience = 3
|
44
|
+
MIPROv2.new(config: config)
|
45
|
+
end
|
46
|
+
|
47
|
+
sig { returns(MIPROv2) }
|
48
|
+
def self.heavy
|
49
|
+
config = MIPROv2Config.new
|
50
|
+
config.num_trials = 18
|
51
|
+
config.num_instruction_candidates = 8
|
52
|
+
config.max_bootstrapped_examples = 6
|
53
|
+
config.max_labeled_examples = 24
|
54
|
+
config.bootstrap_sets = 8
|
55
|
+
config.optimization_strategy = "bayesian"
|
56
|
+
config.early_stopping_patience = 5
|
57
|
+
MIPROv2.new(config: config)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
# Configuration for MIPROv2 optimization
|
62
|
+
class MIPROv2Config < Config
|
63
|
+
extend T::Sig
|
64
|
+
|
65
|
+
sig { returns(Integer) }
|
66
|
+
attr_accessor :num_trials
|
67
|
+
|
68
|
+
sig { returns(Integer) }
|
69
|
+
attr_accessor :num_instruction_candidates
|
70
|
+
|
71
|
+
sig { returns(Integer) }
|
72
|
+
attr_accessor :bootstrap_sets
|
73
|
+
|
74
|
+
sig { returns(String) }
|
75
|
+
attr_accessor :optimization_strategy
|
76
|
+
|
77
|
+
sig { returns(Float) }
|
78
|
+
attr_accessor :init_temperature
|
79
|
+
|
80
|
+
sig { returns(Float) }
|
81
|
+
attr_accessor :final_temperature
|
82
|
+
|
83
|
+
sig { returns(Integer) }
|
84
|
+
attr_accessor :early_stopping_patience
|
85
|
+
|
86
|
+
sig { returns(T::Boolean) }
|
87
|
+
attr_accessor :use_bayesian_optimization
|
88
|
+
|
89
|
+
sig { returns(T::Boolean) }
|
90
|
+
attr_accessor :track_diversity
|
91
|
+
|
92
|
+
sig { returns(DSPy::Propose::GroundedProposer::Config) }
|
93
|
+
attr_accessor :proposer_config
|
94
|
+
|
95
|
+
sig { void }
|
96
|
+
def initialize
|
97
|
+
super
|
98
|
+
@num_trials = 12
|
99
|
+
@num_instruction_candidates = 5
|
100
|
+
@bootstrap_sets = 5
|
101
|
+
@optimization_strategy = "adaptive" # greedy, adaptive, bayesian
|
102
|
+
@init_temperature = 1.0
|
103
|
+
@final_temperature = 0.1
|
104
|
+
@early_stopping_patience = 3
|
105
|
+
@use_bayesian_optimization = true
|
106
|
+
@track_diversity = true
|
107
|
+
@proposer_config = DSPy::Propose::GroundedProposer::Config.new
|
108
|
+
end
|
109
|
+
|
110
|
+
sig { returns(T::Hash[Symbol, T.untyped]) }
|
111
|
+
def to_h
|
112
|
+
super.merge({
|
113
|
+
num_trials: @num_trials,
|
114
|
+
num_instruction_candidates: @num_instruction_candidates,
|
115
|
+
bootstrap_sets: @bootstrap_sets,
|
116
|
+
optimization_strategy: @optimization_strategy,
|
117
|
+
init_temperature: @init_temperature,
|
118
|
+
final_temperature: @final_temperature,
|
119
|
+
early_stopping_patience: @early_stopping_patience,
|
120
|
+
use_bayesian_optimization: @use_bayesian_optimization,
|
121
|
+
track_diversity: @track_diversity
|
122
|
+
})
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
# Candidate configuration for optimization trials
|
127
|
+
class CandidateConfig
|
128
|
+
extend T::Sig
|
129
|
+
|
130
|
+
sig { returns(String) }
|
131
|
+
attr_reader :instruction
|
132
|
+
|
133
|
+
sig { returns(T::Array[T.untyped]) }
|
134
|
+
attr_reader :few_shot_examples
|
135
|
+
|
136
|
+
sig { returns(T::Hash[Symbol, T.untyped]) }
|
137
|
+
attr_reader :metadata
|
138
|
+
|
139
|
+
sig { returns(String) }
|
140
|
+
attr_reader :config_id
|
141
|
+
|
142
|
+
sig do
|
143
|
+
params(
|
144
|
+
instruction: String,
|
145
|
+
few_shot_examples: T::Array[T.untyped],
|
146
|
+
metadata: T::Hash[Symbol, T.untyped]
|
147
|
+
).void
|
148
|
+
end
|
149
|
+
def initialize(instruction:, few_shot_examples:, metadata: {})
|
150
|
+
@instruction = instruction
|
151
|
+
@few_shot_examples = few_shot_examples
|
152
|
+
@metadata = metadata.freeze
|
153
|
+
@config_id = generate_config_id
|
154
|
+
end
|
155
|
+
|
156
|
+
sig { returns(T::Hash[Symbol, T.untyped]) }
|
157
|
+
def to_h
|
158
|
+
{
|
159
|
+
instruction: @instruction,
|
160
|
+
few_shot_examples: @few_shot_examples.size,
|
161
|
+
metadata: @metadata,
|
162
|
+
config_id: @config_id
|
163
|
+
}
|
164
|
+
end
|
165
|
+
|
166
|
+
private
|
167
|
+
|
168
|
+
sig { returns(String) }
|
169
|
+
def generate_config_id
|
170
|
+
content = "#{@instruction}_#{@few_shot_examples.size}_#{@metadata.hash}"
|
171
|
+
Digest::SHA256.hexdigest(content)[0, 12]
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
175
|
+
# Result of MIPROv2 optimization
|
176
|
+
class MIPROv2Result < OptimizationResult
|
177
|
+
extend T::Sig
|
178
|
+
|
179
|
+
sig { returns(T::Array[CandidateConfig]) }
|
180
|
+
attr_reader :evaluated_candidates
|
181
|
+
|
182
|
+
sig { returns(T::Hash[Symbol, T.untyped]) }
|
183
|
+
attr_reader :optimization_trace
|
184
|
+
|
185
|
+
sig { returns(T::Hash[Symbol, T.untyped]) }
|
186
|
+
attr_reader :bootstrap_statistics
|
187
|
+
|
188
|
+
sig { returns(T::Hash[Symbol, T.untyped]) }
|
189
|
+
attr_reader :proposal_statistics
|
190
|
+
|
191
|
+
sig do
|
192
|
+
params(
|
193
|
+
optimized_program: T.untyped,
|
194
|
+
scores: T::Hash[Symbol, T.untyped],
|
195
|
+
history: T::Hash[Symbol, T.untyped],
|
196
|
+
evaluated_candidates: T::Array[CandidateConfig],
|
197
|
+
optimization_trace: T::Hash[Symbol, T.untyped],
|
198
|
+
bootstrap_statistics: T::Hash[Symbol, T.untyped],
|
199
|
+
proposal_statistics: T::Hash[Symbol, T.untyped],
|
200
|
+
best_score_name: T.nilable(String),
|
201
|
+
best_score_value: T.nilable(Float),
|
202
|
+
metadata: T::Hash[Symbol, T.untyped]
|
203
|
+
).void
|
204
|
+
end
|
205
|
+
def initialize(optimized_program:, scores:, history:, evaluated_candidates:, optimization_trace:, bootstrap_statistics:, proposal_statistics:, best_score_name: nil, best_score_value: nil, metadata: {})
|
206
|
+
super(
|
207
|
+
optimized_program: optimized_program,
|
208
|
+
scores: scores,
|
209
|
+
history: history,
|
210
|
+
best_score_name: best_score_name,
|
211
|
+
best_score_value: best_score_value,
|
212
|
+
metadata: metadata
|
213
|
+
)
|
214
|
+
@evaluated_candidates = evaluated_candidates.freeze
|
215
|
+
@optimization_trace = optimization_trace.freeze
|
216
|
+
@bootstrap_statistics = bootstrap_statistics.freeze
|
217
|
+
@proposal_statistics = proposal_statistics.freeze
|
218
|
+
end
|
219
|
+
|
220
|
+
sig { returns(T::Hash[Symbol, T.untyped]) }
|
221
|
+
def to_h
|
222
|
+
super.merge({
|
223
|
+
evaluated_candidates: @evaluated_candidates.map(&:to_h),
|
224
|
+
optimization_trace: @optimization_trace,
|
225
|
+
bootstrap_statistics: @bootstrap_statistics,
|
226
|
+
proposal_statistics: @proposal_statistics
|
227
|
+
})
|
228
|
+
end
|
229
|
+
end
|
230
|
+
|
231
|
+
      # MIPROv2-specific configuration (also passed to the base Teleprompter).
      sig { returns(MIPROv2Config) }
      attr_reader :mipro_config

      # Grounded proposer used in phase 2 to generate instruction candidates.
      sig { returns(T.nilable(DSPy::Propose::GroundedProposer)) }
      attr_reader :proposer

      # @param metric optional callable (example, prediction) -> score,
      #   forwarded to the base Teleprompter
      # @param config optional MIPROv2Config; a default-valued one is built
      #   when omitted
      sig do
        params(
          metric: T.nilable(T.proc.params(arg0: T.untyped, arg1: T.untyped).returns(T.untyped)),
          config: T.nilable(MIPROv2Config)
        ).void
      end
      def initialize(metric: nil, config: nil)
        @mipro_config = config || MIPROv2Config.new
        super(metric: metric, config: @mipro_config)

        @proposer = DSPy::Propose::GroundedProposer.new(config: @mipro_config.proposer_config)
        # Accumulated across compile runs on this instance.
        @optimization_trace = []
        @evaluated_candidates = []
      end
|
251
|
+
|
252
|
+
      # Main MIPROv2 optimization method.
      #
      # Runs the three MIPROv2 phases in sequence:
      #   1. bootstrap few-shot demonstration sets from the training data,
      #   2. propose instruction candidates grounded in those demonstrations,
      #   3. search candidate (instruction, few-shot) combinations for the
      #      best-scoring program.
      #
      # @param program the DSPy program to optimize
      # @param trainset training examples (converted to typed examples)
      # @param valset optional validation examples used for scoring
      # @return [MIPROv2Result] the optimized program plus trial statistics
      sig do
        params(
          program: T.untyped,
          trainset: T::Array[T.untyped],
          valset: T.nilable(T::Array[T.untyped])
        ).returns(MIPROv2Result)
      end
      def compile(program, trainset:, valset: nil)
        validate_inputs(program, trainset, valset)

        instrument_step('miprov2_compile', {
          trainset_size: trainset.size,
          valset_size: valset&.size || 0,
          num_trials: @mipro_config.num_trials,
          optimization_strategy: @mipro_config.optimization_strategy,
          mode: infer_auto_mode
        }) do
          # Convert examples to typed format
          typed_trainset = ensure_typed_examples(trainset)
          typed_valset = valset ? ensure_typed_examples(valset) : nil

          # Use validation set if available, otherwise score on a slice of the
          # training set (a third of it, but at least 10 examples).
          evaluation_set = typed_valset || typed_trainset.take([typed_trainset.size / 3, 10].max)

          # Phase 1: Bootstrap few-shot examples
          emit_event('phase_start', { phase: 1, name: 'bootstrap' })
          bootstrap_result = phase_1_bootstrap(program, typed_trainset)
          emit_event('phase_complete', {
            phase: 1,
            success_rate: bootstrap_result.statistics[:success_rate],
            candidate_sets: bootstrap_result.candidate_sets.size
          })

          # Phase 2: Generate instruction candidates
          emit_event('phase_start', { phase: 2, name: 'instruction_proposal' })
          proposal_result = phase_2_propose_instructions(program, typed_trainset, bootstrap_result)
          emit_event('phase_complete', {
            phase: 2,
            num_candidates: proposal_result.num_candidates,
            best_instruction_preview: proposal_result.best_instruction[0, 50]
          })

          # Phase 3: Bayesian optimization
          emit_event('phase_start', { phase: 3, name: 'optimization' })
          optimization_result = phase_3_optimize(
            program,
            evaluation_set,
            proposal_result,
            bootstrap_result
          )
          emit_event('phase_complete', {
            phase: 3,
            best_score: optimization_result[:best_score],
            trials_completed: optimization_result[:trials_completed]
          })

          # Build final result
          final_result = build_miprov2_result(
            optimization_result,
            bootstrap_result,
            proposal_result
          )

          save_results(final_result)
          final_result
        end
      end
|
320
|
+
|
321
|
+
private
|
322
|
+
|
323
|
+
# Phase 1: Bootstrap few-shot examples from training data
|
324
|
+
sig { params(program: T.untyped, trainset: T::Array[DSPy::Example]).returns(Utils::BootstrapResult) }
|
325
|
+
def phase_1_bootstrap(program, trainset)
|
326
|
+
bootstrap_config = Utils::BootstrapConfig.new
|
327
|
+
bootstrap_config.max_bootstrapped_examples = @mipro_config.max_bootstrapped_examples
|
328
|
+
bootstrap_config.max_labeled_examples = @mipro_config.max_labeled_examples
|
329
|
+
bootstrap_config.num_candidate_sets = @mipro_config.bootstrap_sets
|
330
|
+
bootstrap_config.max_errors = @mipro_config.max_errors
|
331
|
+
bootstrap_config.num_threads = @mipro_config.num_threads
|
332
|
+
|
333
|
+
Utils.create_n_fewshot_demo_sets(program, trainset, config: bootstrap_config, metric: @metric)
|
334
|
+
end
|
335
|
+
|
336
|
+
      # Phase 2: Generate instruction candidates using the grounded proposer.
      #
      # @param program the program being optimized (source of the current
      #   instruction and signature class)
      # @param trainset typed training examples given to the proposer
      # @param bootstrap_result phase-1 output; up to 5 successful examples are
      #   passed along as grounding context
      # @raise [ArgumentError] if no signature class can be extracted
      sig do
        params(
          program: T.untyped,
          trainset: T::Array[DSPy::Example],
          bootstrap_result: Utils::BootstrapResult
        ).returns(DSPy::Propose::GroundedProposer::ProposalResult)
      end
      def phase_2_propose_instructions(program, trainset, bootstrap_result)
        # Get current instruction if available
        current_instruction = extract_current_instruction(program)

        # Use few-shot examples from bootstrap if available
        few_shot_examples = bootstrap_result.successful_examples.take(5)

        # Get signature class from program
        signature_class = extract_signature_class(program)
        raise ArgumentError, "Cannot extract signature class from program" unless signature_class

        # Configure proposer for this optimization run.
        # NOTE: mutates the shared proposer_config held by @mipro_config.
        @mipro_config.proposer_config.num_instruction_candidates = @mipro_config.num_instruction_candidates

        @proposer.propose_instructions(
          signature_class,
          trainset,
          few_shot_examples: few_shot_examples,
          current_instruction: current_instruction
        )
      end
|
365
|
+
|
366
|
+
      # Phase 3: trial loop searching for the best candidate configuration.
      #
      # Builds the candidate pool, then for up to num_trials iterations selects
      # a candidate per the configured strategy, evaluates it on the evaluation
      # set, and tracks the best (score, candidate, program) triple. Individual
      # trial failures are logged and skipped; early stopping can end the loop
      # before all trials run.
      #
      # @return Hash with :best_score, :best_candidate, :best_program,
      #   :trials_completed, :optimization_state, :evaluated_candidates
      sig do
        params(
          program: T.untyped,
          evaluation_set: T::Array[DSPy::Example],
          proposal_result: DSPy::Propose::GroundedProposer::ProposalResult,
          bootstrap_result: Utils::BootstrapResult
        ).returns(T::Hash[Symbol, T.untyped])
      end
      def phase_3_optimize(program, evaluation_set, proposal_result, bootstrap_result)
        # Generate candidate configurations
        candidates = generate_candidate_configurations(proposal_result, bootstrap_result)

        # Initialize optimization state
        optimization_state = initialize_optimization_state(candidates)

        # Run optimization trials
        trials_completed = 0
        # NOTE: best_score starts at 0.0, so if every trial scores 0 the
        # best_candidate/best_program stay nil (pass_rate is non-negative).
        best_score = 0.0
        best_candidate = nil
        best_program = nil

        @mipro_config.num_trials.times do |trial_idx|
          trials_completed = trial_idx + 1

          # Select next candidate based on optimization strategy
          candidate = select_next_candidate(candidates, optimization_state, trial_idx)

          emit_event('trial_start', {
            trial_number: trials_completed,
            candidate_id: candidate.config_id,
            instruction_preview: candidate.instruction[0, 50],
            num_few_shot: candidate.few_shot_examples.size
          })

          begin
            # Evaluate candidate
            score, modified_program = evaluate_candidate(program, candidate, evaluation_set)

            # Update optimization state
            update_optimization_state(optimization_state, candidate, score)

            # Track best result
            is_best = score > best_score
            if is_best
              best_score = score
              best_candidate = candidate
              best_program = modified_program
            end

            emit_event('trial_complete', {
              trial_number: trials_completed,
              score: score,
              is_best: is_best,
              candidate_id: candidate.config_id
            })

            # Check early stopping
            if should_early_stop?(optimization_state, trial_idx)
              DSPy.logger.info("Early stopping at trial #{trials_completed}")
              break
            end

          rescue => error
            # A failed trial is logged and skipped; the loop continues.
            emit_event('trial_error', {
              trial_number: trials_completed,
              error: error.message,
              candidate_id: candidate.config_id
            })

            DSPy.logger.warn("Trial #{trials_completed} failed: #{error.message}")
          end
        end

        {
          best_score: best_score,
          best_candidate: best_candidate,
          best_program: best_program,
          trials_completed: trials_completed,
          optimization_state: optimization_state,
          evaluated_candidates: @evaluated_candidates
        }
      end
|
449
|
+
|
450
|
+
# Generate candidate configurations from proposals and bootstrap results
|
451
|
+
sig do
|
452
|
+
params(
|
453
|
+
proposal_result: DSPy::Propose::GroundedProposer::ProposalResult,
|
454
|
+
bootstrap_result: Utils::BootstrapResult
|
455
|
+
).returns(T::Array[CandidateConfig])
|
456
|
+
end
|
457
|
+
def generate_candidate_configurations(proposal_result, bootstrap_result)
|
458
|
+
candidates = []
|
459
|
+
|
460
|
+
# Base configuration (no modifications)
|
461
|
+
candidates << CandidateConfig.new(
|
462
|
+
instruction: "",
|
463
|
+
few_shot_examples: [],
|
464
|
+
metadata: { type: "baseline" }
|
465
|
+
)
|
466
|
+
|
467
|
+
# Instruction-only candidates
|
468
|
+
proposal_result.candidate_instructions.each_with_index do |instruction, idx|
|
469
|
+
candidates << CandidateConfig.new(
|
470
|
+
instruction: instruction,
|
471
|
+
few_shot_examples: [],
|
472
|
+
metadata: { type: "instruction_only", proposal_rank: idx }
|
473
|
+
)
|
474
|
+
end
|
475
|
+
|
476
|
+
# Few-shot only candidates
|
477
|
+
bootstrap_result.candidate_sets.each_with_index do |candidate_set, idx|
|
478
|
+
candidates << CandidateConfig.new(
|
479
|
+
instruction: "",
|
480
|
+
few_shot_examples: candidate_set,
|
481
|
+
metadata: { type: "few_shot_only", bootstrap_rank: idx }
|
482
|
+
)
|
483
|
+
end
|
484
|
+
|
485
|
+
# Combined candidates (instruction + few-shot)
|
486
|
+
top_instructions = proposal_result.candidate_instructions.take(3)
|
487
|
+
top_bootstrap_sets = bootstrap_result.candidate_sets.take(3)
|
488
|
+
|
489
|
+
top_instructions.each_with_index do |instruction, i_idx|
|
490
|
+
top_bootstrap_sets.each_with_index do |candidate_set, b_idx|
|
491
|
+
candidates << CandidateConfig.new(
|
492
|
+
instruction: instruction,
|
493
|
+
few_shot_examples: candidate_set,
|
494
|
+
metadata: {
|
495
|
+
type: "combined",
|
496
|
+
instruction_rank: i_idx,
|
497
|
+
bootstrap_rank: b_idx
|
498
|
+
}
|
499
|
+
)
|
500
|
+
end
|
501
|
+
end
|
502
|
+
|
503
|
+
candidates
|
504
|
+
end
|
505
|
+
|
506
|
+
# Initialize optimization state for candidate selection
|
507
|
+
sig { params(candidates: T::Array[CandidateConfig]).returns(T::Hash[Symbol, T.untyped]) }
|
508
|
+
def initialize_optimization_state(candidates)
|
509
|
+
{
|
510
|
+
candidates: candidates,
|
511
|
+
scores: {},
|
512
|
+
exploration_counts: Hash.new(0),
|
513
|
+
temperature: @mipro_config.init_temperature,
|
514
|
+
best_score_history: [],
|
515
|
+
diversity_scores: {},
|
516
|
+
no_improvement_count: 0
|
517
|
+
}
|
518
|
+
end
|
519
|
+
|
520
|
+
# Select next candidate based on optimization strategy
|
521
|
+
sig do
|
522
|
+
params(
|
523
|
+
candidates: T::Array[CandidateConfig],
|
524
|
+
state: T::Hash[Symbol, T.untyped],
|
525
|
+
trial_idx: Integer
|
526
|
+
).returns(CandidateConfig)
|
527
|
+
end
|
528
|
+
def select_next_candidate(candidates, state, trial_idx)
|
529
|
+
case @mipro_config.optimization_strategy
|
530
|
+
when "greedy"
|
531
|
+
select_candidate_greedy(candidates, state)
|
532
|
+
when "adaptive"
|
533
|
+
select_candidate_adaptive(candidates, state, trial_idx)
|
534
|
+
when "bayesian"
|
535
|
+
select_candidate_bayesian(candidates, state, trial_idx)
|
536
|
+
else
|
537
|
+
candidates.sample # Random fallback
|
538
|
+
end
|
539
|
+
end
|
540
|
+
|
541
|
+
# Greedy candidate selection (exploit best known configurations)
|
542
|
+
sig { params(candidates: T::Array[CandidateConfig], state: T::Hash[Symbol, T.untyped]).returns(CandidateConfig) }
|
543
|
+
def select_candidate_greedy(candidates, state)
|
544
|
+
# Prioritize unexplored candidates, then highest scoring
|
545
|
+
unexplored = candidates.reject { |c| state[:scores].key?(c.config_id) }
|
546
|
+
return unexplored.sample if unexplored.any?
|
547
|
+
|
548
|
+
# Among explored, pick the best
|
549
|
+
scored_candidates = candidates.select { |c| state[:scores].key?(c.config_id) }
|
550
|
+
scored_candidates.max_by { |c| state[:scores][c.config_id] } || candidates.first
|
551
|
+
end
|
552
|
+
|
553
|
+
# Adaptive candidate selection (balance exploration and exploitation)
|
554
|
+
sig do
|
555
|
+
params(
|
556
|
+
candidates: T::Array[CandidateConfig],
|
557
|
+
state: T::Hash[Symbol, T.untyped],
|
558
|
+
trial_idx: Integer
|
559
|
+
).returns(CandidateConfig)
|
560
|
+
end
|
561
|
+
def select_candidate_adaptive(candidates, state, trial_idx)
|
562
|
+
# Update temperature based on progress
|
563
|
+
progress = trial_idx.to_f / @mipro_config.num_trials
|
564
|
+
state[:temperature] = @mipro_config.init_temperature * (1 - progress) + @mipro_config.final_temperature * progress
|
565
|
+
|
566
|
+
# Calculate selection scores combining exploitation and exploration
|
567
|
+
candidate_scores = candidates.map do |candidate|
|
568
|
+
exploitation_score = state[:scores][candidate.config_id] || 0.0
|
569
|
+
exploration_bonus = 1.0 / (state[:exploration_counts][candidate.config_id] + 1)
|
570
|
+
|
571
|
+
total_score = exploitation_score + state[:temperature] * exploration_bonus
|
572
|
+
[candidate, total_score]
|
573
|
+
end
|
574
|
+
|
575
|
+
# Select using softmax with temperature
|
576
|
+
if state[:temperature] > 0.01
|
577
|
+
# Probabilistic selection
|
578
|
+
weights = candidate_scores.map { |_, score| Math.exp(score / state[:temperature]) }
|
579
|
+
total_weight = weights.sum
|
580
|
+
probabilities = weights.map { |w| w / total_weight }
|
581
|
+
|
582
|
+
random_value = rand
|
583
|
+
cumulative = 0.0
|
584
|
+
candidate_scores.each_with_index do |(candidate, _), idx|
|
585
|
+
cumulative += probabilities[idx]
|
586
|
+
return candidate if random_value <= cumulative
|
587
|
+
end
|
588
|
+
end
|
589
|
+
|
590
|
+
# Fallback to highest scoring
|
591
|
+
candidate_scores.max_by { |_, score| score }.first
|
592
|
+
end
|
593
|
+
|
594
|
+
# Bayesian candidate selection (use probabilistic model)
|
595
|
+
sig do
|
596
|
+
params(
|
597
|
+
candidates: T::Array[CandidateConfig],
|
598
|
+
state: T::Hash[Symbol, T.untyped],
|
599
|
+
trial_idx: Integer
|
600
|
+
).returns(CandidateConfig)
|
601
|
+
end
|
602
|
+
def select_candidate_bayesian(candidates, state, trial_idx)
|
603
|
+
# For now, use adaptive selection with Bayesian-inspired exploration
|
604
|
+
# In a full implementation, this would use Gaussian processes or similar
|
605
|
+
select_candidate_adaptive(candidates, state, trial_idx)
|
606
|
+
end
|
607
|
+
|
608
|
+
# Evaluate a candidate configuration
|
609
|
+
sig do
|
610
|
+
params(
|
611
|
+
program: T.untyped,
|
612
|
+
candidate: CandidateConfig,
|
613
|
+
evaluation_set: T::Array[DSPy::Example]
|
614
|
+
).returns([Float, T.untyped])
|
615
|
+
end
|
616
|
+
def evaluate_candidate(program, candidate, evaluation_set)
|
617
|
+
# Apply candidate configuration to program
|
618
|
+
modified_program = apply_candidate_configuration(program, candidate)
|
619
|
+
|
620
|
+
# Evaluate modified program
|
621
|
+
evaluation_result = evaluate_program(modified_program, evaluation_set)
|
622
|
+
|
623
|
+
# Store evaluation details
|
624
|
+
@evaluated_candidates << candidate
|
625
|
+
|
626
|
+
[evaluation_result.pass_rate, modified_program]
|
627
|
+
end
|
628
|
+
|
629
|
+
# Apply candidate configuration to program
|
630
|
+
sig { params(program: T.untyped, candidate: CandidateConfig).returns(T.untyped) }
|
631
|
+
def apply_candidate_configuration(program, candidate)
|
632
|
+
modified_program = program
|
633
|
+
|
634
|
+
# Apply instruction if provided
|
635
|
+
if !candidate.instruction.empty? && program.respond_to?(:with_instruction)
|
636
|
+
modified_program = modified_program.with_instruction(candidate.instruction)
|
637
|
+
end
|
638
|
+
|
639
|
+
# Apply few-shot examples if provided
|
640
|
+
if candidate.few_shot_examples.any? && program.respond_to?(:with_examples)
|
641
|
+
few_shot_examples = candidate.few_shot_examples.map do |example|
|
642
|
+
DSPy::FewShotExample.new(
|
643
|
+
input: example.input_values,
|
644
|
+
output: example.expected_values,
|
645
|
+
reasoning: extract_reasoning_from_example(example)
|
646
|
+
)
|
647
|
+
end
|
648
|
+
modified_program = modified_program.with_examples(few_shot_examples)
|
649
|
+
end
|
650
|
+
|
651
|
+
modified_program
|
652
|
+
end
|
653
|
+
|
654
|
+
# Update optimization state after candidate evaluation
|
655
|
+
sig do
|
656
|
+
params(
|
657
|
+
state: T::Hash[Symbol, T.untyped],
|
658
|
+
candidate: CandidateConfig,
|
659
|
+
score: Float
|
660
|
+
).void
|
661
|
+
end
|
662
|
+
def update_optimization_state(state, candidate, score)
|
663
|
+
state[:scores][candidate.config_id] = score
|
664
|
+
state[:exploration_counts][candidate.config_id] += 1
|
665
|
+
state[:best_score_history] << score
|
666
|
+
|
667
|
+
# Track diversity if enabled
|
668
|
+
if @mipro_config.track_diversity
|
669
|
+
state[:diversity_scores][candidate.config_id] = calculate_diversity_score(candidate)
|
670
|
+
end
|
671
|
+
|
672
|
+
# Update no improvement counter
|
673
|
+
if state[:best_score_history].size > 1 && score > state[:best_score_history][-2]
|
674
|
+
state[:no_improvement_count] = 0
|
675
|
+
else
|
676
|
+
state[:no_improvement_count] += 1
|
677
|
+
end
|
678
|
+
end
|
679
|
+
|
680
|
+
# Check if optimization should stop early
|
681
|
+
sig { params(state: T::Hash[Symbol, T.untyped], trial_idx: Integer).returns(T::Boolean) }
|
682
|
+
def should_early_stop?(state, trial_idx)
|
683
|
+
# Don't stop too early
|
684
|
+
return false if trial_idx < @mipro_config.early_stopping_patience
|
685
|
+
|
686
|
+
# Stop if no improvement for patience trials
|
687
|
+
state[:no_improvement_count] >= @mipro_config.early_stopping_patience
|
688
|
+
end
|
689
|
+
|
690
|
+
# Calculate diversity score for candidate
|
691
|
+
sig { params(candidate: CandidateConfig).returns(Float) }
|
692
|
+
def calculate_diversity_score(candidate)
|
693
|
+
# Simple diversity metric based on instruction length and few-shot count
|
694
|
+
instruction_diversity = candidate.instruction.length / 200.0
|
695
|
+
few_shot_diversity = candidate.few_shot_examples.size / 10.0
|
696
|
+
|
697
|
+
[instruction_diversity + few_shot_diversity, 1.0].min
|
698
|
+
end
|
699
|
+
|
700
|
+
      # Assemble the final MIPROv2Result from the three phases' outputs.
      # Packs the best score under :pass_rate, summarizes the trial history,
      # and records optimizer metadata (best instruction, candidate type, etc.).
      sig do
        params(
          optimization_result: T::Hash[Symbol, T.untyped],
          bootstrap_result: Utils::BootstrapResult,
          proposal_result: DSPy::Propose::GroundedProposer::ProposalResult
        ).returns(MIPROv2Result)
      end
      def build_miprov2_result(optimization_result, bootstrap_result, proposal_result)
        # best_candidate/best_program may be nil if no trial beat the 0.0 floor.
        best_candidate = optimization_result[:best_candidate]
        best_program = optimization_result[:best_program]
        best_score = optimization_result[:best_score]

        scores = { pass_rate: best_score }

        history = {
          total_trials: optimization_result[:trials_completed],
          optimization_strategy: @mipro_config.optimization_strategy,
          # Fewer completed trials than configured implies early stopping fired.
          early_stopped: optimization_result[:trials_completed] < @mipro_config.num_trials,
          score_history: optimization_result[:optimization_state][:best_score_history]
        }

        metadata = {
          optimizer: "MIPROv2",
          auto_mode: infer_auto_mode,
          best_instruction: best_candidate&.instruction || "",
          best_few_shot_count: best_candidate&.few_shot_examples&.size || 0,
          best_candidate_type: best_candidate&.metadata&.fetch(:type, "unknown"),
          optimization_timestamp: Time.now.iso8601
        }

        MIPROv2Result.new(
          optimized_program: best_program,
          scores: scores,
          history: history,
          best_score_name: "pass_rate",
          best_score_value: best_score,
          metadata: metadata,
          evaluated_candidates: @evaluated_candidates,
          optimization_trace: optimization_result[:optimization_state] || {},
          bootstrap_statistics: bootstrap_result.statistics,
          proposal_statistics: proposal_result.analysis
        )
      end
|
744
|
+
|
745
|
+
# Helper methods
|
746
|
+
sig { params(program: T.untyped).returns(T.nilable(String)) }
|
747
|
+
def extract_current_instruction(program)
|
748
|
+
if program.respond_to?(:prompt) && program.prompt.respond_to?(:instruction)
|
749
|
+
program.prompt.instruction
|
750
|
+
elsif program.respond_to?(:system_signature)
|
751
|
+
system_sig = program.system_signature
|
752
|
+
system_sig.is_a?(String) ? system_sig : nil
|
753
|
+
else
|
754
|
+
nil
|
755
|
+
end
|
756
|
+
end
|
757
|
+
|
758
|
+
sig { params(program: T.untyped).returns(T.nilable(T.class_of(DSPy::Signature))) }
|
759
|
+
def extract_signature_class(program)
|
760
|
+
program.respond_to?(:signature_class) ? program.signature_class : nil
|
761
|
+
end
|
762
|
+
|
763
|
+
sig { params(example: T.untyped).returns(T.nilable(String)) }
|
764
|
+
def extract_reasoning_from_example(example)
|
765
|
+
case example
|
766
|
+
when DSPy::Example
|
767
|
+
if example.expected_values.key?(:reasoning)
|
768
|
+
example.expected_values[:reasoning]
|
769
|
+
elsif example.expected_values.key?(:explanation)
|
770
|
+
example.expected_values[:explanation]
|
771
|
+
else
|
772
|
+
nil
|
773
|
+
end
|
774
|
+
else
|
775
|
+
nil
|
776
|
+
end
|
777
|
+
end
|
778
|
+
|
779
|
+
# Infer auto mode based on configuration
|
780
|
+
sig { returns(String) }
|
781
|
+
def infer_auto_mode
|
782
|
+
case @mipro_config.num_trials
|
783
|
+
when 0..6 then "light"
|
784
|
+
when 7..12 then "medium"
|
785
|
+
else "heavy"
|
786
|
+
end
|
787
|
+
end
|
788
|
+
end
|
789
|
+
end
|
790
|
+
end
|