desiru 0.1.1 → 0.2.0
This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/.claude/settings.local.json +11 -0
- data/CHANGELOG.md +73 -0
- data/CLAUDE.local.md +3 -0
- data/CLAUDE.md +6 -1
- data/Gemfile.lock +1 -1
- data/README.md +7 -1
- data/desiru-development-swarm.yml +185 -0
- data/lib/desiru/core/compiler.rb +231 -0
- data/lib/desiru/core/example.rb +96 -0
- data/lib/desiru/core/prediction.rb +108 -0
- data/lib/desiru/core/trace.rb +330 -0
- data/lib/desiru/core/traceable.rb +61 -0
- data/lib/desiru/core.rb +12 -0
- data/lib/desiru/module.rb +8 -0
- data/lib/desiru/modules/best_of_n.rb +306 -0
- data/lib/desiru/modules/multi_chain_comparison.rb +72 -20
- data/lib/desiru/modules/predict.rb +7 -0
- data/lib/desiru/modules/program_of_thought.rb +227 -28
- data/lib/desiru/optimizers/base.rb +31 -1
- data/lib/desiru/optimizers/mipro_v2.rb +889 -0
- data/lib/desiru/persistence/repositories/base_repository.rb +1 -1
- data/lib/desiru/version.rb +1 -1
- data/lib/desiru.rb +10 -0
- metadata +13 -1
The largest addition is the new MIPROv2 optimizer, data/lib/desiru/optimizers/mipro_v2.rb:

--- /dev/null
+++ b/data/lib/desiru/optimizers/mipro_v2.rb
@@ -0,0 +1,889 @@
+# frozen_string_literal: true
+
+require 'json'
+
+module Desiru
+  module Optimizers
+    # MIPROv2 - Multi-objective Instruction Prompt Optimization v2
+    # Uses Bayesian optimization to optimize prompts and demonstrations across multiple objectives
+    class MIPROv2 < Base
+      attr_reader :optimization_history, :pareto_frontier, :trace_collector
+
+      def initialize(metric: :exact_match, objectives: nil, **config)
+        super(metric: metric, **config)
+        @objectives = normalize_objectives(objectives || [metric])
+        @optimization_history = []
+        @pareto_frontier = []
+        @gaussian_process = GaussianProcess.new
+        @acquisition_function = config[:acquisition_function] || :expected_improvement
+        @trace_collector = config[:trace_collector] || Core.trace_collector
+        @instruction_candidates = []
+        @demonstration_candidates = []
+      end
+
+      def compile(program, trainset:, valset: nil)
+        trace_optimization('Starting MIPROv2 optimization', {
+                             trainset_size: trainset.size,
+                             valset_size: valset&.size || 0,
+                             objectives: @objectives.map(&:to_s),
+                             config: config
+                           })
+
+        begin
+          # Initialize optimization state
+          @current_program = deep_copy_program(program)
+          @trainset = trainset
+          @valset = valset || trainset
+          @iteration = 0
+
+          # Clear trace collector for fresh optimization
+          @trace_collector.clear if config[:clear_traces]
+
+          # Enable tracing on all modules
+          enable_program_tracing(@current_program)
+
+          # Run Bayesian optimization loop
+          while @iteration < config[:max_iterations] && !should_stop?
+            @iteration += 1
+            trace_optimization("Iteration #{@iteration}", { phase: 'start' })
+
+            # Generate candidates using acquisition function
+            candidates = generate_candidates
+
+            # Evaluate candidates
+            evaluated_candidates = evaluate_candidates(candidates)
+
+            # Update Gaussian Process with results
+            update_gaussian_process(evaluated_candidates)
+
+            # Update Pareto frontier for multi-objective optimization
+            update_pareto_frontier(evaluated_candidates)
+
+            # Select best candidate
+            best_candidate = select_best_candidate(evaluated_candidates)
+
+            # Apply best candidate to program
+            apply_candidate(@current_program, best_candidate) if best_candidate
+
+            # Log iteration results - always log even if no best candidate
+            if best_candidate
+              log_iteration_results(best_candidate, evaluated_candidates)
+            elsif evaluated_candidates.any?
+              # Log with the first candidate if no best found
+              log_iteration_results(evaluated_candidates.first, evaluated_candidates)
+            end
+          end
+
+          # Restore trace state
+          disable_program_tracing(@current_program) if config[:restore_trace_state]
+
+          # Return optimized program
+          @current_program
+        rescue StandardError => e
+          trace_optimization('Optimization failed', { error: e.message, backtrace: e.backtrace.first(3) })
+          begin
+            disable_program_tracing(@current_program) if config[:restore_trace_state]
+          rescue StandardError
+            nil
+          end
+
+          # Return original program on error
+          program
+        ensure
+          # Always disable tracing at the end if enabled
+          begin
+            disable_program_tracing(@current_program) if config[:restore_trace_state]
+          rescue StandardError
+            nil
+          end
+        end
+      end
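
The compile loop above is the optimizer's entry point: each iteration proposes candidates, scores them against the validation set, updates the surrogate model and Pareto frontier, and applies the winner to the working program. A minimal sketch of driving it (the program and train_examples objects here are placeholders, not part of this diff):

    # Hypothetical caller; assumes `program` exposes its Desiru modules via
    # instance variables or a #modules hash, as extract_program_modules expects.
    optimizer = Desiru::Optimizers::MIPROv2.new(metric: :exact_match, max_iterations: 5)
    optimized = optimizer.compile(program, trainset: train_examples)
    optimizer.optimization_history.last # best candidate and scores per iteration
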
+
+      def optimize_module(module_instance, examples)
+        trace_optimization('Optimizing module with MIPROv2', {
+                             module: module_instance.class.name,
+                             examples_count: examples.size
+                           })
+
+        # Generate instruction variants
+        instruction_variants = generate_instruction_variants(module_instance, examples)
+
+        # Generate demonstration sets
+        demo_sets = generate_demonstration_sets(module_instance, examples)
+
+        # Evaluate all combinations
+        best_config = nil
+        best_score = -Float::INFINITY
+
+        instruction_variants.each do |instruction|
+          demo_sets.each do |demos|
+            score = evaluate_module_config(module_instance, instruction, demos, examples)
+
+            if score > best_score
+              best_score = score
+              best_config = { instruction: instruction, demos: demos }
+            end
+          end
+        end
+
+        # Create optimized module
+        optimized = module_instance.with_demos(best_config[:demos])
+        optimized.instruction = best_config[:instruction] if optimized.respond_to?(:instruction=)
+
+        optimized
+      end
+
+      def generate_instruction_variants(module_instance, _examples)
+        # Generate different instruction styles
+        signature = module_instance.signature
+        [
+          generate_instruction(signature, 'concise', 0.2),
+          generate_instruction(signature, 'detailed', 0.5),
+          generate_instruction(signature, 'step-by-step', 0.8)
+        ]
+      end
+
+      def generate_demonstration_sets(_module_instance, examples)
+        return [[]] if examples.empty?
+
+        # Generate different demo sets
+        sets = []
+
+        # Empty set
+        sets << []
+
+        # Random subset
+        [1, 2, 3].each do |count|
+          break if count > examples.size
+
+          sets << examples.sample(count)
+        end
+
+        # Diverse set
+        sets << select_diverse_demonstrations(examples, [examples.size, 3].min, Random.new) if examples.size > 1
+
+        sets
+      end
+
+      def evaluate_module_config(module_instance, instruction, demos, examples)
+        # Simple evaluation - could be enhanced
+        test_module = module_instance.with_demos(demos)
+
+        test_module.instruction = instruction if test_module.respond_to?(:instruction=) && instruction
+
+        # Evaluate on subset of examples
+        eval_examples = examples.sample([examples.size, 5].min)
+        scores = eval_examples.map do |ex|
+          # Extract inputs (exclude answer/output fields)
+          inputs = {}
+          ex.to_h.each do |k, v|
+            inputs[k] = v unless %i[answer output].include?(k)
+          end
+
+          result = test_module.call(inputs)
+          score_prediction(result, ex)
+        rescue StandardError
+          0.0
+        end
+
+        scores.empty? ? 0.0 : scores.sum.to_f / scores.size
+      end
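
Taken together, optimize_module runs an exhaustive grid over the three instruction variants and at most five demonstration sets (the empty set, up to three random subsets, and one diverse set): at most 3 × 5 = 15 configurations, each scored on up to five sampled examples. A hypothetical per-module call (qa_module is illustrative):

    tuned = optimizer.optimize_module(qa_module, train_examples)
    tuned.demos # demonstrations from the best-scoring configuration
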
+
+      private
+
+      def normalize_objectives(objectives)
+        objectives.map { |obj| normalize_metric(obj) }
+      end
+
+      def generate_candidates
+        trace_optimization("Generating candidates", {
+                             iteration: @iteration,
+                             acquisition_function: @acquisition_function
+                           })
+
+        # Use Gaussian Process to guide candidate generation
+        if @optimization_history.empty?
+          # Initial random sampling
+          generate_random_candidates(config[:num_candidates])
+        else
+          # Use acquisition function to generate candidates
+          generate_guided_candidates(config[:num_candidates])
+        end
+      end
+
+      def generate_random_candidates(num)
+        (1..num).map do |i|
+          {
+            id: "random_#{@iteration}_#{i}",
+            instruction_seed: rand,
+            demo_seed: rand,
+            temperature: 0.1 + (rand * 0.8),
+            demo_count: rand(1..config[:max_bootstrapped_demos]),
+            instruction_style: %w[concise detailed step-by-step].sample,
+            demo_selection: %w[random diverse similar].sample
+          }
+        end
+      end
+
+      def generate_guided_candidates(num)
+        candidates = []
+
+        # Get best performers from history
+        best_historical = @optimization_history
+                          .sort_by { |h| -h[:scores].values.sum }
+                          .first(5)
+
+        # Generate variations of best performers
+        best_historical.each do |hist|
+          next unless hist[:candidate] # Skip if no candidate
+
+          2.times do
+            candidate = mutate_candidate(hist[:candidate])
+            candidates << candidate
+          end
+        end
+
+        # Fill remaining slots with acquisition function-guided candidates
+        while candidates.size < num
+          candidate = generate_acquisition_candidate
+          candidates << candidate
+        end
+
+        candidates.first(num)
+      end
+
+      def mutate_candidate(base_candidate)
+        return generate_random_candidates(1).first if base_candidate.nil?
+
+        mutated = base_candidate.dup
+        mutated[:id] = "mutated_#{@iteration}_#{rand(1000)}"
+
+        # Mutate parameters with small variations
+        mutated[:instruction_seed] = constrain((base_candidate[:instruction_seed] || rand) + gaussian_noise(0.1), 0, 1)
+        mutated[:demo_seed] = constrain((base_candidate[:demo_seed] || rand) + gaussian_noise(0.1), 0, 1)
+        mutated[:temperature] = constrain((base_candidate[:temperature] || 0.5) + gaussian_noise(0.05), 0.1, 0.9)
+        mutated[:demo_count] = constrain(
+          (base_candidate[:demo_count] || 2) + gaussian_noise(0.5).round,
+          1,
+          config[:max_bootstrapped_demos]
+        )
+
+        mutated
+      end
+
+      def generate_acquisition_candidate
+        # Use acquisition function to find promising regions
+        best_point = optimize_acquisition_function
+
+        {
+          id: "acquisition_#{@iteration}_#{rand(1000)}",
+          instruction_seed: best_point[0],
+          demo_seed: best_point[1],
+          temperature: best_point[2],
+          demo_count: best_point[3].round.clamp(1, config[:max_bootstrapped_demos]),
+          instruction_style: select_instruction_style(best_point[0]),
+          demo_selection: select_demo_strategy(best_point[1])
+        }
+      end
+
+      def evaluate_candidates(candidates)
+        trace_optimization("Evaluating #{candidates.size} candidates", {})
+
+        candidates.map do |candidate|
+          # Validate candidate has required fields
+          next unless candidate.is_a?(Hash) && candidate[:id]
+
+          # Apply candidate configuration to program
+          test_program = deep_copy_program(@current_program)
+          apply_candidate(test_program, candidate)
+
+          # Evaluate on validation set
+          scores = evaluate_multi_objective(test_program, @valset)
+
+          # Collect traces for this candidate
+          candidate_traces = collect_candidate_traces(candidate[:id])
+
+          {
+            candidate: candidate,
+            scores: scores,
+            traces: candidate_traces,
+            timestamp: Time.now
+          }
+        rescue StandardError => e
+          trace_optimization("Candidate evaluation failed", {
+                               candidate_id: candidate[:id] || 'unknown',
+                               error: e.message
+                             })
+          {
+            candidate: candidate,
+            scores: {},
+            traces: [],
+            timestamp: Time.now,
+            error: e.message
+          }
+        end.compact
+      end
+
+      def evaluate_multi_objective(program, dataset)
+        scores = {}
+
+        @objectives.each do |objective|
+          evaluator = create_evaluator(objective)
+          result = evaluator.evaluate(program, dataset)
+          scores[objective] = result[:average_score]
+        end
+
+        scores
+      end
+
+      def create_evaluator(objective)
+        # Create a temporary evaluator for each objective
+        self.class.superclass.new(metric: objective, config: config)
+      end
+
+      def update_gaussian_process(evaluated_candidates)
+        # Convert candidates to feature vectors
+        evaluated_candidates.each do |eval|
+          features = candidate_to_features(eval[:candidate])
+          # For multi-objective, use scalarized score
+          score = scalarize_objectives(eval[:scores])
+          @gaussian_process.add_observation(features, score)
+        end
+
+        @gaussian_process.update
+      end
+
+      def candidate_to_features(candidate)
+        [
+          candidate[:instruction_seed],
+          candidate[:demo_seed],
+          candidate[:temperature],
+          candidate[:demo_count].to_f / config[:max_bootstrapped_demos]
+        ]
+      end
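
For reference, candidate_to_features embeds each candidate as a four-dimensional point for the surrogate model, normalizing the demo count by max_bootstrapped_demos:

    x(c) = \left( s_{\mathrm{instr}},\; s_{\mathrm{demo}},\; \tau,\; \tfrac{d}{d_{\max}} \right)
           \in [0,1] \times [0,1] \times [0.1,\,0.9] \times (0,\,1]
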
+
+      def scalarize_objectives(scores)
+        # Simple weighted sum - could be improved with user preferences
+        weights = @objectives.map { 1.0 / @objectives.size }
+        scores.values.zip(weights).map { |s, w| (s || 0) * w }.sum
+      end
+
+      def update_pareto_frontier(evaluated_candidates)
+        # Add new candidates to frontier
+        evaluated_candidates.each do |eval|
+          @pareto_frontier << eval
+        end
+
+        # Remove dominated solutions
+        @pareto_frontier = compute_pareto_frontier(@pareto_frontier)
+
+        trace_optimization("Updated Pareto frontier", {
+                             size: @pareto_frontier.size,
+                             best_scores: @pareto_frontier.first(3).map { |e| e[:scores] }
+                           })
+      end
+
+      def compute_pareto_frontier(candidates)
+        frontier = []
+
+        candidates.each do |candidate|
+          dominated = false
+
+          candidates.each do |other|
+            next if candidate == other
+
+            if dominates?(other[:scores], candidate[:scores])
+              dominated = true
+              break
+            end
+          end
+
+          frontier << candidate unless dominated
+        end
+
+        frontier
+      end
+
+      def dominates?(scores1, scores2)
+        # For minimization objectives, flip the comparison
+        at_least_one_better = false
+
+        @objectives.each do |obj|
+          # Handle nil scores
+          score1 = scores1[obj] || 0
+          score2 = scores2[obj] || 0
+
+          return false if score1 < score2
+
+          at_least_one_better = true if score1 > score2
+        end
+
+        at_least_one_better
+      end
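
A quick worked example of the dominance rule above (one candidate dominates another iff it is at least as good on every objective and strictly better on at least one); the objective names are hypothetical:

    a = { accuracy: 0.9,  f1: 0.8 }
    b = { accuracy: 0.7,  f1: 0.8 }
    c = { accuracy: 0.95, f1: 0.6 }
    # a dominates b: no worse on both objectives, strictly better on accuracy.
    # a and c do not dominate each other (each wins one objective),
    # so both would survive compute_pareto_frontier.
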
+
+      def select_best_candidate(evaluated_candidates)
+        return nil if evaluated_candidates.empty?
+
+        # Filter out candidates with nil scores
+        valid_candidates = evaluated_candidates.reject { |c| c[:scores].nil? || c[:scores].empty? }
+        return nil if valid_candidates.empty?
+
+        # For single objective, pick best
+        if @objectives.size == 1
+          valid_candidates.max_by { |e| e[:scores][@objectives.first] || 0 }
+        else
+          # For multi-objective, pick from Pareto frontier based on preferences
+          # Filter valid candidates from frontier
+          valid_frontier = @pareto_frontier.reject { |c| c[:scores].nil? || c[:scores].empty? }
+          return nil if valid_frontier.empty?
+
+          valid_frontier.max_by { |e| scalarize_objectives(e[:scores]) }
+        end
+      end
+
+      def apply_candidate(program, candidate)
+        return unless candidate
+
+        # Apply instruction modifications
+        apply_instruction_changes(program, candidate)
+
+        # Apply demonstration selection
+        apply_demonstration_changes(program, candidate)
+
+        # Store candidate configuration in program metadata
+        return unless program.respond_to?(:metadata=)
+
+        program.metadata[:mipro_config] = candidate
+      end
+
+      def apply_instruction_changes(program, candidate)
+        modules = extract_program_modules(program)
+
+        modules.each_value do |mod|
+          next unless mod.respond_to?(:signature)
+
+          # Generate instruction based on candidate parameters
+          instruction = generate_instruction(
+            mod.signature,
+            candidate[:instruction_style],
+            candidate[:instruction_seed]
+          )
+
+          # Apply if module supports custom instructions
+          mod.instruction = instruction if mod.respond_to?(:instruction=)
+        end
+      end
+
+      def apply_demonstration_changes(program, candidate)
+        modules = extract_program_modules(program)
+
+        modules.each_value do |mod|
+          # Select demonstrations based on candidate strategy
+          demos = select_demonstrations(
+            mod,
+            @trainset,
+            candidate[:demo_count],
+            candidate[:demo_selection],
+            candidate[:demo_seed]
+          )
+
+          # Apply demonstrations
+          optimized_module = mod.with_demos(demos)
+          update_program_module(program, mod, optimized_module)
+        end
+      end
+
+      def generate_instruction(signature, style, seed)
+        # Use seed for reproducibility
+        seed ||= rand # Fallback if seed is nil
+        Random.new((seed * 1_000_000).to_i)
+
+        # Handle both string and Signature object
+        if signature.is_a?(String)
+          # Parse signature string to extract input/output fields
+          parts = signature.split('->').map(&:strip)
+          return signature unless parts.size == 2
+
+          input_fields = parts[0].split(',').map(&:strip).map { |f| f.split(':').first.strip }
+          output_fields = parts[1].split(',').map(&:strip).map { |f| f.split(':').first.strip }
+
+          # Fallback for simple signatures
+
+        else
+          # It's a Signature object
+          input_fields = signature.input_fields.keys
+          output_fields = signature.output_fields.keys
+        end
+
+        base_instruction = signature.to_s
+        style ||= 'concise' # Default style if nil
+
+        case style
+        when 'concise'
+          "Given #{input_fields.join(', ')}, output #{output_fields.join(', ')}."
+        when 'detailed'
+          if signature.is_a?(String)
+            "Process the following inputs: #{input_fields.join(', ')}. " \
+              "Generate these outputs: #{output_fields.join(', ')}. Be thorough and accurate."
+          else
+            input_desc = signature.input_fields.map { |k, f| "#{k} (#{f.type})" }.join(', ')
+            output_desc = signature.output_fields.map { |k, f| "#{k} (#{f.type})" }.join(', ')
+            "Process the following inputs: #{input_desc}. " \
+              "Generate these outputs: #{output_desc}. Be thorough and accurate."
+          end
+        when 'step-by-step'
+          "Follow these steps:\n" \
+            "1. Analyze the inputs: #{input_fields.join(', ')}\n" \
+            "2. Process the information carefully\n" \
+            "3. Generate outputs: #{output_fields.join(', ')}"
+        else
+          base_instruction
+        end
+      end
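
For a hypothetical signature string 'question: string -> answer: string', the three styles render as:

    concise:      Given question, output answer.
    detailed:     Process the following inputs: question. Generate these outputs: answer. Be thorough and accurate.
    step-by-step: Follow these steps:
                  1. Analyze the inputs: question
                  2. Process the information carefully
                  3. Generate outputs: answer

Note that the Random.new((seed * 1_000_000).to_i) call at the top of generate_instruction is never assigned, so the seed does not currently influence the generated instruction text.
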
+
+      def select_demonstrations(module_instance, examples, count, strategy, seed)
+        count ||= 0 # Default count if nil
+        return [] if count.zero? || examples.empty?
+
+        # Use seed for reproducibility
+        seed ||= rand # Fallback if seed is nil
+        rng = Random.new((seed * 1_000_000).to_i)
+        available = examples.dup
+
+        case strategy
+        when 'random'
+          available.sample(count, random: rng)
+        when 'diverse'
+          select_diverse_demonstrations(available, count, rng)
+        when 'similar'
+          select_similar_demonstrations(module_instance, available, count, rng)
+        else
+          available.first(count)
+        end
+      end
+
+      def select_diverse_demonstrations(examples, count, rng)
+        selected = []
+        remaining = examples.shuffle(random: rng)
+
+        while selected.size < count && remaining.any?
+          # Add most different from current selection
+          best_candidate = remaining.max_by do |ex|
+            min_distance_to_selected(ex, selected)
+          end
+
+          selected << best_candidate
+          remaining.delete(best_candidate)
+        end
+
+        selected
+      end
+
+      def select_similar_demonstrations(_module_instance, examples, count, rng)
+        # Group by similarity and select representatives
+        clusters = cluster_examples(examples, count)
+        clusters.map { |cluster| cluster.sample(random: rng) }.compact.first(count)
+      end
+
+      def min_distance_to_selected(example, selected)
+        return Float::INFINITY if selected.empty?
+
+        selected.map { |sel| example_distance(example, sel) }.min
+      end
+
+      def example_distance(ex1, ex2)
+        # Simple distance based on shared keys and values
+        keys1 = ex1.keys.to_set
+        keys2 = ex2.keys.to_set
+
+        shared_keys = keys1 & keys2
+        return 1.0 if shared_keys.empty?
+
+        differences = shared_keys.count { |k| ex1[k] != ex2[k] }
+        differences.to_f / shared_keys.size
+      end
+
+      def cluster_examples(examples, num_clusters)
+        # Simple clustering - could be improved with k-means
+        return [examples] if num_clusters == 1
+
+        clusters = Array.new(num_clusters) { [] }
+        examples.each_with_index do |ex, i|
+          clusters[i % num_clusters] << ex
+        end
+
+        clusters.reject(&:empty?)
+      end
+
+      def collect_candidate_traces(candidate_id)
+        # Filter traces that occurred during this candidate's evaluation
+        @trace_collector.traces.select do |trace|
+          trace.metadata[:candidate_id] == candidate_id
+        end
+      end
+
+      def log_iteration_results(best_candidate, all_candidates)
+        @optimization_history << {
+          iteration: @iteration,
+          best_candidate: best_candidate[:candidate],
+          scores: best_candidate[:scores] || {},
+          all_scores: all_candidates.map { |c| c[:scores] || {} },
+          pareto_size: @pareto_frontier.size,
+          timestamp: Time.now
+        }
+
+        trace_optimization("Iteration #{@iteration} complete", {
+                             best_scores: best_candidate[:scores] || {},
+                             candidates_evaluated: all_candidates.size,
+                             traces_collected: @trace_collector.size
+                           })
+      end
+
+      def should_stop?
+        return true if @iteration >= config[:max_iterations]
+
+        # Check if we've reached target performance
+        if @optimization_history.any?
+          best_score = @optimization_history.last[:scores].values.max
+          return true if best_score >= config[:stop_at_score]
+        end
+
+        # Check for convergence
+        if @optimization_history.size >= 5
+          recent_scores = @optimization_history.last(5).map { |h| h[:scores].values.max }
+          variance = statistical_variance(recent_scores)
+          return true if variance < config[:convergence_threshold]
+        end
+
+        false
+      end
+
+      def statistical_variance(values)
+        mean = values.sum.to_f / values.size
+        values.map { |v| (v - mean)**2 }.sum / values.size
+      end
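
should_stop? declares convergence when the population variance of the best scores from the last five iterations drops below convergence_threshold (0.001 by default):

    \sigma^2 = \frac{1}{n} \sum_{i=1}^{n} (v_i - \bar{v})^2,
    \qquad \bar{v} = \frac{1}{n} \sum_{i=1}^{n} v_i
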
+
+      def deep_copy_program(program)
+        # This needs proper implementation based on program structure
+        # For now, just return the program as optimizers typically create new modules
+        program
+      end
+
+      def extract_program_modules(program)
+        modules = {}
+
+        # Check instance variables
+        program.instance_variables.each do |var|
+          value = program.instance_variable_get(var)
+          modules[var.to_s.delete('@').to_sym] = value if value.is_a?(Desiru::Module)
+        end
+
+        # Check if program has a modules method
+        if program.respond_to?(:modules)
+          program.modules.each do |name, mod|
+            modules[name] = mod if mod.is_a?(Desiru::Module)
+          end
+        end
+
+        modules
+      end
+
+      def update_program_module(program, old_module, new_module)
+        # Update instance variable if it matches
+        program.instance_variables.each do |var|
+          value = program.instance_variable_get(var)
+          program.instance_variable_set(var, new_module) if value == old_module
+        end
+
+        # Update in modules hash if program supports it
+        return unless program.respond_to?(:modules) && program.modules.is_a?(Hash)
+
+        program.modules.each do |name, mod|
+          program.modules[name] = new_module if mod == old_module
+        end
+      end
+
+      def enable_program_tracing(program)
+        modules = extract_program_modules(program)
+        modules.each_value do |mod|
+          mod.enable_trace! if mod.respond_to?(:enable_trace!)
+        end
+      end
+
+      def disable_program_tracing(program)
+        modules = extract_program_modules(program)
+        modules.each_value do |mod|
+          mod.disable_trace! if mod.respond_to?(:disable_trace!)
+        end
+      end
+
+      def optimize_acquisition_function
+        # Simple grid search - could be improved with gradient-based optimization
+        best_point = nil
+        best_value = -Float::INFINITY
+
+        10.times do
+          point = [rand, rand, 0.1 + (rand * 0.8), rand * config[:max_bootstrapped_demos]]
+          value = compute_acquisition_value(point)
+
+          if value > best_value
+            best_value = value
+            best_point = point
+          end
+        end
+
+        best_point
+      end
+
+      def compute_acquisition_value(point)
+        case @acquisition_function
+        when :expected_improvement
+          expected_improvement(point)
+        when :upper_confidence_bound
+          upper_confidence_bound(point)
+        else
+          @gaussian_process.predict(point)[:mean]
+        end
+      end
+
+      def expected_improvement(point)
+        prediction = @gaussian_process.predict(point)
+        mean = prediction[:mean]
+        std = prediction[:std]
+
+        return 0.0 if std.zero?
+
+        best_so_far = @optimization_history.map { |h| scalarize_objectives(h[:scores]) }.max || 0
+        z = (mean - best_so_far) / std
+
+        # EI = (mean - best) * CDF(z) + std * PDF(z)
+        ((mean - best_so_far) * standard_normal_cdf(z)) + (std * standard_normal_pdf(z))
+      rescue StandardError => e
+        trace_optimization("Expected improvement calculation failed", { error: e.message })
+        0.0 # Return 0 on error
+      end
+
+      def upper_confidence_bound(point, beta = 2.0)
+        prediction = @gaussian_process.predict(point)
+        prediction[:mean] + (beta * prediction[:std])
+      end
+
+      def standard_normal_pdf(value)
+        Math.exp(-0.5 * (value**2)) / Math.sqrt(2 * Math::PI)
+      end
+
+      def standard_normal_cdf(value)
+        0.5 * (1 + Math.erf(value / Math.sqrt(2)))
+      end
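
In standard notation, the two acquisition functions above are expected improvement and the upper confidence bound, where f* is the best scalarized score observed so far and \Phi, \phi are the standard normal CDF and PDF:

    \mathrm{EI}(x) = (\mu(x) - f^{*})\,\Phi(z) + \sigma(x)\,\phi(z),
    \qquad z = \frac{\mu(x) - f^{*}}{\sigma(x)}

    \mathrm{UCB}(x) = \mu(x) + \beta\,\sigma(x), \qquad \beta = 2
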
+
+      def gaussian_noise(std_dev)
+        # Box-Muller transform for Gaussian noise
+        u1 = rand
+        u2 = rand
+        Math.sqrt(-2 * Math.log(u1)) * Math.cos(2 * Math::PI * u2) * std_dev
+      end
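
This is one draw of the Box-Muller transform, scaled by the requested standard deviation (note that Math.log(u1) diverges in the edge case u1 == 0, which rand can return):

    z = \sqrt{-2 \ln U_1}\,\cos(2\pi U_2) \sim \mathcal{N}(0,\,1),
    \qquad U_1, U_2 \sim \mathrm{Uniform}[0,\,1)
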
+
+      def constrain(value, min, max)
+        value.clamp(min, max)
+      end
+
+      def select_instruction_style(seed)
+        styles = %w[concise detailed step-by-step]
+        styles[(seed * styles.size).to_i]
+      end
+
+      def select_demo_strategy(seed)
+        strategies = %w[random diverse similar]
+        strategies[(seed * strategies.size).to_i]
+      end
+
+      def default_config
+        super.merge({
+                      max_iterations: 20,
+                      num_candidates: 8,
+                      convergence_threshold: 0.001,
+                      clear_traces: true,
+                      restore_trace_state: true,
+                      acquisition_function: :expected_improvement,
+                      max_bootstrapped_demos: 3
+                    })
+      end
+
+      # Simplified Gaussian Process implementation without matrix library
+      class GaussianProcess
+        def initialize(kernel = :rbf, length_scale = 1.0, noise = 0.1)
+          @kernel = kernel
+          @length_scale = length_scale
+          @noise = noise
+          @observations = []
+          @trained = false
+        end
+
+        def add_observation(features, value)
+          @observations << { features: features, value: value }
+          @trained = false
+        end
+
+        def update
+          # Simplified update - just mark as trained
+          @trained = !@observations.empty?
+        rescue StandardError => e
+          Desiru.logger&.warn("Gaussian Process update failed: #{e.message}")
+          @trained = false
+        end
+
+        def predict(features)
+          return { mean: 0.0, std: 1.0 } unless @trained && !@observations.empty?
+
+          # Simplified prediction using weighted average based on kernel similarity
+          weights = @observations.map do |obs|
+            kernel_function(features, obs[:features])
+          end
+
+          total_weight = weights.sum
+          return { mean: 0.0, std: 1.0 } if total_weight.zero?
+
+          # Normalize weights
+          weights = weights.map { |w| w / total_weight }
+
+          # Compute weighted mean
+          mean = @observations.zip(weights).map { |obs, w| obs[:value] * w }.sum
+
+          # Compute weighted variance for uncertainty
+          variance = @observations.zip(weights).map do |obs, w|
+            w * ((obs[:value] - mean)**2)
+          end.sum
+
+          std = Math.sqrt([variance + @noise, 0].max)
+
+          { mean: mean, std: std }
+        rescue StandardError => e
+          Desiru.logger&.warn("Gaussian Process prediction failed: #{e.message}")
+          { mean: 0.0, std: 1.0 }
+        end
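
Despite the class name, predict implements kernel-weighted (Nadaraya-Watson) regression rather than a true Gaussian-process posterior: the mean is an RBF-weighted average of observed scores, and the kernel-weighted variance plus the noise term stands in for predictive uncertainty:

    \hat{\mu}(x) = \frac{\sum_i k(x, x_i)\, y_i}{\sum_i k(x, x_i)},
    \qquad k(x, x') = \exp\!\left( -\frac{\lVert x - x' \rVert^2}{2\ell^2} \right)
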
+
+        private
+
+        def kernel_function(features1, features2)
+          # Only RBF kernel supported for now
+          rbf_kernel(features1, features2)
+        end
+
+        def rbf_kernel(features1, features2)
+          # Radial Basis Function kernel
+          distance = euclidean_distance(features1, features2)
+          Math.exp(-0.5 * ((distance / @length_scale)**2))
+        end
+
+        def euclidean_distance(features1, features2)
+          Math.sqrt(features1.zip(features2).map { |a, b| (a - b)**2 }.sum)
+        end
+      end
+    end
+  end
+end
+
+# Register in the main module namespace for convenience
+module Desiru
+  MIPROv2 = Optimizers::MIPROv2
+end
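
With the alias in place, the optimizer is reachable as either Desiru::Optimizers::MIPROv2 or Desiru::MIPROv2. A hypothetical multi-objective setup (the objective names and dataset variables are illustrative, not from this diff):

    optimizer = Desiru::MIPROv2.new(
      metric: :exact_match,
      objectives: %i[exact_match f1],
      num_candidates: 4
    )
    optimized = optimizer.compile(program, trainset: train, valset: val)
    optimizer.pareto_frontier # non-dominated candidates across both objectives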